001
014
015 package com.liferay.portal.metadata;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.io.DummyWriter;
019 import com.liferay.portal.kernel.log.Log;
020 import com.liferay.portal.kernel.log.LogFactoryUtil;
021 import com.liferay.portal.kernel.util.StreamUtil;
022
023 import java.io.File;
024 import java.io.FileInputStream;
025 import java.io.IOException;
026 import java.io.InputStream;
027
028 import org.apache.commons.lang.exception.ExceptionUtils;
029 import org.apache.pdfbox.exceptions.CryptographyException;
030 import org.apache.poi.EncryptedDocumentException;
031 import org.apache.tika.metadata.Metadata;
032 import org.apache.tika.parser.ParseContext;
033 import org.apache.tika.parser.Parser;
034 import org.apache.tika.sax.WriteOutContentHandler;
035
036 import org.xml.sax.ContentHandler;
037
038
043 public class TikaRawMetadataProcessor extends XugglerRawMetadataProcessor {
044
045 public void setParser(Parser parser) {
046 _parser = parser;
047 }
048
049 protected Metadata extractMetadata(
050 InputStream inputStream, Metadata metadata)
051 throws IOException {
052
053 if (metadata == null) {
054 metadata = new Metadata();
055 }
056
057 ParseContext parserContext = new ParseContext();
058
059 parserContext.set(Parser.class, _parser);
060
061 ContentHandler contentHandler = new WriteOutContentHandler(
062 new DummyWriter());
063
064 try {
065 _parser.parse(inputStream, contentHandler, metadata, parserContext);
066 }
067 catch (Exception e) {
068 Throwable throwable = ExceptionUtils.getRootCause(e);
069
070 if ((throwable instanceof CryptographyException) ||
071 (throwable instanceof EncryptedDocumentException)) {
072
073 if (_log.isWarnEnabled()) {
074 _log.warn(
075 "Unable to extract metadata from an encrypted file");
076 }
077 }
078 else {
079 _log.error(e, e);
080 }
081
082 throw new IOException(e.getMessage());
083 }
084
085
086
087 metadata.remove(XMPDM.ABS_PEAK_AUDIO_FILE_PATH.getName());
088 metadata.remove(XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH.getName());
089
090 return metadata;
091 }
092
093 @Override
094 protected Metadata extractMetadata(
095 String extension, String mimeType, File file)
096 throws SystemException {
097
098 Metadata metadata = super.extractMetadata(extension, mimeType, file);
099
100 InputStream inputStream = null;
101
102 try {
103 inputStream = new FileInputStream(file);
104
105 return extractMetadata(inputStream, metadata);
106 }
107 catch (IOException ioe) {
108 throw new SystemException(ioe);
109 }
110 finally {
111 StreamUtil.cleanUp(inputStream);
112 }
113 }
114
115 @Override
116 protected Metadata extractMetadata(
117 String extension, String mimeType, InputStream inputStream)
118 throws SystemException {
119
120 Metadata metadata = super.extractMetadata(
121 extension, mimeType, inputStream);
122
123 try {
124 return extractMetadata(inputStream, metadata);
125 }
126 catch (IOException ioe) {
127 throw new SystemException(ioe);
128 }
129 }
130
131 private static Log _log = LogFactoryUtil.getLog(
132 TikaRawMetadataProcessor.class);
133
134 private Parser _parser;
135
136 }