001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.StreamUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.io.File;
027 import java.io.FileNotFoundException;
028 import java.io.InputStream;
029
030 import java.net.URL;
031
032 import java.util.Collections;
033 import java.util.HashMap;
034 import java.util.HashSet;
035 import java.util.Map;
036 import java.util.Set;
037
038 import javax.xml.parsers.DocumentBuilder;
039 import javax.xml.parsers.DocumentBuilderFactory;
040
041 import org.apache.tika.detect.DefaultDetector;
042 import org.apache.tika.detect.Detector;
043 import org.apache.tika.io.CloseShieldInputStream;
044 import org.apache.tika.io.TikaInputStream;
045 import org.apache.tika.metadata.Metadata;
046 import org.apache.tika.mime.MediaType;
047 import org.apache.tika.mime.MimeTypesReaderMetKeys;
048
049 import org.w3c.dom.Document;
050 import org.w3c.dom.Element;
051 import org.w3c.dom.Node;
052 import org.w3c.dom.NodeList;
053
054 import org.xml.sax.InputSource;
055
056
061 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
062
063 public MimeTypesImpl() {
064 _detector = new DefaultDetector(
065 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
066
067 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
068 "tika-mimetypes.xml");
069
070 try {
071 read(url.openStream());
072 }
073 catch (Exception e) {
074 _log.error("Unable to populate extensions map", e);
075 }
076 }
077
078 @Override
079 public String getContentType(File file) {
080 return getContentType(file, file.getName());
081 }
082
083 @Override
084 public String getContentType(File file, String fileName) {
085 if ((file == null) || !file.exists()) {
086 return getContentType(fileName);
087 }
088
089 InputStream is = null;
090
091 try {
092 is = TikaInputStream.get(file);
093
094 return getContentType(is, fileName);
095 }
096 catch (FileNotFoundException fnfe) {
097 return getContentType(fileName);
098 }
099 finally {
100 StreamUtil.cleanUp(is);
101 }
102 }
103
104 @Override
105 public String getContentType(InputStream inputStream, String fileName) {
106 if (inputStream == null) {
107 return getContentType(fileName);
108 }
109
110 String contentType = null;
111
112 try {
113 CloseShieldInputStream closeShieldInputStream =
114 new CloseShieldInputStream(inputStream);
115
116 Metadata metadata = new Metadata();
117
118 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
119
120 MediaType mediaType = _detector.detect(
121 TikaInputStream.get(closeShieldInputStream), metadata);
122
123 contentType = mediaType.toString();
124
125 if (contentType.contains("tika")) {
126 if (_log.isDebugEnabled()) {
127 _log.debug("Retrieved invalid content type " + contentType);
128 }
129
130 contentType = getContentType(fileName);
131 }
132
133 if (contentType.contains("tika")) {
134 if (_log.isDebugEnabled()) {
135 _log.debug("Retrieved invalid content type " + contentType);
136 }
137
138 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
139 }
140 }
141 catch (Exception e) {
142 _log.error(e, e);
143
144 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
145 }
146
147 return contentType;
148 }
149
150 @Override
151 public String getContentType(String fileName) {
152 if (Validator.isNull(fileName)) {
153 return ContentTypes.APPLICATION_OCTET_STREAM;
154 }
155
156 try {
157 Metadata metadata = new Metadata();
158
159 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
160
161 MediaType mediaType = _detector.detect(null, metadata);
162
163 String contentType = mediaType.toString();
164
165 if (!contentType.contains("tika")) {
166 return contentType;
167 }
168 else if (_log.isDebugEnabled()) {
169 _log.debug("Retrieved invalid content type " + contentType);
170 }
171 }
172 catch (Exception e) {
173 _log.error(e, e);
174 }
175
176 return ContentTypes.APPLICATION_OCTET_STREAM;
177 }
178
179 @Override
180 public Set<String> getExtensions(String contentType) {
181 Set<String> extensions = _extensionsMap.get(contentType);
182
183 if (extensions == null) {
184 extensions = Collections.emptySet();
185 }
186
187 return extensions;
188 }
189
190 protected void read(InputStream stream) throws Exception {
191 DocumentBuilderFactory documentBuilderFactory =
192 DocumentBuilderFactory.newInstance();
193
194 DocumentBuilder documentBuilder =
195 documentBuilderFactory.newDocumentBuilder();
196
197 Document document = documentBuilder.parse(new InputSource(stream));
198
199 Element element = document.getDocumentElement();
200
201 if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
202 throw new SystemException("Invalid configuration file");
203 }
204
205 NodeList nodeList = element.getChildNodes();
206
207 for (int i = 0; i < nodeList.getLength(); i++) {
208 Node node = nodeList.item(i);
209
210 if (node.getNodeType() != Node.ELEMENT_NODE) {
211 continue;
212 }
213
214 Element childElement = (Element)node;
215
216 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
217 readMimeType(childElement);
218 }
219 }
220 }
221
222 protected void readMimeType(Element element) {
223 Set<String> mimeTypes = new HashSet<String>();
224
225 Set<String> extensions = new HashSet<String>();
226
227 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
228
229 mimeTypes.add(name);
230
231 NodeList nodeList = element.getChildNodes();
232
233 for (int i = 0; i < nodeList.getLength(); i++) {
234 Node node = nodeList.item(i);
235
236 if (node.getNodeType() != Node.ELEMENT_NODE) {
237 continue;
238 }
239
240 Element childElement = (Element)node;
241
242 if (ALIAS_TAG.equals(childElement.getTagName())) {
243 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
244
245 mimeTypes.add(alias);
246 }
247 else if (GLOB_TAG.equals(childElement.getTagName())) {
248 boolean isRegex = GetterUtil.getBoolean(
249 childElement.getAttribute(ISREGEX_ATTR));
250
251 if (isRegex) {
252 continue;
253 }
254
255 String pattern = childElement.getAttribute(PATTERN_ATTR);
256
257 if (!pattern.startsWith("*")) {
258 continue;
259 }
260
261 String extension = pattern.substring(1);
262
263 if (!extension.contains("*") && !extension.contains("?") &&
264 !extension.contains("[")) {
265
266 extensions.add(extension);
267 }
268 }
269 }
270
271 for (String mimeType : mimeTypes) {
272 _extensionsMap.put(mimeType, extensions);
273 }
274 }
275
276 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
277
278 private Detector _detector;
279 private Map<String, Set<String>> _extensionsMap =
280 new HashMap<String, Set<String>>();
281
282 }