001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.SetUtil;
024 import com.liferay.portal.kernel.util.StreamUtil;
025 import com.liferay.portal.kernel.util.Validator;
026
027 import java.io.File;
028 import java.io.FileNotFoundException;
029 import java.io.InputStream;
030
031 import java.net.URL;
032
033 import java.util.Collections;
034 import java.util.HashMap;
035 import java.util.HashSet;
036 import java.util.Map;
037 import java.util.Set;
038
039 import javax.xml.parsers.DocumentBuilder;
040 import javax.xml.parsers.DocumentBuilderFactory;
041
042 import org.apache.tika.detect.DefaultDetector;
043 import org.apache.tika.detect.Detector;
044 import org.apache.tika.io.CloseShieldInputStream;
045 import org.apache.tika.io.TikaInputStream;
046 import org.apache.tika.metadata.Metadata;
047 import org.apache.tika.mime.MediaType;
048 import org.apache.tika.mime.MimeTypesReaderMetKeys;
049
050 import org.w3c.dom.Document;
051 import org.w3c.dom.Element;
052 import org.w3c.dom.Node;
053 import org.w3c.dom.NodeList;
054
055 import org.xml.sax.InputSource;
056
057
062 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063
064 public MimeTypesImpl() {
065 _detector = new DefaultDetector(
066 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067
068 _webImageMimeTypes = SetUtil.fromArray(
069 PropsValues.MIME_TYPES_WEB_IMAGES);
070 }
071
072 public void afterPropertiesSet() {
073 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
074 "tika-mimetypes.xml");
075
076 try {
077 read(url.openStream());
078 }
079 catch (Exception e) {
080 _log.error("Unable to populate extensions map", e);
081 }
082 }
083
084 @Override
085 public String getContentType(File file) {
086 return getContentType(file, file.getName());
087 }
088
089 @Override
090 public String getContentType(File file, String fileName) {
091 if ((file == null) || !file.exists()) {
092 return getContentType(fileName);
093 }
094
095 InputStream is = null;
096
097 try {
098 is = TikaInputStream.get(file);
099
100 return getContentType(is, fileName);
101 }
102 catch (FileNotFoundException fnfe) {
103 return getContentType(fileName);
104 }
105 finally {
106 StreamUtil.cleanUp(is);
107 }
108 }
109
110 @Override
111 public String getContentType(InputStream inputStream, String fileName) {
112 if (inputStream == null) {
113 return getContentType(fileName);
114 }
115
116 String contentType = null;
117
118 TikaInputStream tikaInputStream = null;
119
120 try {
121 tikaInputStream = TikaInputStream.get(
122 new CloseShieldInputStream(inputStream));
123
124 Metadata metadata = new Metadata();
125
126 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
127
128 MediaType mediaType = _detector.detect(tikaInputStream, metadata);
129
130 contentType = mediaType.toString();
131
132 if (contentType.contains("tika")) {
133 if (_log.isDebugEnabled()) {
134 _log.debug("Retrieved invalid content type " + contentType);
135 }
136
137 contentType = getContentType(fileName);
138 }
139
140 if (contentType.contains("tika")) {
141 if (_log.isDebugEnabled()) {
142 _log.debug("Retrieved invalid content type " + contentType);
143 }
144
145 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
146 }
147 }
148 catch (Exception e) {
149 _log.error(e, e);
150
151 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
152 }
153 finally {
154 StreamUtil.cleanUp(tikaInputStream);
155 }
156
157 return contentType;
158 }
159
160 @Override
161 public String getContentType(String fileName) {
162 if (Validator.isNull(fileName)) {
163 return ContentTypes.APPLICATION_OCTET_STREAM;
164 }
165
166 try {
167 Metadata metadata = new Metadata();
168
169 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
170
171 MediaType mediaType = _detector.detect(null, metadata);
172
173 String contentType = mediaType.toString();
174
175 if (!contentType.contains("tika")) {
176 return contentType;
177 }
178 else if (_log.isDebugEnabled()) {
179 _log.debug("Retrieved invalid content type " + contentType);
180 }
181 }
182 catch (Exception e) {
183 _log.error(e, e);
184 }
185
186 return ContentTypes.APPLICATION_OCTET_STREAM;
187 }
188
189 @Override
190 public String getExtensionContentType(String extension) {
191 if (Validator.isNull(extension)) {
192 return ContentTypes.APPLICATION_OCTET_STREAM;
193 }
194
195 return getContentType("A.".concat(extension));
196 }
197
198 @Override
199 public Set<String> getExtensions(String contentType) {
200 Set<String> extensions = _extensionsMap.get(contentType);
201
202 if (extensions == null) {
203 extensions = Collections.emptySet();
204 }
205
206 return extensions;
207 }
208
209 @Override
210 public boolean isWebImage(String mimeType) {
211 return _webImageMimeTypes.contains(mimeType);
212 }
213
214 protected void read(InputStream stream) throws Exception {
215 DocumentBuilderFactory documentBuilderFactory =
216 DocumentBuilderFactory.newInstance();
217
218 DocumentBuilder documentBuilder =
219 documentBuilderFactory.newDocumentBuilder();
220
221 Document document = documentBuilder.parse(new InputSource(stream));
222
223 Element element = document.getDocumentElement();
224
225 if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
226 throw new SystemException("Invalid configuration file");
227 }
228
229 NodeList nodeList = element.getChildNodes();
230
231 for (int i = 0; i < nodeList.getLength(); i++) {
232 Node node = nodeList.item(i);
233
234 if (node.getNodeType() != Node.ELEMENT_NODE) {
235 continue;
236 }
237
238 Element childElement = (Element)node;
239
240 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
241 readMimeType(childElement);
242 }
243 }
244 }
245
246 protected void readMimeType(Element element) {
247 Set<String> mimeTypes = new HashSet<String>();
248
249 Set<String> extensions = new HashSet<String>();
250
251 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
252
253 mimeTypes.add(name);
254
255 NodeList nodeList = element.getChildNodes();
256
257 for (int i = 0; i < nodeList.getLength(); i++) {
258 Node node = nodeList.item(i);
259
260 if (node.getNodeType() != Node.ELEMENT_NODE) {
261 continue;
262 }
263
264 Element childElement = (Element)node;
265
266 if (ALIAS_TAG.equals(childElement.getTagName())) {
267 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
268
269 mimeTypes.add(alias);
270 }
271 else if (GLOB_TAG.equals(childElement.getTagName())) {
272 boolean isRegex = GetterUtil.getBoolean(
273 childElement.getAttribute(ISREGEX_ATTR));
274
275 if (isRegex) {
276 continue;
277 }
278
279 String pattern = childElement.getAttribute(PATTERN_ATTR);
280
281 if (!pattern.startsWith("*")) {
282 continue;
283 }
284
285 String extension = pattern.substring(1);
286
287 if (!extension.contains("*") && !extension.contains("?") &&
288 !extension.contains("[")) {
289
290 extensions.add(extension);
291 }
292 }
293 }
294
295 for (String mimeType : mimeTypes) {
296 _extensionsMap.put(mimeType, extensions);
297 }
298 }
299
300 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
301
302 private Detector _detector;
303 private Map<String, Set<String>> _extensionsMap =
304 new HashMap<String, Set<String>>();
305 private Set<String> _webImageMimeTypes = new HashSet<String>();
306
307 }