001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.util.ContentTypes;
021    import com.liferay.portal.kernel.util.GetterUtil;
022    import com.liferay.portal.kernel.util.MimeTypes;
023    import com.liferay.portal.kernel.util.SetUtil;
024    import com.liferay.portal.kernel.util.StreamUtil;
025    import com.liferay.portal.kernel.util.Validator;
026    
027    import java.io.File;
028    import java.io.FileNotFoundException;
029    import java.io.InputStream;
030    
031    import java.net.URL;
032    
033    import java.util.Collections;
034    import java.util.HashMap;
035    import java.util.HashSet;
036    import java.util.Map;
037    import java.util.Set;
038    
039    import javax.xml.parsers.DocumentBuilder;
040    import javax.xml.parsers.DocumentBuilderFactory;
041    
042    import org.apache.tika.detect.DefaultDetector;
043    import org.apache.tika.detect.Detector;
044    import org.apache.tika.io.CloseShieldInputStream;
045    import org.apache.tika.io.TikaInputStream;
046    import org.apache.tika.metadata.Metadata;
047    import org.apache.tika.mime.MediaType;
048    import org.apache.tika.mime.MimeTypesReaderMetKeys;
049    
050    import org.w3c.dom.Document;
051    import org.w3c.dom.Element;
052    import org.w3c.dom.Node;
053    import org.w3c.dom.NodeList;
054    
055    import org.xml.sax.InputSource;
056    
057    /**
058     * @author Jorge Ferrer
059     * @author Brian Wing Shun Chan
060     * @author Alexander Chow
061     */
062    public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063    
064            public MimeTypesImpl() {
065                    _detector = new DefaultDetector(
066                            org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067    
068                    _webImageMimeTypes = SetUtil.fromArray(
069                            PropsValues.MIME_TYPES_WEB_IMAGES);
070            }
071    
072            public void afterPropertiesSet() {
073                    URL url = org.apache.tika.mime.MimeTypes.class.getResource(
074                            "tika-mimetypes.xml");
075    
076                    try {
077                            read(url.openStream());
078                    }
079                    catch (Exception e) {
080                            _log.error("Unable to populate extensions map", e);
081                    }
082            }
083    
084            @Override
085            public String getContentType(File file) {
086                    return getContentType(file, file.getName());
087            }
088    
089            @Override
090            public String getContentType(File file, String fileName) {
091                    if ((file == null) || !file.exists()) {
092                            return getContentType(fileName);
093                    }
094    
095                    InputStream is = null;
096    
097                    try {
098                            is = TikaInputStream.get(file);
099    
100                            return getContentType(is, fileName);
101                    }
102                    catch (FileNotFoundException fnfe) {
103                            return getContentType(fileName);
104                    }
105                    finally {
106                            StreamUtil.cleanUp(is);
107                    }
108            }
109    
110            @Override
111            public String getContentType(InputStream inputStream, String fileName) {
112                    if (inputStream == null) {
113                            return getContentType(fileName);
114                    }
115    
116                    String contentType = null;
117    
118                    TikaInputStream tikaInputStream = null;
119    
120                    try {
121                            tikaInputStream = TikaInputStream.get(
122                                    new CloseShieldInputStream(inputStream));
123    
124                            Metadata metadata = new Metadata();
125    
126                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
127    
128                            MediaType mediaType = _detector.detect(tikaInputStream, metadata);
129    
130                            contentType = mediaType.toString();
131    
132                            if (contentType.contains("tika")) {
133                                    if (_log.isDebugEnabled()) {
134                                            _log.debug("Retrieved invalid content type " + contentType);
135                                    }
136    
137                                    contentType = getContentType(fileName);
138                            }
139    
140                            if (contentType.contains("tika")) {
141                                    if (_log.isDebugEnabled()) {
142                                            _log.debug("Retrieved invalid content type " + contentType);
143                                    }
144    
145                                    contentType = ContentTypes.APPLICATION_OCTET_STREAM;
146                            }
147                    }
148                    catch (Exception e) {
149                            _log.error(e, e);
150    
151                            contentType = ContentTypes.APPLICATION_OCTET_STREAM;
152                    }
153                    finally {
154                            StreamUtil.cleanUp(tikaInputStream);
155                    }
156    
157                    return contentType;
158            }
159    
160            @Override
161            public String getContentType(String fileName) {
162                    if (Validator.isNull(fileName)) {
163                            return ContentTypes.APPLICATION_OCTET_STREAM;
164                    }
165    
166                    try {
167                            Metadata metadata = new Metadata();
168    
169                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
170    
171                            MediaType mediaType = _detector.detect(null, metadata);
172    
173                            String contentType = mediaType.toString();
174    
175                            if (!contentType.contains("tika")) {
176                                    return contentType;
177                            }
178                            else if (_log.isDebugEnabled()) {
179                                    _log.debug("Retrieved invalid content type " + contentType);
180                            }
181                    }
182                    catch (Exception e) {
183                            _log.error(e, e);
184                    }
185    
186                    return ContentTypes.APPLICATION_OCTET_STREAM;
187            }
188    
189            @Override
190            public String getExtensionContentType(String extension) {
191                    if (Validator.isNull(extension)) {
192                            return ContentTypes.APPLICATION_OCTET_STREAM;
193                    }
194    
195                    return getContentType("A.".concat(extension));
196            }
197    
198            @Override
199            public Set<String> getExtensions(String contentType) {
200                    Set<String> extensions = _extensionsMap.get(contentType);
201    
202                    if (extensions == null) {
203                            extensions = Collections.emptySet();
204                    }
205    
206                    return extensions;
207            }
208    
209            @Override
210            public boolean isWebImage(String mimeType) {
211                    return _webImageMimeTypes.contains(mimeType);
212            }
213    
214            protected void read(InputStream stream) throws Exception {
215                    DocumentBuilderFactory documentBuilderFactory =
216                            DocumentBuilderFactory.newInstance();
217    
218                    DocumentBuilder documentBuilder =
219                            documentBuilderFactory.newDocumentBuilder();
220    
221                    Document document = documentBuilder.parse(new InputSource(stream));
222    
223                    Element element = document.getDocumentElement();
224    
225                    if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
226                            throw new SystemException("Invalid configuration file");
227                    }
228    
229                    NodeList nodeList = element.getChildNodes();
230    
231                    for (int i = 0; i < nodeList.getLength(); i++) {
232                            Node node = nodeList.item(i);
233    
234                            if (node.getNodeType() != Node.ELEMENT_NODE) {
235                                    continue;
236                            }
237    
238                            Element childElement = (Element)node;
239    
240                            if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
241                                    readMimeType(childElement);
242                            }
243                    }
244            }
245    
246            protected void readMimeType(Element element) {
247                    Set<String> mimeTypes = new HashSet<String>();
248    
249                    Set<String> extensions = new HashSet<String>();
250    
251                    String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
252    
253                    mimeTypes.add(name);
254    
255                    NodeList nodeList = element.getChildNodes();
256    
257                    for (int i = 0; i < nodeList.getLength(); i++) {
258                            Node node = nodeList.item(i);
259    
260                            if (node.getNodeType() != Node.ELEMENT_NODE) {
261                                    continue;
262                            }
263    
264                            Element childElement = (Element)node;
265    
266                            if (ALIAS_TAG.equals(childElement.getTagName())) {
267                                    String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
268    
269                                    mimeTypes.add(alias);
270                            }
271                            else if (GLOB_TAG.equals(childElement.getTagName())) {
272                                    boolean isRegex = GetterUtil.getBoolean(
273                                            childElement.getAttribute(ISREGEX_ATTR));
274    
275                                    if (isRegex) {
276                                            continue;
277                                    }
278    
279                                    String pattern = childElement.getAttribute(PATTERN_ATTR);
280    
281                                    if (!pattern.startsWith("*")) {
282                                            continue;
283                                    }
284    
285                                    String extension = pattern.substring(1);
286    
287                                    if (!extension.contains("*") && !extension.contains("?") &&
288                                            !extension.contains("[")) {
289    
290                                            extensions.add(extension);
291                                    }
292                            }
293                    }
294    
295                    for (String mimeType : mimeTypes) {
296                            _extensionsMap.put(mimeType, extensions);
297                    }
298            }
299    
300            private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
301    
302            private Detector _detector;
303            private Map<String, Set<String>> _extensionsMap =
304                    new HashMap<String, Set<String>>();
305            private Set<String> _webImageMimeTypes = new HashSet<String>();
306    
307    }