001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.util.ContentTypes;
021    import com.liferay.portal.kernel.util.GetterUtil;
022    import com.liferay.portal.kernel.util.MimeTypes;
023    import com.liferay.portal.kernel.util.StreamUtil;
024    import com.liferay.portal.kernel.util.Validator;
025    
026    import java.io.File;
027    import java.io.FileNotFoundException;
028    import java.io.InputStream;
029    
030    import java.net.URL;
031    
032    import java.util.Collections;
033    import java.util.HashMap;
034    import java.util.HashSet;
035    import java.util.Map;
036    import java.util.Set;
037    
038    import javax.xml.parsers.DocumentBuilder;
039    import javax.xml.parsers.DocumentBuilderFactory;
040    
041    import org.apache.tika.detect.DefaultDetector;
042    import org.apache.tika.detect.Detector;
043    import org.apache.tika.io.CloseShieldInputStream;
044    import org.apache.tika.io.TikaInputStream;
045    import org.apache.tika.metadata.Metadata;
046    import org.apache.tika.mime.MediaType;
047    import org.apache.tika.mime.MimeTypesReaderMetKeys;
048    
049    import org.w3c.dom.Document;
050    import org.w3c.dom.Element;
051    import org.w3c.dom.Node;
052    import org.w3c.dom.NodeList;
053    
054    import org.xml.sax.InputSource;
055    
056    /**
057     * @author Jorge Ferrer
058     * @author Brian Wing Shun Chan
059     * @author Alexander Chow
060     */
061    public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
062    
063            public MimeTypesImpl() {
064                    _detector = new DefaultDetector(
065                            org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
066    
067                    URL url = org.apache.tika.mime.MimeTypes.class.getResource(
068                            "tika-mimetypes.xml");
069    
070                    try {
071                            read(url.openStream());
072                    }
073                    catch (Exception e) {
074                            _log.error("Unable to populate extensions map", e);
075                    }
076            }
077    
078            @Override
079            public String getContentType(File file) {
080                    return getContentType(file, file.getName());
081            }
082    
083            @Override
084            public String getContentType(File file, String fileName) {
085                    if ((file == null) || !file.exists()) {
086                            return getContentType(fileName);
087                    }
088    
089                    InputStream is = null;
090    
091                    try {
092                            is = TikaInputStream.get(file);
093    
094                            return getContentType(is, fileName);
095                    }
096                    catch (FileNotFoundException fnfe) {
097                            return getContentType(fileName);
098                    }
099                    finally {
100                            StreamUtil.cleanUp(is);
101                    }
102            }
103    
104            @Override
105            public String getContentType(InputStream inputStream, String fileName) {
106                    if (inputStream == null) {
107                            return getContentType(fileName);
108                    }
109    
110                    String contentType = null;
111    
112                    try {
113                            CloseShieldInputStream closeShieldInputStream =
114                                    new CloseShieldInputStream(inputStream);
115    
116                            Metadata metadata = new Metadata();
117    
118                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
119    
120                            MediaType mediaType = _detector.detect(
121                                    TikaInputStream.get(closeShieldInputStream), metadata);
122    
123                            contentType = mediaType.toString();
124    
125                            if (contentType.contains("tika")) {
126                                    if (_log.isDebugEnabled()) {
127                                            _log.debug("Retrieved invalid content type " + contentType);
128                                    }
129    
130                                    contentType = getContentType(fileName);
131                            }
132    
133                            if (contentType.contains("tika")) {
134                                    if (_log.isDebugEnabled()) {
135                                            _log.debug("Retrieved invalid content type " + contentType);
136                                    }
137    
138                                    contentType = ContentTypes.APPLICATION_OCTET_STREAM;
139                            }
140                    }
141                    catch (Exception e) {
142                            _log.error(e, e);
143    
144                            contentType = ContentTypes.APPLICATION_OCTET_STREAM;
145                    }
146    
147                    return contentType;
148            }
149    
150            @Override
151            public String getContentType(String fileName) {
152                    if (Validator.isNull(fileName)) {
153                            return ContentTypes.APPLICATION_OCTET_STREAM;
154                    }
155    
156                    try {
157                            Metadata metadata = new Metadata();
158    
159                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
160    
161                            MediaType mediaType = _detector.detect(null, metadata);
162    
163                            String contentType = mediaType.toString();
164    
165                            if (!contentType.contains("tika")) {
166                                    return contentType;
167                            }
168                            else if (_log.isDebugEnabled()) {
169                                    _log.debug("Retrieved invalid content type " + contentType);
170                            }
171                    }
172                    catch (Exception e) {
173                            _log.error(e, e);
174                    }
175    
176                    return ContentTypes.APPLICATION_OCTET_STREAM;
177            }
178    
179            @Override
180            public Set<String> getExtensions(String contentType) {
181                    Set<String> extensions = _extensionsMap.get(contentType);
182    
183                    if (extensions == null) {
184                            extensions = Collections.emptySet();
185                    }
186    
187                    return extensions;
188            }
189    
190            protected void read(InputStream stream) throws Exception {
191                    DocumentBuilderFactory documentBuilderFactory =
192                            DocumentBuilderFactory.newInstance();
193    
194                    DocumentBuilder documentBuilder =
195                            documentBuilderFactory.newDocumentBuilder();
196    
197                    Document document = documentBuilder.parse(new InputSource(stream));
198    
199                    Element element = document.getDocumentElement();
200    
201                    if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
202                            throw new SystemException("Invalid configuration file");
203                    }
204    
205                    NodeList nodeList = element.getChildNodes();
206    
207                    for (int i = 0; i < nodeList.getLength(); i++) {
208                            Node node = nodeList.item(i);
209    
210                            if (node.getNodeType() != Node.ELEMENT_NODE) {
211                                    continue;
212                            }
213    
214                            Element childElement = (Element)node;
215    
216                            if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
217                                    readMimeType(childElement);
218                            }
219                    }
220            }
221    
222            protected void readMimeType(Element element) {
223                    Set<String> mimeTypes = new HashSet<String>();
224    
225                    Set<String> extensions = new HashSet<String>();
226    
227                    String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
228    
229                    mimeTypes.add(name);
230    
231                    NodeList nodeList = element.getChildNodes();
232    
233                    for (int i = 0; i < nodeList.getLength(); i++) {
234                            Node node = nodeList.item(i);
235    
236                            if (node.getNodeType() != Node.ELEMENT_NODE) {
237                                    continue;
238                            }
239    
240                            Element childElement = (Element)node;
241    
242                            if (ALIAS_TAG.equals(childElement.getTagName())) {
243                                    String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
244    
245                                    mimeTypes.add(alias);
246                            }
247                            else if (GLOB_TAG.equals(childElement.getTagName())) {
248                                    boolean isRegex = GetterUtil.getBoolean(
249                                            childElement.getAttribute(ISREGEX_ATTR));
250    
251                                    if (isRegex) {
252                                            continue;
253                                    }
254    
255                                    String pattern = childElement.getAttribute(PATTERN_ATTR);
256    
257                                    if (!pattern.startsWith("*")) {
258                                            continue;
259                                    }
260    
261                                    String extension = pattern.substring(1);
262    
263                                    if (!extension.contains("*") && !extension.contains("?") &&
264                                            !extension.contains("[")) {
265    
266                                            extensions.add(extension);
267                                    }
268                            }
269                    }
270    
271                    for (String mimeType : mimeTypes) {
272                            _extensionsMap.put(mimeType, extensions);
273                    }
274            }
275    
276            private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
277    
278            private Detector _detector;
279            private Map<String, Set<String>> _extensionsMap =
280                    new HashMap<String, Set<String>>();
281    
282    }