/**
 * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 */
014    
015    package com.liferay.portal.kernel.search;
016    
017    import com.liferay.portal.kernel.configuration.Filter;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.nio.charset.CharsetEncoderUtil;
021    import com.liferay.portal.kernel.util.ArrayUtil;
022    import com.liferay.portal.kernel.util.Base64;
023    import com.liferay.portal.kernel.util.Digester;
024    import com.liferay.portal.kernel.util.DigesterUtil;
025    import com.liferay.portal.kernel.util.PortalClassLoaderUtil;
026    import com.liferay.portal.kernel.util.PropsKeys;
027    import com.liferay.portal.kernel.util.PropsUtil;
028    import com.liferay.portal.kernel.util.StreamUtil;
029    import com.liferay.portal.kernel.util.StringBundler;
030    import com.liferay.portal.kernel.util.StringPool;
031    import com.liferay.portal.kernel.util.StringUtil;
032    import com.liferay.portal.model.Group;
033    import com.liferay.portal.service.GroupLocalServiceUtil;
034    import com.liferay.portal.util.PortletKeys;
035    
036    import java.io.InputStream;
037    
038    import java.net.URL;
039    
040    import java.nio.CharBuffer;
041    import java.nio.charset.CharsetEncoder;
042    
043    import java.util.List;
044    
045    /**
046     * @author Michael C. Han
047     */
048    public abstract class BaseSpellCheckIndexWriter
049            implements SpellCheckIndexWriter {
050    
051            @Override
052            public void indexKeyword(
053                            SearchContext searchContext, float weight, String keywordType)
054                    throws SearchException {
055    
056                    if (!keywordType.equals(SuggestionConstants.TYPE_QUERY_SUGGESTION) &&
057                            !keywordType.equals(SuggestionConstants.TYPE_SPELL_CHECKER)) {
058    
059                            throw new IllegalArgumentException(
060                                    "Invalid keyword type " + keywordType);
061                    }
062    
063                    long groupId = 0;
064    
065                    long[] groupIds = searchContext.getGroupIds();
066    
067                    if ((groupIds != null) && (groupIds.length > 0)) {
068                            groupId = groupIds[1];
069                    }
070    
071                    String keywordFieldName = Field.KEYWORD_SEARCH;
072                    String typeFieldValue = SuggestionConstants.TYPE_QUERY_SUGGESTION;
073                    int maxNGramLength = _querySuggestionMaxNGramLength;
074    
075                    if (keywordType.equals(SuggestionConstants.TYPE_SPELL_CHECKER)) {
076                            keywordFieldName = Field.SPELL_CHECK_WORD;
077                            typeFieldValue = SuggestionConstants.TYPE_SPELL_CHECKER;
078                            maxNGramLength = 0;
079                    }
080    
081                    try {
082                            indexKeyword(
083                                    searchContext.getCompanyId(), groupId,
084                                    searchContext.getLanguageId(), searchContext.getKeywords(),
085                                    weight, keywordFieldName, typeFieldValue, maxNGramLength);
086                    }
087                    catch (Exception e) {
088                            throw new SearchException(e);
089                    }
090            }
091    
092            @Override
093            public void indexQuerySuggestionDictionaries(SearchContext searchContext)
094                    throws SearchException {
095    
096                    try {
097                            for (String languageId : _SUPPORTED_LOCALES) {
098                                    indexKeywords(
099                                            searchContext.getCompanyId(), languageId,
100                                            PropsKeys.INDEX_SEARCH_QUERY_SUGGESTION_DICTIONARY,
101                                            Field.KEYWORD_SEARCH,
102                                            SuggestionConstants.TYPE_QUERY_SUGGESTION,
103                                            _querySuggestionMaxNGramLength);
104                            }
105                    }
106                    catch (Exception e) {
107                            throw new SearchException(e);
108                    }
109            }
110    
111            @Override
112            public void indexQuerySuggestionDictionary(SearchContext searchContext)
113                    throws SearchException {
114    
115                    try {
116                            indexKeywords(
117                                    searchContext.getCompanyId(), searchContext.getLanguageId(),
118                                    PropsKeys.INDEX_SEARCH_QUERY_SUGGESTION_DICTIONARY,
119                                    Field.KEYWORD_SEARCH, SuggestionConstants.TYPE_QUERY_SUGGESTION,
120                                    _querySuggestionMaxNGramLength);
121                    }
122                    catch (Exception e) {
123                            throw new SearchException(e);
124                    }
125            }
126    
127            @Override
128            public void indexSpellCheckerDictionaries(SearchContext searchContext)
129                    throws SearchException {
130    
131                    try {
132                            for (String languageId : _SUPPORTED_LOCALES) {
133                                    indexKeywords(
134                                            searchContext.getCompanyId(), languageId,
135                                            PropsKeys.INDEX_SEARCH_SPELL_CHECKER_DICTIONARY,
136                                            Field.SPELL_CHECK_WORD,
137                                            SuggestionConstants.TYPE_SPELL_CHECKER, 0);
138                            }
139                    }
140                    catch (Exception e) {
141                            throw new SearchException(e);
142                    }
143            }
144    
145            @Override
146            public void indexSpellCheckerDictionary(SearchContext searchContext)
147                    throws SearchException {
148    
149                    try {
150                            indexKeywords(
151                                    searchContext.getCompanyId(), searchContext.getLanguageId(),
152                                    PropsKeys.INDEX_SEARCH_SPELL_CHECKER_DICTIONARY,
153                                    Field.SPELL_CHECK_WORD, SuggestionConstants.TYPE_SPELL_CHECKER,
154                                    0);
155                    }
156                    catch (Exception e) {
157                            throw new SearchException(e);
158                    }
159            }
160    
161            public void setQuerySuggestionMaxNGramLength(
162                    int querySuggestionMaxNGramLength) {
163    
164                    _querySuggestionMaxNGramLength = querySuggestionMaxNGramLength;
165            }
166    
167            protected URL getResource(String name) {
168                    ClassLoader contextClassLoader =
169                            Thread.currentThread().getContextClassLoader();
170    
171                    URL url = contextClassLoader.getResource(name);
172    
173                    if (url == null) {
174                            ClassLoader portalClassLoader =
175                                    PortalClassLoaderUtil.getClassLoader();
176    
177                            url = portalClassLoader.getResource(name);
178                    }
179    
180                    return url;
181            }
182    
183            protected String getUID(
184                    long companyId, String languageId, String word, String... parameters) {
185    
186                    StringBundler uidSB = new StringBundler();
187    
188                    uidSB.append(String.valueOf(companyId));
189                    uidSB.append(StringPool.UNDERLINE);
190                    uidSB.append(PortletKeys.SEARCH);
191                    uidSB.append(_PORTLET_SEPARATOR);
192    
193                    int length = 4;
194    
195                    if (parameters != null) {
196                            length += parameters.length;
197                    }
198    
199                    try {
200                            CharsetEncoder charsetEncoder =
201                                    CharsetEncoderUtil.getCharsetEncoder(StringPool.UTF8);
202    
203                            StringBundler keySB = new StringBundler(length);
204    
205                            keySB.append(languageId);
206                            keySB.append(StringPool.UNDERLINE);
207                            keySB.append(word);
208                            keySB.append(StringPool.UNDERLINE);
209    
210                            keySB.append(StringUtil.toLowerCase(word));
211    
212                            if (parameters != null) {
213                                    for (String parameter : parameters) {
214                                            keySB.append(parameter);
215                                            keySB.append(StringPool.UNDERLINE);
216                                    }
217                            }
218    
219                            String key = keySB.toString();
220    
221                            byte[] bytes = DigesterUtil.digestRaw(
222                                    Digester.MD5, charsetEncoder.encode(CharBuffer.wrap(key)));
223    
224                            uidSB.append(Base64.encode(bytes));
225                    }
226                    catch (Exception e) {
227                            throw new IllegalStateException(e);
228                    }
229    
230                    return uidSB.toString();
231            }
232    
233            protected abstract void indexKeyword(
234                            long companyId, long groupId, String languageId, String keyword,
235                            float weight, String keywordFieldName, String typeFieldValue,
236                            int maxNGramLength)
237                    throws Exception;
238    
239            protected abstract void indexKeywords(
240                            long companyId, long groupId, String languageId,
241                            InputStream inputStream, String keywordFieldName,
242                            String typeFieldValue, int maxNGramLength)
243                    throws Exception;
244    
245            protected void indexKeywords(
246                            long companyId, long groupId, String languageId,
247                            String[] dictionaryFileNames, String keywordFieldName,
248                            String typeFieldValue, int maxNGramLength)
249                    throws Exception {
250    
251                    for (String dictionaryFileName : dictionaryFileNames) {
252                            InputStream inputStream = null;
253    
254                            if (_log.isInfoEnabled()) {
255                                    _log.info(
256                                            "Start indexing dictionary for " + dictionaryFileName);
257                            }
258    
259                            try {
260                                    URL url = getResource(dictionaryFileName);
261    
262                                    if (url == null) {
263                                            if (_log.isWarnEnabled()) {
264                                                    _log.warn("Unable to read " + dictionaryFileName);
265                                            }
266    
267                                            continue;
268                                    }
269    
270                                    inputStream = url.openStream();
271    
272                                    if (inputStream == null) {
273                                            if (_log.isWarnEnabled()) {
274                                                    _log.warn("Unable to read " + dictionaryFileName);
275                                            }
276    
277                                            continue;
278                                    }
279    
280                                    indexKeywords(
281                                            companyId, groupId, languageId, inputStream,
282                                            keywordFieldName, typeFieldValue, maxNGramLength);
283                            }
284                            finally {
285                                    StreamUtil.cleanUp(inputStream);
286                            }
287    
288                            if (_log.isInfoEnabled()) {
289                                    _log.info(
290                                            "Finished indexing dictionary for " + dictionaryFileName);
291                            }
292                    }
293            }
294    
295            protected void indexKeywords(
296                            long companyId, String languageId, String propsKey,
297                            String keywordFieldName, String typeFieldValue, int maxNGramLength)
298                    throws Exception {
299    
300                    String[] dictionaryFileNames = PropsUtil.getArray(
301                            propsKey, new Filter(languageId));
302    
303                    indexKeywords(
304                            companyId, 0, languageId, dictionaryFileNames, keywordFieldName,
305                            typeFieldValue, maxNGramLength);
306    
307                    List<Group> groups = GroupLocalServiceUtil.getLiveGroups();
308    
309                    for (Group group : groups) {
310                            String[] groupDictionaryFileNames = PropsUtil.getArray(
311                                    propsKey,
312                                    new Filter(languageId, String.valueOf(group.getGroupId())));
313    
314                            if (ArrayUtil.isEmpty(groupDictionaryFileNames)) {
315                                    continue;
316                            }
317    
318                            indexKeywords(
319                                    companyId, group.getGroupId(), languageId,
320                                    groupDictionaryFileNames, keywordFieldName, typeFieldValue,
321                                    maxNGramLength);
322                    }
323            }
324    
325            private static final String _PORTLET_SEPARATOR = "_PORTLET_";
326    
327            private static final String[] _SUPPORTED_LOCALES = StringUtil.split(
328                    PropsUtil.get(PropsKeys.INDEX_SEARCH_SPELL_CHECKER_SUPPORTED_LOCALES));
329    
330            private static Log _log = LogFactoryUtil.getLog(
331                    BaseSpellCheckIndexWriter.class);
332    
333            private int _querySuggestionMaxNGramLength = 50;
334    
335    }