001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.search.BaseQuerySuggester;
020    import com.liferay.portal.kernel.search.DocumentImpl;
021    import com.liferay.portal.kernel.search.Field;
022    import com.liferay.portal.kernel.search.NGramHolder;
023    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
024    import com.liferay.portal.kernel.search.SearchContext;
025    import com.liferay.portal.kernel.search.SearchException;
026    import com.liferay.portal.kernel.search.SuggestionConstants;
027    import com.liferay.portal.kernel.search.TokenizerUtil;
028    import com.liferay.portal.kernel.util.ArrayUtil;
029    import com.liferay.portal.util.PortletKeys;
030    
031    import java.io.IOException;
032    
033    import java.util.ArrayList;
034    import java.util.Arrays;
035    import java.util.Collections;
036    import java.util.Comparator;
037    import java.util.LinkedHashMap;
038    import java.util.List;
039    import java.util.Map;
040    
041    import org.apache.lucene.document.Document;
042    import org.apache.lucene.document.Fieldable;
043    import org.apache.lucene.index.IndexReader;
044    import org.apache.lucene.index.Term;
045    import org.apache.lucene.search.BooleanClause;
046    import org.apache.lucene.search.BooleanQuery;
047    import org.apache.lucene.search.IndexSearcher;
048    import org.apache.lucene.search.Query;
049    import org.apache.lucene.search.ScoreDoc;
050    import org.apache.lucene.search.TermQuery;
051    import org.apache.lucene.search.TopDocs;
052    import org.apache.lucene.search.spell.StringDistance;
053    import org.apache.lucene.search.spell.SuggestWord;
054    import org.apache.lucene.search.spell.SuggestWordQueue;
055    import org.apache.lucene.util.ReaderUtil;
056    
057    /**
058     * @author Michael C. Han
059     */
060    public class LuceneQuerySuggester extends BaseQuerySuggester {
061    
062            public void setBoostEnd(float boostEnd) {
063                    _boostEnd = boostEnd;
064            }
065    
066            public void setBoostStart(float boostStart) {
067                    _boostStart = boostStart;
068            }
069    
070            public void setQuerySuggestionMaxNGramLength(
071                    int querySuggestionMaxNGramLength) {
072    
073                    _querySuggestionMaxNGramLength = querySuggestionMaxNGramLength;
074            }
075    
076            public void setStringDistance(StringDistance stringDistance) {
077                    _stringDistance = stringDistance;
078            }
079    
080            public void setSuggestWordComparator(
081                    Comparator<SuggestWord> suggestWordComparator) {
082    
083                    _suggestWordComparator = suggestWordComparator;
084            }
085    
086            @Override
087            public Map<String, List<String>> spellCheckKeywords(
088                            SearchContext searchContext, int max)
089                    throws SearchException {
090    
091                    String languageId = searchContext.getLanguageId();
092    
093                    String localizedFieldName = DocumentImpl.getLocalizedName(
094                            languageId, Field.SPELL_CHECK_WORD);
095    
096                    List<String> keywords = TokenizerUtil.tokenize(
097                            localizedFieldName, searchContext.getKeywords(), languageId);
098    
099                    return spellCheckKeywords(
100                            keywords, localizedFieldName, searchContext, languageId, max);
101            }
102    
103            @Override
104            public String[] suggestKeywordQueries(SearchContext searchContext, int max)
105                    throws SearchException {
106    
107                    IndexSearcher indexSearcher = null;
108    
109                    try {
110                            indexSearcher = LuceneHelperUtil.getIndexSearcher(
111                                    searchContext.getCompanyId());
112    
113                            String localizedKeywordFieldName = DocumentImpl.getLocalizedName(
114                                    searchContext.getLanguageId(), Field.KEYWORD_SEARCH);
115    
116                            BooleanQuery suggestKeywordQuery = buildSpellCheckQuery(
117                                    searchContext.getGroupIds(), searchContext.getKeywords(),
118                                    searchContext.getLanguageId(),
119                                    SuggestionConstants.TYPE_QUERY_SUGGESTION,
120                                    _querySuggestionMaxNGramLength);
121    
122                            return search(
123                                    indexSearcher, suggestKeywordQuery, localizedKeywordFieldName,
124                                    _relevancyChecker, max);
125                    }
126                    catch (Exception e) {
127                            throw new SearchException("Unable to suggest query", e);
128                    }
129                    finally {
130                            try {
131                                    LuceneHelperUtil.releaseIndexSearcher(
132                                            searchContext.getCompanyId(), indexSearcher);
133                            }
134                            catch (IOException ioe) {
135                                    _log.error("Unable to release searcher", ioe);
136                            }
137                    }
138            }
139    
140            protected void addNGramTermQuery(
141                    BooleanQuery booleanQuery, Map<String, String> nGrams, Float boost,
142                    BooleanClause.Occur occur) {
143    
144                    for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
145                            String name = nGramEntry.getKey();
146                            String value = nGramEntry.getValue();
147    
148                            addTermQuery(booleanQuery, name, value, boost, occur);
149                    }
150            }
151    
152            protected void addTermQuery(
153                    BooleanQuery booleanQuery, String termName, String termValue,
154                    Float boost, BooleanClause.Occur occur) {
155    
156                    Query query = new TermQuery(new Term(termName, termValue));
157    
158                    if (boost != null) {
159                            query.setBoost(boost);
160                    }
161    
162                    BooleanClause booleanClause = new BooleanClause(query, occur);
163    
164                    booleanQuery.add(booleanClause);
165            }
166    
167            protected BooleanQuery buildGroupIdQuery(long[] groupIds) {
168                    BooleanQuery booleanQuery = new BooleanQuery();
169    
170                    addTermQuery(
171                            booleanQuery, Field.GROUP_ID, String.valueOf(0), null,
172                            BooleanClause.Occur.SHOULD);
173    
174                    if (ArrayUtil.isNotEmpty(groupIds)) {
175                            for (long groupId : groupIds) {
176                                    addTermQuery(
177                                            booleanQuery, Field.GROUP_ID, String.valueOf(groupId), null,
178                                            BooleanClause.Occur.SHOULD);
179                            }
180                    }
181    
182                    return booleanQuery;
183            }
184    
185            protected BooleanQuery buildNGramQuery(String word, int maxNGramLength)
186                    throws SearchException {
187    
188                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
189                            word, maxNGramLength);
190    
191                    BooleanQuery booleanQuery = new BooleanQuery();
192    
193                    if (_boostEnd > 0) {
194                            Map<String, String> nGramEnds = nGramHolder.getNGramEnds();
195    
196                            addNGramTermQuery(
197                                    booleanQuery, nGramEnds, _boostEnd, BooleanClause.Occur.SHOULD);
198                    }
199    
200                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
201    
202                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
203                            String fieldName = entry.getKey();
204    
205                            for (String nGram : entry.getValue()) {
206                                    addTermQuery(
207                                            booleanQuery, fieldName, nGram, null,
208                                            BooleanClause.Occur.SHOULD);
209                            }
210                    }
211    
212                    if (_boostStart > 0) {
213                            Map<String, String> nGramStarts = nGramHolder.getNGramStarts();
214    
215                            addNGramTermQuery(
216                                    booleanQuery, nGramStarts, _boostStart,
217                                    BooleanClause.Occur.SHOULD);
218                    }
219    
220                    return booleanQuery;
221            }
222    
223            protected BooleanQuery buildSpellCheckQuery(
224                            long groupIds[], String word, String languageId,
225                            String typeFieldValue, int maxNGramLength)
226                    throws SearchException {
227    
228                    BooleanQuery suggestWordQuery = new BooleanQuery();
229    
230                    BooleanQuery nGramQuery = buildNGramQuery(word, maxNGramLength);
231    
232                    BooleanClause booleanNGramQueryClause = new BooleanClause(
233                            nGramQuery, BooleanClause.Occur.MUST);
234    
235                    suggestWordQuery.add(booleanNGramQueryClause);
236    
237                    BooleanQuery groupIdQuery = buildGroupIdQuery(groupIds);
238    
239                    BooleanClause groupIdQueryClause = new BooleanClause(
240                            groupIdQuery, BooleanClause.Occur.MUST);
241    
242                    suggestWordQuery.add(groupIdQueryClause);
243    
244                    addTermQuery(
245                            suggestWordQuery, Field.LANGUAGE_ID, languageId, null,
246                            BooleanClause.Occur.MUST);
247                    addTermQuery(
248                            suggestWordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
249                            BooleanClause.Occur.MUST);
250                    addTermQuery(
251                            suggestWordQuery, Field.TYPE, typeFieldValue, null,
252                            BooleanClause.Occur.MUST);
253    
254                    return suggestWordQuery;
255            }
256    
257            protected String[] search(
258                            IndexSearcher indexSearcher, Query query, String fieldName,
259                            RelevancyChecker relevancyChecker, int max)
260                    throws IOException {
261    
262                    int maxScoreDocs = max * 10;
263    
264                    TopDocs topDocs = indexSearcher.search(query, null, maxScoreDocs);
265    
266                    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
267    
268                    SuggestWordQueue suggestWordQueue = new SuggestWordQueue(
269                            max, _suggestWordComparator);
270    
271                    int stop = Math.min(scoreDocs.length, maxScoreDocs);
272    
273                    for (int i = 0; i < stop; i++) {
274                            SuggestWord suggestWord = new SuggestWord();
275    
276                            Document document = indexSearcher.doc(scoreDocs[i].doc);
277    
278                            Fieldable fieldable = document.getFieldable(fieldName);
279    
280                            suggestWord.string = fieldable.stringValue();
281    
282                            boolean relevant = relevancyChecker.isRelevant(suggestWord);
283    
284                            if (relevant) {
285                                    suggestWordQueue.insertWithOverflow(suggestWord);
286                            }
287                    }
288    
289                    String[] words = new String[suggestWordQueue.size()];
290    
291                    for (int i = suggestWordQueue.size() - 1; i >= 0; i--) {
292                            SuggestWord suggestWord = suggestWordQueue.pop();
293    
294                            words[i] = suggestWord.string;
295                    }
296    
297                    return words;
298            }
299    
300            protected Map<String, List<String>> spellCheckKeywords(
301                            List<String> keywords, String localizedFieldName,
302                            SearchContext searchContext, String languageId, int max)
303                    throws SearchException {
304    
305                    IndexSearcher indexSearcher = null;
306    
307                    try {
308                            Map<String, List<String>> suggestions =
309                                    new LinkedHashMap<String, List<String>>();
310    
311                            float scoresThreshold = searchContext.getScoresThreshold();
312    
313                            if (scoresThreshold == 0) {
314                                    scoresThreshold = _SCORES_THRESHOLD_DEFAULT;
315                            }
316    
317                            indexSearcher = LuceneHelperUtil.getIndexSearcher(
318                                    searchContext.getCompanyId());
319    
320                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
321    
322                            if (indexSearcher.maxDoc() > 0) {
323                                    ReaderUtil.gatherSubReaders(
324                                            indexReaders, indexSearcher.getIndexReader());
325                            }
326    
327                            for (String keyword : keywords) {
328                                    List<String> suggestionsList = Collections.emptyList();
329    
330                                    if (!SpellCheckerUtil.isValidWord(
331                                                    localizedFieldName, keyword, indexReaders)) {
332    
333                                            int frequency = indexSearcher.docFreq(
334                                                    new Term(localizedFieldName, keyword));
335    
336                                            String[] suggestionsArray = null;
337    
338                                            if (frequency > 0) {
339                                                    suggestionsArray = new String[] {keyword};
340                                            }
341                                            else {
342                                                    BooleanQuery suggestWordQuery = buildSpellCheckQuery(
343                                                            searchContext.getGroupIds(), keyword, languageId,
344                                                            SuggestionConstants.TYPE_SPELL_CHECKER, 0);
345    
346                                                    RelevancyChecker relevancyChecker =
347                                                            new StringDistanceRelevancyChecker(
348                                                                    keyword, scoresThreshold, _stringDistance);
349    
350                                                    suggestionsArray = search(
351                                                            indexSearcher, suggestWordQuery, localizedFieldName,
352                                                            relevancyChecker, max);
353                                            }
354    
355                                            suggestionsList = Arrays.asList(suggestionsArray);
356                                    }
357    
358                                    suggestions.put(keyword, suggestionsList);
359                            }
360    
361                            return suggestions;
362                    }
363                    catch (IOException ioe) {
364                            throw new SearchException("Unable to find suggestions", ioe);
365                    }
366                    finally {
367                            try {
368                                    LuceneHelperUtil.releaseIndexSearcher(
369                                            searchContext.getCompanyId(), indexSearcher);
370                            }
371                            catch (IOException ioe) {
372                                    _log.error("Unable to release searcher", ioe);
373                            }
374                    }
375            }
376    
377            private static final float _SCORES_THRESHOLD_DEFAULT = 0.5f;
378    
379            private static Log _log = LogFactoryUtil.getLog(LuceneQuerySuggester.class);
380    
381            private float _boostEnd = 1.0f;
382            private float _boostStart = 2.0f;
383            private int _querySuggestionMaxNGramLength = 50;
384            private RelevancyChecker _relevancyChecker = new DefaultRelevancyChecker();
385            private StringDistance _stringDistance;
386            private Comparator<SuggestWord> _suggestWordComparator =
387                    SuggestWordQueue.DEFAULT_COMPARATOR;
388    
389    }