001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.log.Log;
018 import com.liferay.portal.kernel.log.LogFactoryUtil;
019 import com.liferay.portal.kernel.search.BaseQuerySuggester;
020 import com.liferay.portal.kernel.search.DocumentImpl;
021 import com.liferay.portal.kernel.search.Field;
022 import com.liferay.portal.kernel.search.NGramHolder;
023 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
024 import com.liferay.portal.kernel.search.SearchContext;
025 import com.liferay.portal.kernel.search.SearchException;
026 import com.liferay.portal.kernel.search.SuggestionConstants;
027 import com.liferay.portal.kernel.search.TokenizerUtil;
028 import com.liferay.portal.kernel.util.ArrayUtil;
029 import com.liferay.portal.util.PortletKeys;
030
031 import java.io.IOException;
032
033 import java.util.ArrayList;
034 import java.util.Arrays;
035 import java.util.Collections;
036 import java.util.Comparator;
037 import java.util.LinkedHashMap;
038 import java.util.List;
039 import java.util.Map;
040
041 import org.apache.lucene.document.Document;
042 import org.apache.lucene.document.Fieldable;
043 import org.apache.lucene.index.IndexReader;
044 import org.apache.lucene.index.Term;
045 import org.apache.lucene.search.BooleanClause;
046 import org.apache.lucene.search.BooleanQuery;
047 import org.apache.lucene.search.IndexSearcher;
048 import org.apache.lucene.search.Query;
049 import org.apache.lucene.search.ScoreDoc;
050 import org.apache.lucene.search.TermQuery;
051 import org.apache.lucene.search.TopDocs;
052 import org.apache.lucene.search.spell.StringDistance;
053 import org.apache.lucene.search.spell.SuggestWord;
054 import org.apache.lucene.search.spell.SuggestWordQueue;
055 import org.apache.lucene.util.ReaderUtil;
056
057
060 public class LuceneQuerySuggester extends BaseQuerySuggester {
061
062 public void setBoostEnd(float boostEnd) {
063 _boostEnd = boostEnd;
064 }
065
066 public void setBoostStart(float boostStart) {
067 _boostStart = boostStart;
068 }
069
070 public void setQuerySuggestionMaxNGramLength(
071 int querySuggestionMaxNGramLength) {
072
073 _querySuggestionMaxNGramLength = querySuggestionMaxNGramLength;
074 }
075
076 public void setStringDistance(StringDistance stringDistance) {
077 _stringDistance = stringDistance;
078 }
079
080 public void setSuggestWordComparator(
081 Comparator<SuggestWord> suggestWordComparator) {
082
083 _suggestWordComparator = suggestWordComparator;
084 }
085
086 @Override
087 public Map<String, List<String>> spellCheckKeywords(
088 SearchContext searchContext, int max)
089 throws SearchException {
090
091 String languageId = searchContext.getLanguageId();
092
093 String localizedFieldName = DocumentImpl.getLocalizedName(
094 languageId, Field.SPELL_CHECK_WORD);
095
096 List<String> keywords = TokenizerUtil.tokenize(
097 localizedFieldName, searchContext.getKeywords(), languageId);
098
099 return spellCheckKeywords(
100 keywords, localizedFieldName, searchContext, languageId, max);
101 }
102
103 @Override
104 public String[] suggestKeywordQueries(SearchContext searchContext, int max)
105 throws SearchException {
106
107 IndexSearcher indexSearcher = null;
108
109 try {
110 indexSearcher = LuceneHelperUtil.getIndexSearcher(
111 searchContext.getCompanyId());
112
113 String localizedKeywordFieldName = DocumentImpl.getLocalizedName(
114 searchContext.getLanguageId(), Field.KEYWORD_SEARCH);
115
116 BooleanQuery suggestKeywordQuery = buildSpellCheckQuery(
117 searchContext.getGroupIds(), searchContext.getKeywords(),
118 searchContext.getLanguageId(),
119 SuggestionConstants.TYPE_QUERY_SUGGESTION,
120 _querySuggestionMaxNGramLength);
121
122 return search(
123 indexSearcher, suggestKeywordQuery, localizedKeywordFieldName,
124 _relevancyChecker, max);
125 }
126 catch (Exception e) {
127 throw new SearchException("Unable to suggest query", e);
128 }
129 finally {
130 try {
131 LuceneHelperUtil.releaseIndexSearcher(
132 searchContext.getCompanyId(), indexSearcher);
133 }
134 catch (IOException ioe) {
135 _log.error("Unable to release searcher", ioe);
136 }
137 }
138 }
139
140 protected void addNGramTermQuery(
141 BooleanQuery booleanQuery, Map<String, String> nGrams, Float boost,
142 BooleanClause.Occur occur) {
143
144 for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
145 String name = nGramEntry.getKey();
146 String value = nGramEntry.getValue();
147
148 addTermQuery(booleanQuery, name, value, boost, occur);
149 }
150 }
151
152 protected void addTermQuery(
153 BooleanQuery booleanQuery, String termName, String termValue,
154 Float boost, BooleanClause.Occur occur) {
155
156 Query query = new TermQuery(new Term(termName, termValue));
157
158 if (boost != null) {
159 query.setBoost(boost);
160 }
161
162 BooleanClause booleanClause = new BooleanClause(query, occur);
163
164 booleanQuery.add(booleanClause);
165 }
166
167 protected BooleanQuery buildGroupIdQuery(long[] groupIds) {
168 BooleanQuery booleanQuery = new BooleanQuery();
169
170 addTermQuery(
171 booleanQuery, Field.GROUP_ID, String.valueOf(0), null,
172 BooleanClause.Occur.SHOULD);
173
174 if (ArrayUtil.isNotEmpty(groupIds)) {
175 for (long groupId : groupIds) {
176 addTermQuery(
177 booleanQuery, Field.GROUP_ID, String.valueOf(groupId), null,
178 BooleanClause.Occur.SHOULD);
179 }
180 }
181
182 return booleanQuery;
183 }
184
185 protected BooleanQuery buildNGramQuery(String word, int maxNGramLength)
186 throws SearchException {
187
188 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
189 word, maxNGramLength);
190
191 BooleanQuery booleanQuery = new BooleanQuery();
192
193 if (_boostEnd > 0) {
194 Map<String, String> nGramEnds = nGramHolder.getNGramEnds();
195
196 addNGramTermQuery(
197 booleanQuery, nGramEnds, _boostEnd, BooleanClause.Occur.SHOULD);
198 }
199
200 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
201
202 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
203 String fieldName = entry.getKey();
204
205 for (String nGram : entry.getValue()) {
206 addTermQuery(
207 booleanQuery, fieldName, nGram, null,
208 BooleanClause.Occur.SHOULD);
209 }
210 }
211
212 if (_boostStart > 0) {
213 Map<String, String> nGramStarts = nGramHolder.getNGramStarts();
214
215 addNGramTermQuery(
216 booleanQuery, nGramStarts, _boostStart,
217 BooleanClause.Occur.SHOULD);
218 }
219
220 return booleanQuery;
221 }
222
223 protected BooleanQuery buildSpellCheckQuery(
224 long groupIds[], String word, String languageId,
225 String typeFieldValue, int maxNGramLength)
226 throws SearchException {
227
228 BooleanQuery suggestWordQuery = new BooleanQuery();
229
230 BooleanQuery nGramQuery = buildNGramQuery(word, maxNGramLength);
231
232 BooleanClause booleanNGramQueryClause = new BooleanClause(
233 nGramQuery, BooleanClause.Occur.MUST);
234
235 suggestWordQuery.add(booleanNGramQueryClause);
236
237 BooleanQuery groupIdQuery = buildGroupIdQuery(groupIds);
238
239 BooleanClause groupIdQueryClause = new BooleanClause(
240 groupIdQuery, BooleanClause.Occur.MUST);
241
242 suggestWordQuery.add(groupIdQueryClause);
243
244 addTermQuery(
245 suggestWordQuery, Field.LANGUAGE_ID, languageId, null,
246 BooleanClause.Occur.MUST);
247 addTermQuery(
248 suggestWordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
249 BooleanClause.Occur.MUST);
250 addTermQuery(
251 suggestWordQuery, Field.TYPE, typeFieldValue, null,
252 BooleanClause.Occur.MUST);
253
254 return suggestWordQuery;
255 }
256
257 protected String[] search(
258 IndexSearcher indexSearcher, Query query, String fieldName,
259 RelevancyChecker relevancyChecker, int max)
260 throws IOException {
261
262 int maxScoreDocs = max * 10;
263
264 TopDocs topDocs = indexSearcher.search(query, null, maxScoreDocs);
265
266 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
267
268 SuggestWordQueue suggestWordQueue = new SuggestWordQueue(
269 max, _suggestWordComparator);
270
271 int stop = Math.min(scoreDocs.length, maxScoreDocs);
272
273 for (int i = 0; i < stop; i++) {
274 SuggestWord suggestWord = new SuggestWord();
275
276 Document document = indexSearcher.doc(scoreDocs[i].doc);
277
278 Fieldable fieldable = document.getFieldable(fieldName);
279
280 suggestWord.string = fieldable.stringValue();
281
282 boolean relevant = relevancyChecker.isRelevant(suggestWord);
283
284 if (relevant) {
285 suggestWordQueue.insertWithOverflow(suggestWord);
286 }
287 }
288
289 String[] words = new String[suggestWordQueue.size()];
290
291 for (int i = suggestWordQueue.size() - 1; i >= 0; i--) {
292 SuggestWord suggestWord = suggestWordQueue.pop();
293
294 words[i] = suggestWord.string;
295 }
296
297 return words;
298 }
299
300 protected Map<String, List<String>> spellCheckKeywords(
301 List<String> keywords, String localizedFieldName,
302 SearchContext searchContext, String languageId, int max)
303 throws SearchException {
304
305 IndexSearcher indexSearcher = null;
306
307 try {
308 Map<String, List<String>> suggestions =
309 new LinkedHashMap<String, List<String>>();
310
311 float scoresThreshold = searchContext.getScoresThreshold();
312
313 if (scoresThreshold == 0) {
314 scoresThreshold = _SCORES_THRESHOLD_DEFAULT;
315 }
316
317 indexSearcher = LuceneHelperUtil.getIndexSearcher(
318 searchContext.getCompanyId());
319
320 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
321
322 if (indexSearcher.maxDoc() > 0) {
323 ReaderUtil.gatherSubReaders(
324 indexReaders, indexSearcher.getIndexReader());
325 }
326
327 for (String keyword : keywords) {
328 List<String> suggestionsList = Collections.emptyList();
329
330 if (!SpellCheckerUtil.isValidWord(
331 localizedFieldName, keyword, indexReaders)) {
332
333 int frequency = indexSearcher.docFreq(
334 new Term(localizedFieldName, keyword));
335
336 String[] suggestionsArray = null;
337
338 if (frequency > 0) {
339 suggestionsArray = new String[] {keyword};
340 }
341 else {
342 BooleanQuery suggestWordQuery = buildSpellCheckQuery(
343 searchContext.getGroupIds(), keyword, languageId,
344 SuggestionConstants.TYPE_SPELL_CHECKER, 0);
345
346 RelevancyChecker relevancyChecker =
347 new StringDistanceRelevancyChecker(
348 keyword, scoresThreshold, _stringDistance);
349
350 suggestionsArray = search(
351 indexSearcher, suggestWordQuery, localizedFieldName,
352 relevancyChecker, max);
353 }
354
355 suggestionsList = Arrays.asList(suggestionsArray);
356 }
357
358 suggestions.put(keyword, suggestionsList);
359 }
360
361 return suggestions;
362 }
363 catch (IOException ioe) {
364 throw new SearchException("Unable to find suggestions", ioe);
365 }
366 finally {
367 try {
368 LuceneHelperUtil.releaseIndexSearcher(
369 searchContext.getCompanyId(), indexSearcher);
370 }
371 catch (IOException ioe) {
372 _log.error("Unable to release searcher", ioe);
373 }
374 }
375 }
376
377 private static final float _SCORES_THRESHOLD_DEFAULT = 0.5f;
378
379 private static Log _log = LogFactoryUtil.getLog(LuceneQuerySuggester.class);
380
381 private float _boostEnd = 1.0f;
382 private float _boostStart = 2.0f;
383 private int _querySuggestionMaxNGramLength = 50;
384 private RelevancyChecker _relevancyChecker = new DefaultRelevancyChecker();
385 private StringDistance _stringDistance;
386 private Comparator<SuggestWord> _suggestWordComparator =
387 SuggestWordQueue.DEFAULT_COMPARATOR;
388
389 }