001
014
015 package com.liferay.portal.kernel.search;
016
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.util.StringPool;
import com.liferay.portal.util.PortletKeys;

import java.io.InputStream;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
029
030
033 public abstract class BaseGenericSpellCheckIndexWriter
034 extends BaseSpellCheckIndexWriter {
035
036 public void setBatchSize(int batchSize) {
037 _batchSize = batchSize;
038 }
039
040 public void setDocumentPrototype(Document documentPrototype) {
041 _documentPrototype = documentPrototype;
042 }
043
044 public void setIndexWriter(IndexWriter indexWriter) {
045 _indexWriter = indexWriter;
046 }
047
048 protected void addNGramFields(
049 Document document, Map<String, String> nGrams) {
050
051 for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
052 document.addKeyword(nGramEntry.getKey(), nGramEntry.getValue());
053 }
054 }
055
056 protected Document createDocument(
057 long companyId, long groupId, String languageId, String keywords,
058 float weight, String keywordFieldName, String typeFieldValue,
059 int maxNGramLength)
060 throws SearchException {
061
062 Document document = (Document)_documentPrototype.clone();
063
064 document.addKeyword(Field.COMPANY_ID, companyId);
065 document.addKeyword(Field.GROUP_ID, groupId);
066 document.addKeyword(Field.LANGUAGE_ID, languageId);
067 document.addKeyword(Field.PORTLET_ID, PortletKeys.SEARCH);
068 document.addKeyword(Field.PRIORITY, String.valueOf(weight));
069 document.addKeyword(keywordFieldName, keywords);
070 document.addKeyword(Field.TYPE, typeFieldValue);
071 document.addKeyword(Field.UID, getUID(companyId, languageId, keywords));
072
073 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
074 keywords, maxNGramLength);
075
076 addNGramFields(document, nGramHolder.getNGramEnds());
077
078 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
079
080 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
081 String fieldName = entry.getKey();
082
083 for (String nGram : entry.getValue()) {
084 document.addKeyword(fieldName, nGram);
085 }
086 }
087
088 addNGramFields(document, nGramHolder.getNGramStarts());
089
090 return document;
091 }
092
093 @Override
094 protected void indexKeyword(
095 long companyId, long groupId, String languageId, String keyword,
096 float weight, String keywordFieldName, String typeFieldValue,
097 int maxNGramLength)
098 throws Exception {
099
100 Document document = createDocument(
101 companyId, groupId, languageId, keyword, weight, keywordFieldName,
102 typeFieldValue, maxNGramLength);
103
104 _indexWriter.addDocument(null, document);
105 }
106
107 @Override
108 protected void indexKeywords(
109 long companyId, long groupId, String languageId,
110 InputStream inputStream, String keywordFieldName,
111 String typeFieldValue, int maxNGramLength)
112 throws Exception {
113
114 Set<Document> documents = new HashSet<Document>();
115
116 try {
117 DictionaryReader dictionaryReader = new DictionaryReader(
118 inputStream, StringPool.UTF8);
119
120 Iterator<DictionaryEntry> iterator =
121 dictionaryReader.getDictionaryEntriesIterator();
122
123 int counter = 0;
124
125 while (iterator.hasNext()) {
126 counter++;
127
128 DictionaryEntry dictionaryEntry = iterator.next();
129
130 Document document = createDocument(
131 companyId, groupId, languageId, dictionaryEntry.getWord(),
132 dictionaryEntry.getWeight(), keywordFieldName,
133 typeFieldValue, maxNGramLength);
134
135 documents.add(document);
136
137 if ((counter == _batchSize) || !iterator.hasNext()) {
138 _indexWriter.addDocuments(null, documents);
139
140 documents.clear();
141
142 counter = 0;
143 }
144 }
145 }
146 catch (Exception e) {
147 if (_log.isWarnEnabled()) {
148 _log.warn("Unable to index dictionaries", e);
149 }
150
151 throw new SearchException(e.getMessage(), e);
152 }
153 }
154
155 private static final int _DEFAULT_BATCH_SIZE = 1000;
156
157 private static Log _log = LogFactoryUtil.getLog(
158 BaseGenericSpellCheckIndexWriter.class);
159
160 private int _batchSize = _DEFAULT_BATCH_SIZE;
161 private Document _documentPrototype = new DocumentImpl();
162 private IndexWriter _indexWriter;
163
164 }