001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.search;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.util.StringPool;
020    import com.liferay.portal.util.PortletKeys;
021    
022    import java.io.InputStream;
023    
024    import java.util.HashSet;
025    import java.util.Iterator;
026    import java.util.List;
027    import java.util.Map;
028    import java.util.Set;
029    
030    /**
031     * @author Michael C. Han
032     */
033    public abstract class BaseGenericSpellCheckIndexWriter
034            extends BaseSpellCheckIndexWriter {
035    
036            public void setBatchSize(int batchSize) {
037                    _batchSize = batchSize;
038            }
039    
040            public void setDocumentPrototype(Document documentPrototype) {
041                    _documentPrototype = documentPrototype;
042            }
043    
044            public void setIndexWriter(IndexWriter indexWriter) {
045                    _indexWriter = indexWriter;
046            }
047    
048            protected void addNGramFields(
049                    Document document, Map<String, String> nGrams) {
050    
051                    for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
052                            document.addKeyword(nGramEntry.getKey(), nGramEntry.getValue());
053                    }
054            }
055    
056            protected Document createDocument(
057                            long companyId, long groupId, String languageId, String keywords,
058                            float weight, String keywordFieldName, String typeFieldValue,
059                            int maxNGramLength)
060                    throws SearchException {
061    
062                    Document document = (Document)_documentPrototype.clone();
063    
064                    document.addKeyword(Field.COMPANY_ID, companyId);
065                    document.addKeyword(Field.GROUP_ID, groupId);
066                    document.addKeyword(Field.LANGUAGE_ID, languageId);
067                    document.addKeyword(Field.PORTLET_ID, PortletKeys.SEARCH);
068                    document.addKeyword(Field.PRIORITY, String.valueOf(weight));
069                    document.addKeyword(keywordFieldName, keywords);
070                    document.addKeyword(Field.TYPE, typeFieldValue);
071                    document.addKeyword(Field.UID, getUID(companyId, languageId, keywords));
072    
073                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
074                            keywords, maxNGramLength);
075    
076                    addNGramFields(document, nGramHolder.getNGramEnds());
077    
078                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
079    
080                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
081                            String fieldName = entry.getKey();
082    
083                            for (String nGram : entry.getValue()) {
084                                    document.addKeyword(fieldName, nGram);
085                            }
086                    }
087    
088                    addNGramFields(document, nGramHolder.getNGramStarts());
089    
090                    return document;
091            }
092    
093            @Override
094            protected void indexKeyword(
095                            long companyId, long groupId, String languageId, String keyword,
096                            float weight, String keywordFieldName, String typeFieldValue,
097                            int maxNGramLength)
098                    throws Exception {
099    
100                    Document document = createDocument(
101                            companyId, groupId, languageId, keyword, weight, keywordFieldName,
102                            typeFieldValue, maxNGramLength);
103    
104                    _indexWriter.addDocument(null, document);
105            }
106    
107            @Override
108            protected void indexKeywords(
109                            long companyId, long groupId, String languageId,
110                            InputStream inputStream, String keywordFieldName,
111                            String typeFieldValue, int maxNGramLength)
112                    throws Exception {
113    
114                    Set<Document> documents = new HashSet<Document>();
115    
116                    try {
117                            DictionaryReader dictionaryReader = new DictionaryReader(
118                                    inputStream, StringPool.UTF8);
119    
120                            Iterator<DictionaryEntry> iterator =
121                                    dictionaryReader.getDictionaryEntriesIterator();
122    
123                            int counter = 0;
124    
125                            while (iterator.hasNext()) {
126                                    counter++;
127    
128                                    DictionaryEntry dictionaryEntry = iterator.next();
129    
130                                    Document document = createDocument(
131                                            companyId, groupId, languageId, dictionaryEntry.getWord(),
132                                            dictionaryEntry.getWeight(), keywordFieldName,
133                                            typeFieldValue, maxNGramLength);
134    
135                                    documents.add(document);
136    
137                                    if ((counter == _batchSize) || !iterator.hasNext()) {
138                                            _indexWriter.addDocuments(null, documents);
139    
140                                            documents.clear();
141    
142                                            counter = 0;
143                                    }
144                            }
145                    }
146                    catch (Exception e) {
147                            if (_log.isWarnEnabled()) {
148                                    _log.warn("Unable to index dictionaries", e);
149                            }
150    
151                            throw new SearchException(e.getMessage(), e);
152                    }
153            }
154    
155            private static final int _DEFAULT_BATCH_SIZE = 1000;
156    
157            private static Log _log = LogFactoryUtil.getLog(
158                    BaseGenericSpellCheckIndexWriter.class);
159    
160            private int _batchSize = _DEFAULT_BATCH_SIZE;
161            private Document _documentPrototype = new DocumentImpl();
162            private IndexWriter _indexWriter;
163    
164    }