001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.log.Log;
018 import com.liferay.portal.kernel.log.LogFactoryUtil;
019 import com.liferay.portal.kernel.search.DocumentImpl;
020 import com.liferay.portal.kernel.search.SearchException;
021 import com.liferay.portal.kernel.search.Tokenizer;
022
023 import java.io.IOException;
024 import java.io.Reader;
025 import java.io.StringReader;
026
027 import java.util.ArrayList;
028 import java.util.LinkedHashMap;
029 import java.util.List;
030 import java.util.Map;
031 import java.util.regex.Pattern;
032
033 import org.apache.lucene.analysis.Analyzer;
034 import org.apache.lucene.analysis.TokenStream;
035 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
036 import org.apache.lucene.document.Fieldable;
037
038
042 public class PerFieldAnalyzer extends Analyzer implements Tokenizer {
043
044 public PerFieldAnalyzer(
045 Analyzer defaultAnalyzer, Map<String, Analyzer> analyzerMap) {
046
047 _analyzer = defaultAnalyzer;
048 _analyzers = analyzerMap;
049 }
050
051 public void addAnalyzer(String fieldName, Analyzer analyzer) {
052 _analyzers.put(fieldName, analyzer);
053 }
054
055 public Analyzer getAnalyzer(String fieldName) {
056 Analyzer analyzer = _analyzers.get(fieldName);
057
058 if (analyzer != null) {
059 return analyzer;
060 }
061
062 for (String key : _analyzers.keySet()) {
063 if (Pattern.matches(key, fieldName)) {
064 return _analyzers.get(key);
065 }
066 }
067
068 return _analyzer;
069 }
070
071 @Override
072 public int getOffsetGap(Fieldable field) {
073 Analyzer analyzer = getAnalyzer(field.name());
074
075 return analyzer.getOffsetGap(field);
076 }
077
078 @Override
079 public int getPositionIncrementGap(String fieldName) {
080 Analyzer analyzer = getAnalyzer(fieldName);
081
082 return analyzer.getPositionIncrementGap(fieldName);
083 }
084
085 @Override
086 public final TokenStream reusableTokenStream(
087 String fieldName, Reader reader)
088 throws IOException {
089
090 Analyzer analyzer = getAnalyzer(fieldName);
091
092 return analyzer.reusableTokenStream(fieldName, reader);
093 }
094
095 @Override
096 public List<String> tokenize(
097 String fieldName, String input, String languageId)
098 throws SearchException {
099
100 List<String> tokens = new ArrayList<String>();
101 TokenStream tokenStream = null;
102
103 try {
104 String localizedFieldName = DocumentImpl.getLocalizedName(
105 languageId, fieldName);
106
107 Analyzer analyzer = getAnalyzer(localizedFieldName);
108
109 tokenStream = analyzer.tokenStream(
110 localizedFieldName, new StringReader(input));
111
112 CharTermAttribute charTermAttribute = tokenStream.addAttribute(
113 CharTermAttribute.class);
114
115 tokenStream.reset();
116
117 while (tokenStream.incrementToken()) {
118 tokens.add(charTermAttribute.toString());
119 }
120
121 tokenStream.end();
122 }
123 catch (IOException ioe) {
124 throw new SearchException(ioe);
125 }
126 finally {
127 if (tokenStream != null) {
128 try {
129 tokenStream.close();
130 }
131 catch (IOException ioe) {
132 if (_log.isWarnEnabled()) {
133 _log.warn("Unable to close token stream", ioe);
134 }
135 }
136 }
137 }
138
139 return tokens;
140 }
141
142 @Override
143 public final TokenStream tokenStream(String fieldName, Reader reader) {
144 Analyzer analyzer = getAnalyzer(fieldName);
145
146 return analyzer.tokenStream(fieldName, reader);
147 }
148
149 private static Log _log = LogFactoryUtil.getLog(PerFieldAnalyzer.class);
150
151 private Analyzer _analyzer;
152 private Map<String, Analyzer> _analyzers =
153 new LinkedHashMap<String, Analyzer>();
154
155 }