001
014
015 package com.liferay.portal.search;
016
017 import com.liferay.portal.kernel.search.NGramHolder;
018 import com.liferay.portal.kernel.search.NGramHolderBuilder;
019 import com.liferay.portal.kernel.search.SearchException;
020
021 import java.io.StringReader;
022
023 import org.apache.lucene.analysis.ngram.NGramTokenizer;
024 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
025 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
026
027
030 public class NGramHolderBuilderImpl implements NGramHolderBuilder {
031
032 @Override
033 public NGramHolder buildNGramHolder(String input) throws SearchException {
034 return buildNGramHolder(
035 input, getNGramMinLength(input.length()),
036 getNGramMaxLength(input.length()));
037 }
038
039 @Override
040 public NGramHolder buildNGramHolder(String input, int nGramMaxLength)
041 throws SearchException {
042
043 if (nGramMaxLength <= 0) {
044 nGramMaxLength = getNGramMaxLength(input.length());
045 }
046
047 return buildNGramHolder(
048 input, getNGramMinLength(input.length()), nGramMaxLength);
049 }
050
051 @Override
052 public NGramHolder buildNGramHolder(
053 String input, int nGramMinLength, int nGramMaxLength)
054 throws SearchException {
055
056 try {
057 NGramHolder nGramHolder = new NGramHolder();
058
059 NGramTokenizer nGramTokenizer = new NGramTokenizer(
060 new StringReader(input), nGramMinLength, nGramMaxLength);
061
062 CharTermAttribute charTermAttribute = nGramTokenizer.getAttribute(
063 CharTermAttribute.class);
064
065 OffsetAttribute offsetAttribute = nGramTokenizer.getAttribute(
066 OffsetAttribute.class);
067
068 while (nGramTokenizer.incrementToken()) {
069 String nGram = charTermAttribute.toString();
070
071 int currentNGramSize = charTermAttribute.length();
072
073 if ((currentNGramSize >= nGramMinLength) &&
074 (currentNGramSize <= nGramMaxLength)) {
075
076 if (offsetAttribute.startOffset() == 0) {
077 nGramHolder.addNGramStart(currentNGramSize, nGram);
078 }
079 else if (offsetAttribute.endOffset() == input.length()) {
080 nGramHolder.addNGramEnd(currentNGramSize, nGram);
081 }
082 else {
083 nGramHolder.addNGram(currentNGramSize, nGram);
084 }
085 }
086 }
087
088 return nGramHolder;
089 }
090 catch (Exception e) {
091 throw new SearchException(e);
092 }
093 }
094
095 protected int getNGramMaxLength(int length) {
096 if (length > 5) {
097 return 4;
098 }
099 else if (length == 5) {
100 return 3;
101 }
102
103 return 2;
104 }
105
106 protected int getNGramMinLength(int length) {
107 if (length > 5) {
108 return 3;
109 }
110 else if (length == 5) {
111 return 2;
112 }
113
114 return 1;
115 }
116
117 }