001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search;
016    
017    import com.liferay.portal.kernel.search.NGramHolder;
018    import com.liferay.portal.kernel.search.NGramHolderBuilder;
019    import com.liferay.portal.kernel.search.SearchException;
020    
021    import java.io.StringReader;
022    
023    import org.apache.lucene.analysis.ngram.NGramTokenizer;
024    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
025    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
026    
027    /**
028     * @author Michael C. Han
029     */
030    public class NGramHolderBuilderImpl implements NGramHolderBuilder {
031    
032            @Override
033            public NGramHolder buildNGramHolder(String input) throws SearchException {
034                    return buildNGramHolder(
035                            input, getNGramMinLength(input.length()),
036                            getNGramMaxLength(input.length()));
037            }
038    
039            @Override
040            public NGramHolder buildNGramHolder(String input, int nGramMaxLength)
041                    throws SearchException {
042    
043                    if (nGramMaxLength <= 0) {
044                            nGramMaxLength = getNGramMaxLength(input.length());
045                    }
046    
047                    return buildNGramHolder(
048                            input, getNGramMinLength(input.length()), nGramMaxLength);
049            }
050    
051            @Override
052            public NGramHolder buildNGramHolder(
053                            String input, int nGramMinLength, int nGramMaxLength)
054                    throws SearchException {
055    
056                    try {
057                            NGramHolder nGramHolder = new NGramHolder();
058    
059                            NGramTokenizer nGramTokenizer = new NGramTokenizer(
060                                    new StringReader(input), nGramMinLength, nGramMaxLength);
061    
062                            CharTermAttribute charTermAttribute = nGramTokenizer.getAttribute(
063                                    CharTermAttribute.class);
064    
065                            OffsetAttribute offsetAttribute = nGramTokenizer.getAttribute(
066                                    OffsetAttribute.class);
067    
068                            while (nGramTokenizer.incrementToken()) {
069                                    String nGram = charTermAttribute.toString();
070    
071                                    int currentNGramSize = charTermAttribute.length();
072    
073                                    if ((currentNGramSize >= nGramMinLength) &&
074                                            (currentNGramSize <= nGramMaxLength)) {
075    
076                                            if (offsetAttribute.startOffset() == 0) {
077                                                    nGramHolder.addNGramStart(currentNGramSize, nGram);
078                                            }
079                                            else if (offsetAttribute.endOffset() == input.length()) {
080                                                    nGramHolder.addNGramEnd(currentNGramSize, nGram);
081                                            }
082                                            else {
083                                                    nGramHolder.addNGram(currentNGramSize, nGram);
084                                            }
085                                    }
086                            }
087    
088                            return nGramHolder;
089                    }
090                    catch (Exception e) {
091                            throw new SearchException(e);
092                    }
093            }
094    
095            protected int getNGramMaxLength(int length) {
096                    if (length > 5) {
097                            return 4;
098                    }
099                    else if (length == 5) {
100                            return 3;
101                    }
102    
103                    return 2;
104            }
105    
106            protected int getNGramMinLength(int length) {
107                    if (length > 5) {
108                            return 3;
109                    }
110                    else if (length == 5) {
111                            return 2;
112                    }
113    
114                    return 1;
115            }
116    
117    }