001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.util;
016    
017    /**
018     * <p>
019     * See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm.
020     * </p>
021     *
022     * @author Shuyang Zhou
023     */
024    public class KMPSearch {
025    
026            public static int[] generateNexts(byte[] pattern) {
027                    int length = pattern.length;
028    
029                    int[] nexts = new int[length];
030    
031                    nexts[0] = -1;
032    
033                    int i = 0;
034                    int j = -1;
035    
036                    while (i < (length - 1)) {
037                            if ((j == -1) || (pattern[i] == pattern[j])) {
038                                    i++;
039                                    j++;
040    
041                                    nexts[i] = j;
042                            }
043                            else {
044                                    j = nexts[j];
045                            }
046                    }
047    
048                    return nexts;
049            }
050    
051            public static int[] generateNexts(char[] pattern) {
052                    int length = pattern.length;
053    
054                    int[] nexts = new int[length];
055    
056                    nexts[0] = -1;
057    
058                    int i = 0;
059                    int j = -1;
060    
061                    while (i < (length - 1)) {
062                            if ((j == -1) || (pattern[i] == pattern[j])) {
063                                    i++;
064                                    j++;
065    
066                                    nexts[i] = j;
067                            }
068                            else {
069                                    j = nexts[j];
070                            }
071                    }
072    
073                    return nexts;
074            }
075    
076            public static int[] generateNexts(CharSequence pattern) {
077                    int length = pattern.length();
078    
079                    int[] nexts = new int[length];
080    
081                    nexts[0] = -1;
082    
083                    int i = 0;
084                    int j = -1;
085    
086                    while (i < (length - 1)) {
087                            if ((j == -1) || (pattern.charAt(i) == pattern.charAt(j))) {
088                                    i++;
089                                    j++;
090    
091                                    nexts[i] = j;
092                            }
093                            else {
094                                    j = nexts[j];
095                            }
096                    }
097    
098                    return nexts;
099            }
100    
101            public static int search(byte[] text, byte[] pattern) {
102                    int[] nexts = generateNexts(pattern);
103    
104                    return search(text, 0, text.length, pattern, nexts);
105            }
106    
107            public static int search(byte[] text, byte[] pattern, int[] nexts) {
108                    return search(text, 0, text.length, pattern, nexts);
109            }
110    
111            public static int search(
112                    byte[] text, int offset, byte[] pattern, int[] nexts) {
113    
114                    return search(text, offset, text.length - offset, pattern, nexts);
115            }
116    
117            public static int search(
118                    byte[] text, int offset, int length, byte[] pattern, int[] nexts) {
119    
120                    int patternLength = pattern.length;
121    
122                    int i = 0;
123                    int j = 0;
124    
125                    while ((i < length) && (j < patternLength)) {
126                            if ((j == -1) || (text[i + offset] == pattern[j])) {
127                                    i++;
128                                    j++;
129                            }
130                            else {
131                                    j = nexts[j];
132                            }
133                    }
134    
135                    if (j >= patternLength) {
136                            return i - patternLength + offset;
137                    }
138                    else {
139                            return -1;
140                    }
141            }
142    
143            public static int search(char[] text, char[] pattern) {
144                    int[] nexts = generateNexts(pattern);
145    
146                    return search(text, 0, text.length, pattern, nexts);
147            }
148    
149            public static int search(char[] text, char[] pattern, int[] nexts) {
150                    return search(text, 0, text.length, pattern, nexts);
151            }
152    
153            public static int search(
154                    char[] text, int offset, char[] pattern, int[] nexts) {
155    
156                    return search(text, offset, text.length - offset, pattern, nexts);
157            }
158    
159            public static int search(
160                    char[] text, int offset, int length, char[] pattern, int[] nexts) {
161    
162                    int patternLength = pattern.length;
163    
164                    int i = 0;
165                    int j = 0;
166    
167                    while ((i < length) && (j < patternLength)) {
168                            if ((j == -1) || (text[i + offset] == pattern[j])) {
169                                    i++;
170                                    j++;
171                            }
172                            else {
173                                    j = nexts[j];
174                            }
175                    }
176    
177                    if (j >= patternLength) {
178                            return i - patternLength + offset;
179                    }
180                    else {
181                            return -1;
182                    }
183            }
184    
185            public static int search(CharSequence text, CharSequence pattern) {
186                    int[] nexts = generateNexts(pattern);
187    
188                    return search(text, 0, text.length(), pattern, nexts);
189            }
190    
191            public static int search(
192                    CharSequence text, CharSequence pattern, int[] nexts) {
193    
194                    return search(text, 0, text.length(), pattern, nexts);
195            }
196    
197            public static int search(
198                    CharSequence text, int offset, CharSequence pattern, int[] nexts) {
199    
200                    return search(text, offset, text.length() - offset, pattern, nexts);
201            }
202    
203            public static int search(
204                    CharSequence text, int offset, int length, CharSequence pattern,
205                    int[] nexts) {
206    
207                    int patternLength = pattern.length();
208    
209                    int i = 0;
210                    int j = 0;
211    
212                    while ((i < length) && (j < patternLength)) {
213                            if (j == -1) {
214                                    i++;
215                                    j++;
216                            }
217                            else {
218                                    char c1 = text.charAt(i + offset);
219                                    char c2 = pattern.charAt(j);
220    
221                                    if ((c1 == c2) || (c1 == Character.toUpperCase(c2))) {
222                                            i++;
223                                            j++;
224                                    }
225                                    else {
226                                            j = nexts[j];
227                                    }
228                            }
229                    }
230    
231                    if (j >= patternLength) {
232                            return i - patternLength + offset;
233                    }
234                    else {
235                            return -1;
236                    }
237            }
238    
239    }