001    /**
002     * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.util;
016    
017    import java.util.ArrayList;
018    import java.util.List;
019    import java.util.Map;
020    import java.util.regex.Matcher;
021    import java.util.regex.Pattern;
022    
023    /**
024     * Parses strings into parameter maps and vice versa.
025     *
026     * @author Connor McKay
027     * @author Brian Wing Shun Chan
028     * @see    com.liferay.portal.kernel.portlet.Route
029     * @see    Pattern
030     */
031    public class StringParser {
032    
033            /**
034             * Constructs a new string parser from the pattern.
035             *
036             * <p>
037             * The pattern can be any string containing named fragments in brackets. The
038             * following is a valid pattern for greeting:
039             * </p>
040             *
041             * <pre>
042             * <code>
043             * Hi {name}! How are you?
044             * </code>
045             * </pre>
046             *
047             * <p>
048             * This pattern would match the string &quot;Hi Tom! How are you?&quot;. The
049             * format of a fragment may optionally be specified by inserting a colon
050             * followed by a regular expression after the fragment name. For instance,
051             * <code>name</code> could be set to match only lower case letters with the
052             * following:
053             * </p>
054             *
055             * <pre>
056             * <code>
057             * Hi {name:[a-z]+}! How are you?
058             * </code>
059             * </pre>
060             *
061             * <p>
062             * By default, a fragment will match anything except a forward slash or a
063             * period.
064             * </p>
065             *
066             * <p>
067             * If a string parser is set to encode fragments using a {@link
068             * StringEncoder}, an individual fragment can be specified as raw by
069             * prefixing its name with a percent sign, as shown below:
070             * </p>
071             *
072             * <pre>
073             * <code>
074             * /view_page/{%path:.*}
075             * </code>
076             * </pre>
077             *
078             * <p>
079             * The format of the path fragment has also been specified to match anything
080             * using the pattern &quot;.*&quot;. This pattern could be used to parse the
081             * string:
082             * </p>
083             *
084             * <pre>
085             * <code>
086             * /view_page/root/home/mysite/pages/index.htm
087             * </code>
088             * </pre>
089             *
090             * <p>
091             * <code>path</code> would be set to
092             * &quot;root/home/mysite/pages/index.htm&quot;, even if {@link
093             * URLStringEncoder} had been set as the string encoder.
094             * </p>
095             *
096             * <p>
097             * <b>Do not include capturing subgroups in the pattern.</b>
098             * </p>
099             *
100             *
101             * @param pattern the pattern string
102             */
103            public StringParser(String pattern) {
104                    _builder = pattern;
105    
106                    String regex = escapeRegex(pattern);
107    
108                    Matcher matcher = _fragmentPattern.matcher(pattern);
109    
110                    while (matcher.find()) {
111                            String chunk = matcher.group();
112    
113                            StringParserFragment stringParserFragment =
114                                    new StringParserFragment(chunk);
115    
116                            _stringParserFragments.add(stringParserFragment);
117    
118                            _builder = _builder.replace(chunk, stringParserFragment.getToken());
119    
120                            regex = regex.replace(
121                                    escapeRegex(chunk),
122                                    StringPool.OPEN_PARENTHESIS.concat(
123                                            stringParserFragment.getPattern().concat(
124                                                    StringPool.CLOSE_PARENTHESIS)));
125                    }
126    
127                    _pattern = Pattern.compile(regex);
128            }
129    
130            /**
131             * Builds a string from the parameter map if this parser is appropriate.
132             *
133             * <p>
134             * A parser is appropriate if each parameter matches the format of its
135             * accompanying fragment.
136             * </p>
137             *
138             * <p>
139             * If this parser is appropriate, all the parameters used in the pattern
140             * will be removed from the parameter map. If this parser is not
141             * appropriate, the parameter map will not be modified.
142             * </p>
143             *
144             * @param  parameters the parameter map to build the string from
145             * @return the string, or <code>null</code> if this parser is not
146             *                 appropriate
147             */
148            public String build(Map<String, String> parameters) {
149                    String s = _builder;
150    
151                    for (StringParserFragment stringParserFragment :
152                                    _stringParserFragments) {
153    
154                            String value = parameters.get(stringParserFragment.getName());
155    
156                            if (value == null) {
157                                    return null;
158                            }
159    
160                            if ((_stringEncoder != null) && !stringParserFragment.isRaw()) {
161                                    value = _stringEncoder.encode(value);
162                            }
163    
164                            if (!stringParserFragment.matches(value)) {
165                                    return null;
166                            }
167    
168                            s = s.replace(stringParserFragment.getToken(), value);
169                    }
170    
171                    for (StringParserFragment stringParserFragment :
172                                    _stringParserFragments) {
173    
174                            parameters.remove(stringParserFragment.getName());
175                    }
176    
177                    return s;
178            }
179    
180            /**
181             * Escapes the special characters in the string so that they will have no
182             * special meaning in a regular expression.
183             *
184             * <p>
185             * This method differs from {@link Pattern#quote(String)} by escaping each
186             * special character with a backslash, rather than enclosing the entire
187             * string in special quote tags. This allows the escaped string to be
188             * manipulated or have sections replaced with non-literal sequences.
189             * </p>
190             *
191             * @param  s the string to escape
192             * @return the escaped string
193             */
194            public static String escapeRegex(String s) {
195                    Matcher matcher = _escapeRegexPattern.matcher(s);
196    
197                    return matcher.replaceAll("\\\\$0");
198            }
199    
200            /**
201             * Populates the parameter map with values parsed from the string if this
202             * parser matches.
203             *
204             * @param  s the string to parse
205             * @param  parameters the parameter map to populate if this parser matches
206             *                 the string
207             * @return <code>true</code> if this parser matches; <code>false</code>
208             *                 otherwise
209             */
210            public boolean parse(String s, Map<String, String> parameters) {
211                    Matcher matcher = _pattern.matcher(s);
212    
213                    if (!matcher.matches()) {
214                            return false;
215                    }
216    
217                    for (int i = 1; i <= _stringParserFragments.size(); i++) {
218                            StringParserFragment stringParserFragment =
219                                    _stringParserFragments.get(i - 1);
220    
221                            String value = matcher.group(i);
222    
223                            if ((_stringEncoder != null) && !stringParserFragment.isRaw()) {
224                                    value = _stringEncoder.decode(value);
225                            }
226    
227                            parameters.put(stringParserFragment.getName(), value);
228                    }
229    
230                    return true;
231            }
232    
233            /**
234             * Sets the string encoder to use for parsing or building a string.
235             *
236             * <p>
237             * The string encoder will not be used for fragments marked as raw. A
238             * fragment can be marked as raw by prefixing its name with a percent sign.
239             * </p>
240             *
241             * @param stringEncoder the string encoder to use for parsing or building a
242             *                string
243             * @see   StringEncoder
244             */
245            public void setStringEncoder(StringEncoder stringEncoder) {
246                    _stringEncoder = stringEncoder;
247            }
248    
249            private static Pattern _escapeRegexPattern = Pattern.compile(
250                    "[\\{\\}\\(\\)\\[\\]\\*\\+\\?\\$\\^\\.\\#\\\\]");
251            private static Pattern _fragmentPattern = Pattern.compile("\\{.+?\\}");
252    
253            private String _builder;
254            private StringEncoder _stringEncoder;
255            private List<StringParserFragment> _stringParserFragments =
256                    new ArrayList<StringParserFragment>();
257            private Pattern _pattern;
258    
259    }