001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.util;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.nio.charset.CharsetDecoderUtil;
020    import com.liferay.portal.kernel.nio.charset.CharsetEncoderUtil;
021    
022    import java.nio.ByteBuffer;
023    import java.nio.CharBuffer;
024    import java.nio.charset.CharacterCodingException;
025    import java.nio.charset.CharsetDecoder;
026    import java.nio.charset.CharsetEncoder;
027    
028    import java.util.BitSet;
029    
030    /**
031     * @author Shuyang Zhou
032     * @author Brian Wing Shun Chan
033     */
034    public class URLCodec {
035    
036            public static String decodeURL(String encodedURLString) {
037                    return decodeURL(encodedURLString, StringPool.UTF8, false);
038            }
039    
040            public static String decodeURL(
041                    String encodedURLString, boolean unescapeSpaces) {
042    
043                    return decodeURL(encodedURLString, StringPool.UTF8, unescapeSpaces);
044            }
045    
046            public static String decodeURL(
047                    String encodedURLString, String charsetName, boolean unescapeSpaces) {
048    
049                    if (encodedURLString == null) {
050                            return null;
051                    }
052    
053                    if (encodedURLString.length() == 0) {
054                            return StringPool.BLANK;
055                    }
056    
057                    /*if (unescapeSpaces) {
058                            encodedURLString = StringUtil.replace(
059                                    encodedURLString, "%20", StringPool.PLUS);
060                    }*/
061    
062                    StringBuilder sb = null;
063    
064                    CharsetDecoder charsetDecoder = null;
065    
066                    for (int i = 0; i < encodedURLString.length(); i++) {
067                            char c = encodedURLString.charAt(i);
068    
069                            switch (c) {
070                                    case CharPool.PERCENT:
071                                            ByteBuffer byteBuffer = _getEncodedByteBuffer(
072                                                    encodedURLString, i);
073    
074                                            if (charsetDecoder == null) {
075                                                    charsetDecoder = CharsetDecoderUtil.getCharsetDecoder(
076                                                            charsetName);
077                                            }
078    
079                                            CharBuffer charBuffer = null;
080    
081                                            try {
082                                                    charBuffer = charsetDecoder.decode(byteBuffer);
083                                            }
084                                            catch (CharacterCodingException cce) {
085                                                    _log.error(cce, cce);
086    
087                                                    return StringPool.BLANK;
088                                            }
089    
090                                            if (sb == null) {
091                                                    sb = new StringBuilder(encodedURLString.length());
092    
093                                                    if (i > 0) {
094                                                            sb.append(encodedURLString, 0, i);
095                                                    }
096                                            }
097    
098                                            sb.append(charBuffer);
099    
100                                            i += byteBuffer.capacity() * 3 - 1;
101    
102                                            break;
103    
104                                    case CharPool.PLUS:
105                                            if (sb == null) {
106                                                    sb = new StringBuilder(encodedURLString.length());
107    
108                                                    if (i > 0) {
109                                                            sb.append(encodedURLString, 0, i);
110                                                    }
111                                            }
112    
113                                            sb.append(CharPool.SPACE);
114    
115                                            break;
116    
117                                    default:
118                                            if (sb != null) {
119                                                    sb.append(c);
120                                            }
121                            }
122                    }
123    
124                    if (sb == null) {
125                            return encodedURLString;
126                    }
127                    else {
128                            return sb.toString();
129                    }
130            }
131    
132            public static String encodeURL(String rawURLString) {
133                    return encodeURL(rawURLString, StringPool.UTF8, false);
134            }
135    
136            public static String encodeURL(String rawURLString, boolean escapeSpaces) {
137                    return encodeURL(rawURLString, StringPool.UTF8, escapeSpaces);
138            }
139    
140            public static String encodeURL(
141                    String rawURLString, String charsetName, boolean escapeSpaces) {
142    
143                    if (rawURLString == null) {
144                            return null;
145                    }
146    
147                    if (rawURLString.length() == 0) {
148                            return StringPool.BLANK;
149                    }
150    
151                    StringBuilder sb = null;
152    
153                    CharsetEncoder charsetEncoder = null;
154    
155                    char[] hexes = new char[2];
156    
157                    for (int i = 0; i < rawURLString.length(); i++) {
158                            char c = rawURLString.charAt(i);
159    
160                            if (_validChars.get(c)) {
161                                    if (sb != null) {
162                                            sb.append(c);
163                                    }
164    
165                                    continue;
166                            }
167    
168                            if (sb == null) {
169                                    sb = new StringBuilder(rawURLString.length());
170    
171                                    sb.append(rawURLString.substring(0, i));
172                            }
173    
174                            // The cases are ordered by frequency and not alphabetically
175    
176                            switch (c) {
177                                    case CharPool.SLASH :
178                                            sb.append("%2F");
179    
180                                            continue;
181    
182                                    case CharPool.EQUAL :
183                                            sb.append("%3D");
184    
185                                            continue;
186    
187                                    case CharPool.AMPERSAND :
188                                            sb.append("%26");
189    
190                                            continue;
191    
192                                    case CharPool.PERCENT :
193                                            sb.append("%25");
194    
195                                            continue;
196    
197                                    case CharPool.SPACE :
198                                            if (escapeSpaces) {
199                                                    sb.append("%20");
200                                            }
201                                            else {
202                                                    sb.append(CharPool.PLUS);
203                                            }
204    
205                                            continue;
206    
207                                    case CharPool.COLON :
208                                            sb.append("%3A");
209    
210                                            continue;
211    
212                                    case CharPool.QUESTION :
213                                            sb.append("%3F");
214    
215                                            continue;
216                            }
217    
218                            CharBuffer charBuffer = _getRawCharBuffer(
219                                    rawURLString, i, escapeSpaces);
220    
221                            if (charsetEncoder == null) {
222                                    charsetEncoder = CharsetEncoderUtil.getCharsetEncoder(
223                                            charsetName);
224                            }
225    
226                            i += charBuffer.length() - 1;
227    
228                            ByteBuffer byteBuffer = null;
229    
230                            try {
231                                    byteBuffer = charsetEncoder.encode(charBuffer);
232                            }
233                            catch (CharacterCodingException cce) {
234                                    _log.error(cce, cce);
235    
236                                    return StringPool.BLANK;
237                            }
238    
239                            for (int j = byteBuffer.position(); j < byteBuffer.limit(); j++) {
240                                    sb.append(CharPool.PERCENT);
241    
242                                    sb.append(
243                                            UnicodeFormatter.byteToHex(byteBuffer.get(), hexes, true));
244                            }
245                    }
246    
247                    if (sb == null) {
248                            return rawURLString;
249                    }
250                    else {
251                            return sb.toString();
252                    }
253            }
254    
255            private static int _charToHex(char c) {
256                    if ((c >= CharPool.LOWER_CASE_A) && (c <= CharPool.LOWER_CASE_F)) {
257                            return c - CharPool.LOWER_CASE_A + 10;
258                    }
259    
260                    if ((c >= CharPool.UPPER_CASE_A) && (c <= CharPool.UPPER_CASE_F)) {
261                            return c - CharPool.UPPER_CASE_A + 10;
262                    }
263    
264                    if ((c >= CharPool.NUMBER_0) && (c <= CharPool.NUMBER_9)) {
265                            return c - CharPool.NUMBER_0;
266                    }
267    
268                    throw new IllegalArgumentException(c + " is not a hex char");
269            }
270    
271            private static ByteBuffer _getEncodedByteBuffer(
272                    String encodedString, int start) {
273    
274                    int count = 1;
275    
276                    for (int i = start + 3; i < encodedString.length(); i += 3) {
277                            if (encodedString.charAt(i) == CharPool.PERCENT) {
278                                    count++;
279                            }
280                            else {
281                                    break;
282                            }
283                    }
284    
285                    if (encodedString.length() < (start + count * 3)) {
286                            throw new IllegalArgumentException(
287                                    "Invalid URL encoding " + encodedString);
288                    }
289    
290                    ByteBuffer byteBuffer = ByteBuffer.allocate(count);
291    
292                    for (int i = start; i < start + count * 3; i += 3) {
293                            int high = _charToHex(encodedString.charAt(i + 1));
294                            int low = _charToHex(encodedString.charAt(i + 2));
295    
296                            byteBuffer.put((byte)((high << 4) + low));
297                    }
298    
299                    byteBuffer.flip();
300    
301                    return byteBuffer;
302            }
303    
304            private static CharBuffer _getRawCharBuffer(
305                    String rawString, int start, boolean includeSpaces) {
306    
307                    int count = 0;
308    
309                    for (int i = start; i < rawString.length(); i++) {
310                            char rawChar = rawString.charAt(i);
311    
312                            if (!_validChars.get(rawChar) &&
313                                    ((rawChar != CharPool.SPACE) || includeSpaces)) {
314    
315                                    count++;
316    
317                                    if (Character.isHighSurrogate(rawChar)) {
318                                            if (((i + 1) < rawString.length()) &&
319                                                    Character.isLowSurrogate(rawString.charAt(i + 1))) {
320    
321                                                    i++;
322                                                    count++;
323                                            }
324                                    }
325                            }
326                            else {
327                                    break;
328                            }
329                    }
330    
331                    return CharBuffer.wrap(rawString, start, start + count);
332            }
333    
334            private static Log _log = LogFactoryUtil.getLog(URLCodec.class);
335    
336            private static BitSet _validChars = new BitSet(256);
337    
338            static {
339                    for (int i = 'a'; i <= 'z'; i++) {
340                            _validChars.set(i);
341                    }
342    
343                    for (int i = 'A'; i <= 'Z'; i++) {
344                            _validChars.set(i);
345                    }
346    
347                    for (int i = '0'; i <= '9'; i++) {
348                            _validChars.set(i);
349                    }
350    
351                    _validChars.set('-');
352                    _validChars.set('_');
353                    _validChars.set('.');
354                    _validChars.set('*');
355            }
356    
357    }