001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.util;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.nio.charset.CharsetDecoderUtil;
020    import com.liferay.portal.kernel.nio.charset.CharsetEncoderUtil;
021    
022    import java.nio.ByteBuffer;
023    import java.nio.CharBuffer;
024    import java.nio.charset.CharacterCodingException;
025    import java.nio.charset.CharsetDecoder;
026    import java.nio.charset.CharsetEncoder;
027    
028    import java.util.BitSet;
029    
030    /**
031     * @author Shuyang Zhou
032     * @author Brian Wing Shun Chan
033     */
034    public class URLCodec {
035    
036            public static String decodeURL(String encodedURLString) {
037                    return decodeURL(encodedURLString, StringPool.UTF8, false);
038            }
039    
040            public static String decodeURL(
041                    String encodedURLString, boolean unescapeSpaces) {
042    
043                    return decodeURL(encodedURLString, StringPool.UTF8, unescapeSpaces);
044            }
045    
046            public static String decodeURL(
047                    String encodedURLString, String charsetName, boolean unescapeSpaces) {
048    
049                    if (encodedURLString == null) {
050                            return null;
051                    }
052    
053                    if (encodedURLString.length() == 0) {
054                            return StringPool.BLANK;
055                    }
056    
057                    /*if (unescapeSpaces) {
058                            encodedURLString = StringUtil.replace(
059                                    encodedURLString, "%20", StringPool.PLUS);
060                    }*/
061    
062                    StringBuilder sb = null;
063    
064                    CharsetDecoder charsetDecoder = null;
065    
066                    for (int i = 0; i < encodedURLString.length(); i++) {
067                            char c = encodedURLString.charAt(i);
068    
069                            switch (c) {
070                                    case CharPool.PERCENT:
071                                            ByteBuffer byteBuffer = _getEncodedByteBuffer(
072                                                    encodedURLString, i);
073    
074                                            if (charsetDecoder == null) {
075                                                    charsetDecoder = CharsetDecoderUtil.getCharsetDecoder(
076                                                            charsetName);
077                                            }
078    
079                                            CharBuffer charBuffer = null;
080    
081                                            try {
082                                                    charBuffer = charsetDecoder.decode(byteBuffer);
083                                            }
084                                            catch (CharacterCodingException cce) {
085                                                    _log.error(cce, cce);
086    
087                                                    return StringPool.BLANK;
088                                            }
089    
090                                            if (sb == null) {
091                                                    sb = new StringBuilder(encodedURLString.length());
092    
093                                                    if (i > 0) {
094                                                            sb.append(encodedURLString, 0, i);
095                                                    }
096                                            }
097    
098                                            sb.append(charBuffer);
099    
100                                            i += byteBuffer.capacity() * 3 - 1;
101    
102                                            break;
103    
104                                    case CharPool.PLUS:
105                                            if (sb == null) {
106                                                    sb = new StringBuilder(encodedURLString.length());
107    
108                                                    if (i > 0) {
109                                                            sb.append(encodedURLString, 0, i);
110                                                    }
111                                            }
112    
113                                            sb.append(CharPool.SPACE);
114    
115                                            break;
116    
117                                    default:
118                                            if (sb != null) {
119                                                    sb.append(c);
120                                            }
121                            }
122                    }
123    
124                    if (sb == null) {
125                            return encodedURLString;
126                    }
127                    else {
128                            return sb.toString();
129                    }
130            }
131    
132            public static String encodeURL(String rawURLString) {
133                    return encodeURL(rawURLString, StringPool.UTF8, false);
134            }
135    
136            public static String encodeURL(String rawURLString, boolean escapeSpaces) {
137                    return encodeURL(rawURLString, StringPool.UTF8, escapeSpaces);
138            }
139    
140            public static String encodeURL(
141                    String rawURLString, String charsetName, boolean escapeSpaces) {
142    
143                    if (rawURLString == null) {
144                            return null;
145                    }
146    
147                    if (rawURLString.length() == 0) {
148                            return StringPool.BLANK;
149                    }
150    
151                    StringBuilder sb = null;
152    
153                    CharsetEncoder charsetEncoder = null;
154    
155                    char[] hexes = new char[2];
156    
157                    for (int i = 0; i < rawURLString.length(); i++) {
158                            char c = rawURLString.charAt(i);
159    
160                            if (_validChars.get(c)) {
161                                    if (sb != null) {
162                                            sb.append(c);
163                                    }
164    
165                                    continue;
166                            }
167    
168                            if (sb == null) {
169                                    sb = new StringBuilder(rawURLString.length());
170    
171                                    sb.append(rawURLString.substring(0, i));
172                            }
173    
174                            // The cases are ordered by frequency and not alphabetically
175    
176                            switch (c) {
177                                    case CharPool.SLASH :
178                                            sb.append("%2F");
179    
180                                            continue;
181    
182                                    case CharPool.EQUAL :
183                                            sb.append("%3D");
184    
185                                            continue;
186    
187                                    case CharPool.AMPERSAND :
188                                            sb.append("%26");
189    
190                                            continue;
191    
192                                    case CharPool.PERCENT :
193                                            sb.append("%25");
194    
195                                            continue;
196    
197                                    case CharPool.SPACE :
198                                            if (escapeSpaces) {
199                                                    sb.append("%20");
200                                            }
201                                            else {
202                                                    sb.append(CharPool.PLUS);
203                                            }
204    
205                                            continue;
206    
207                                    case CharPool.COLON :
208                                            sb.append("%3A");
209    
210                                            continue;
211    
212                                    case CharPool.QUESTION :
213                                            sb.append("%3F");
214    
215                                            continue;
216                            }
217    
218                            CharBuffer charBuffer = _getRawCharBuffer(rawURLString, i);
219    
220                            if (charsetEncoder == null) {
221                                    charsetEncoder = CharsetEncoderUtil.getCharsetEncoder(
222                                            charsetName);
223                            }
224    
225                            i += charBuffer.length() - 1;
226    
227                            ByteBuffer byteBuffer = null;
228    
229                            try {
230                                    byteBuffer = charsetEncoder.encode(charBuffer);
231                            }
232                            catch (CharacterCodingException cce) {
233                                    _log.error(cce, cce);
234    
235                                    return StringPool.BLANK;
236                            }
237    
238                            for (int j = byteBuffer.position(); j < byteBuffer.limit(); j++) {
239                                    sb.append(CharPool.PERCENT);
240    
241                                    sb.append(
242                                            UnicodeFormatter.byteToHex(byteBuffer.get(), hexes, true));
243                            }
244                    }
245    
246                    if (sb == null) {
247                            return rawURLString;
248                    }
249                    else {
250                            return sb.toString();
251                    }
252            }
253    
254            private static int _charToHex(char c) {
255                    if ((c >= CharPool.LOWER_CASE_A) && (c <= CharPool.LOWER_CASE_Z)) {
256                            return c - CharPool.LOWER_CASE_A + 10;
257                    }
258    
259                    if ((c >= CharPool.UPPER_CASE_A) && (c <= CharPool.UPPER_CASE_Z)) {
260                            return c - CharPool.UPPER_CASE_A + 10;
261                    }
262    
263                    if ((c >= CharPool.NUMBER_0) && (c <= CharPool.NUMBER_9)) {
264                            return c - CharPool.NUMBER_0;
265                    }
266    
267                    throw new IllegalArgumentException(c + " is not a hex char");
268            }
269    
270            private static ByteBuffer _getEncodedByteBuffer(
271                    String encodedString, int start) {
272    
273                    int count = 1;
274    
275                    for (int i = start + 3; i < encodedString.length(); i += 3) {
276                            if (encodedString.charAt(i) == CharPool.PERCENT) {
277                                    count++;
278                            }
279                            else {
280                                    break;
281                            }
282                    }
283    
284                    ByteBuffer byteBuffer = ByteBuffer.allocate(count);
285    
286                    for (int i = start; i < start + count * 3; i += 3) {
287                            int high = _charToHex(encodedString.charAt(i + 1));
288                            int low = _charToHex(encodedString.charAt(i + 2));
289    
290                            byteBuffer.put((byte)((high << 4) + low));
291                    }
292    
293                    byteBuffer.flip();
294    
295                    return byteBuffer;
296            }
297    
298            private static CharBuffer _getRawCharBuffer(String rawString, int start) {
299                    int count = 0;
300    
301                    for (int i = start; i < rawString.length(); i++) {
302                            char rawChar = rawString.charAt(i);
303    
304                            if (!_validChars.get(rawChar)) {
305                                    count++;
306    
307                                    if (Character.isHighSurrogate(rawChar)) {
308                                            if (((i + 1) < rawString.length()) &&
309                                                    Character.isLowSurrogate(rawString.charAt(i + 1))) {
310    
311                                                    count++;
312                                            }
313                                    }
314                            }
315                            else {
316                                    break;
317                            }
318                    }
319    
320                    return CharBuffer.wrap(rawString, start, start + count);
321            }
322    
323            private static Log _log = LogFactoryUtil.getLog(URLCodec.class);
324    
325            private static BitSet _validChars = new BitSet(256);
326    
327            static {
328                    for (int i = 'a'; i <= 'z'; i++) {
329                            _validChars.set(i);
330                    }
331    
332                    for (int i = 'A'; i <= 'Z'; i++) {
333                            _validChars.set(i);
334                    }
335    
336                    for (int i = '0'; i <= '9'; i++) {
337                            _validChars.set(i);
338                    }
339    
340                    _validChars.set('-');
341                    _validChars.set('_');
342                    _validChars.set('.');
343                    _validChars.set('*');
344            }
345    
346    }