001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.util.CharPool;
018 import com.liferay.portal.kernel.util.Html;
019 import com.liferay.portal.kernel.util.HttpUtil;
020 import com.liferay.portal.kernel.util.StringBundler;
021 import com.liferay.portal.kernel.util.StringPool;
022 import com.liferay.portal.kernel.util.StringUtil;
023
024 import java.util.regex.Matcher;
025 import java.util.regex.Pattern;
026
027 import net.htmlparser.jericho.Source;
028
029
036 public class HtmlImpl implements Html {
037
038 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
039
040 public static final int ESCAPE_MODE_CSS = 2;
041
042 public static final int ESCAPE_MODE_JS = 3;
043
044 public static final int ESCAPE_MODE_TEXT = 4;
045
046 public static final int ESCAPE_MODE_URL = 5;
047
048 public String escape(String text) {
049 if (text == null) {
050 return null;
051 }
052
053 if (text.length() == 0) {
054 return StringPool.BLANK;
055 }
056
057
058
059
060
061 StringBuilder sb = new StringBuilder(text.length());
062
063 for (int i = 0; i < text.length(); i++) {
064 char c = text.charAt(i);
065
066 switch (c) {
067 case '<':
068 sb.append("<");
069
070 break;
071
072 case '>':
073 sb.append(">");
074
075 break;
076
077 case '&':
078 sb.append("&");
079
080 break;
081
082 case '"':
083 sb.append(""");
084
085 break;
086
087 case '\'':
088 sb.append("'");
089
090 break;
091
092 case '»':
093 sb.append("»");
094
095 break;
096
097 default:
098 sb.append(c);
099
100 break;
101 }
102 }
103
104 return sb.toString();
105 }
106
107 public String escape(String text, int type) {
108 if (text == null) {
109 return null;
110 }
111
112 if (text.length() == 0) {
113 return StringPool.BLANK;
114 }
115
116 String prefix = StringPool.BLANK;
117 String postfix = StringPool.BLANK;
118
119 if (type == ESCAPE_MODE_ATTRIBUTE) {
120 prefix = "&#x";
121 postfix = StringPool.SEMICOLON;
122 }
123 else if (type == ESCAPE_MODE_CSS) {
124 prefix = StringPool.BACK_SLASH;
125 }
126 else if (type == ESCAPE_MODE_JS) {
127 prefix = "\\x";
128 }
129 else if (type == ESCAPE_MODE_URL) {
130 return HttpUtil.encodeURL(text, true);
131 }
132 else {
133 return escape(text);
134 }
135
136 StringBuilder sb = new StringBuilder();
137
138 for (int i = 0; i < text.length(); i++) {
139 char c = text.charAt(i);
140
141 if ((Character.isLetterOrDigit(c)) ||
142 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
143
144 sb.append(c);
145 }
146 else {
147 sb.append(prefix);
148 sb.append(Integer.toHexString(c));
149 sb.append(postfix);
150 }
151 }
152
153 return sb.toString();
154 }
155
156 public String escapeAttribute(String attribute) {
157 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
158 }
159
160 public String escapeCSS(String css) {
161 return escape(css, ESCAPE_MODE_CSS);
162 }
163
164 public String escapeHREF(String href) {
165 if (href == null) {
166 return null;
167 }
168
169 if (href.length() == 0) {
170 return StringPool.BLANK;
171 }
172
173 if (href.indexOf(StringPool.COLON) == 10) {
174 String protocol = href.substring(0, 10).toLowerCase();
175
176 if (protocol.equals("javascript")) {
177 return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
178 }
179 }
180
181 return href;
182 }
183
184 public String escapeJS(String js) {
185 return escape(js, ESCAPE_MODE_JS);
186 }
187
188 public String escapeURL(String url) {
189 return escape(url, ESCAPE_MODE_URL);
190 }
191
192 public String extractText(String html) {
193 if (html == null) {
194 return null;
195 }
196
197 Source source = new Source(html);
198
199 return source.getTextExtractor().toString();
200 }
201
202 public String fromInputSafe(String text) {
203 return StringUtil.replace(text, "&", "&");
204 }
205
206 public String replaceMsWordCharacters(String text) {
207 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
208 }
209
210 public String stripBetween(String text, String tag) {
211 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
212 }
213
214 public String stripComments(String text) {
215 return StringUtil.stripBetween(text, "<!--", "-->");
216 }
217
218 public String stripHtml(String text) {
219 if (text == null) {
220 return null;
221 }
222
223 text = stripComments(text);
224
225 StringBuilder sb = new StringBuilder(text.length());
226
227 int x = 0;
228 int y = text.indexOf("<");
229
230 while (y != -1) {
231 sb.append(text.substring(x, y));
232 sb.append(StringPool.SPACE);
233
234
235
236 boolean scriptFound = isScriptTag(text, y + 1);
237
238 if (scriptFound) {
239 int pos = y + _TAG_SCRIPT.length;
240
241
242
243 pos = text.indexOf(">", pos);
244
245 if (pos >= 0) {
246
247
248
249
250 if (text.charAt(pos-1) != '/') {
251
252
253
254 for (;;) {
255 pos = text.indexOf("</", pos);
256
257 if (pos >= 0) {
258 if (isScriptTag(text, pos + 2)) {
259 y = pos;
260
261 break;
262 }
263 else {
264
265
266
267 pos += 2;
268 }
269 }
270 else {
271 break;
272 }
273 }
274 }
275 }
276 }
277
278 x = text.indexOf(">", y);
279
280 if (x == -1) {
281 break;
282 }
283
284 x++;
285
286 if (x < y) {
287
288
289
290 break;
291 }
292
293 y = text.indexOf("<", x);
294 }
295
296 if (y == -1) {
297 sb.append(text.substring(x, text.length()));
298 }
299
300 return sb.toString();
301 }
302
303 public String toInputSafe(String text) {
304 return StringUtil.replace(
305 text,
306 new String[] {"&", "\""},
307 new String[] {"&", """});
308 }
309
310 public String unescape(String text) {
311 if (text == null) {
312 return null;
313 }
314
315 if (text.length() == 0) {
316 return StringPool.BLANK;
317 }
318
319
320
321 text = StringUtil.replace(text, "<", "<");
322 text = StringUtil.replace(text, ">", ">");
323 text = StringUtil.replace(text, "&", "&");
324 text = StringUtil.replace(text, """, "\"");
325 text = StringUtil.replace(text, "'", "'");
326 text = StringUtil.replace(text, "(", "(");
327 text = StringUtil.replace(text, ")", ")");
328 text = StringUtil.replace(text, ",", ",");
329 text = StringUtil.replace(text, "#", "#");
330 text = StringUtil.replace(text, "%", "%");
331 text = StringUtil.replace(text, ";", ";");
332 text = StringUtil.replace(text, "=", "=");
333 text = StringUtil.replace(text, "+", "+");
334 text = StringUtil.replace(text, "-", "-");
335
336 return text;
337 }
338
339 public String wordBreak(String text, int columns) {
340 StringBundler sb = new StringBundler();
341
342 int length = 0;
343 int lastWrite = 0;
344 int pos = 0;
345
346 Pattern pattern = Pattern.compile("([\\s<&]|$)");
347
348 Matcher matcher = pattern.matcher(text);
349
350 while (matcher.find()) {
351 if (matcher.start() < pos) {
352 continue;
353 }
354
355 while ((length + matcher.start() - pos) >= columns) {
356 pos += columns - length;
357
358 sb.append(text.substring(lastWrite, pos));
359 sb.append("<wbr/>");
360
361 length = 0;
362 lastWrite = pos;
363 }
364
365 length += matcher.start() - pos;
366
367 String group = matcher.group();
368
369 if (group.equals(StringPool.AMPERSAND)) {
370 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
371
372 if (x != -1) {
373 length++;
374 pos = x + 1;
375 }
376
377 continue;
378 }
379
380 if (group.equals(StringPool.LESS_THAN)) {
381 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
382
383 if (x != -1) {
384 pos = x + 1;
385 }
386
387 continue;
388 }
389
390 if (group.equals(StringPool.SPACE) ||
391 group.equals(StringPool.NEW_LINE)) {
392
393 length = 0;
394 pos = matcher.start() + 1;
395 }
396 }
397
398 sb.append(text.substring(lastWrite));
399
400 return sb.toString();
401 }
402
403 protected boolean isScriptTag(String text, int pos) {
404 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
405 char item;
406
407 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
408 item = text.charAt(pos++);
409
410 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
411 return false;
412 }
413 }
414
415 item = text.charAt(pos);
416
417
418
419 return !Character.isLetter(item);
420 }
421 else {
422 return false;
423 }
424 }
425
426 private static final String[] _MS_WORD_UNICODE = new String[] {
427 "\u00ae", "\u2019", "\u201c", "\u201d"
428 };
429
430 private static final String[] _MS_WORD_HTML = new String[] {
431 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
432 };
433
434 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
435
436 }