1
22
23 package com.liferay.portal.util;
24
25 import au.id.jericho.lib.html.Source;
26
27 import com.liferay.portal.kernel.util.Html;
28 import com.liferay.portal.kernel.util.StringPool;
29 import com.liferay.portal.kernel.util.StringUtil;
30
31
39 public class HtmlImpl implements Html {
40
41 public String escape(String text) {
42 if (text == null) {
43 return null;
44 }
45
46
50 StringBuilder sb = new StringBuilder(text.length());
51
52 for (int i = 0; i < text.length(); i++) {
53 char c = text.charAt(i);
54
55 switch (c) {
56 case '<':
57 sb.append("<");
58
59 break;
60
61 case '>':
62 sb.append(">");
63
64 break;
65
66 case '&':
67 sb.append("&");
68
69 break;
70
71 case '"':
72 sb.append(""");
73
74 break;
75
76 case '\'':
77 sb.append("'");
78
79 break;
80
81 case '(':
82 sb.append("(");
83
84 break;
85
86 case ')':
87 sb.append(")");
88
89 break;
90
91 case '#':
92 sb.append("#");
93
94 break;
95
96 case '%':
97 sb.append("%");
98
99 break;
100
101 case ';':
102 sb.append(";");
103
104 break;
105
106 case '+':
107 sb.append("+");
108
109 break;
110
111 case '-':
112 sb.append("-");
113
114 break;
115
116 default:
117 sb.append(c);
118
119 break;
120 }
121 }
122
123 return sb.toString();
124 }
125
126 public String extractText(String html) {
127 if (html == null) {
128 return null;
129 }
130
131 Source source = new Source(html);
132
133 return source.getTextExtractor().toString();
134 }
135
136 public String fromInputSafe(String text) {
137 return StringUtil.replace(text, "&", "&");
138 }
139
140 public String replaceMsWordCharacters(String text) {
141 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
142 }
143
144 public String stripBetween(String text, String tag) {
145 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
146 }
147
148 public String stripComments(String text) {
149 return StringUtil.stripBetween(text, "<!--", "-->");
150 }
151
152 public String stripHtml(String text) {
153 if (text == null) {
154 return null;
155 }
156
157 text = stripComments(text);
158
159 StringBuilder sb = new StringBuilder(text.length());
160
161 int x = 0;
162 int y = text.indexOf("<");
163
164 while (y != -1) {
165 sb.append(text.substring(x, y));
166 sb.append(StringPool.SPACE);
167
168
170 boolean scriptFound = isScriptTag(text, y + 1);
171
172 if (scriptFound) {
173 int pos = y + _TAG_SCRIPT.length;
174
175
177 pos = text.indexOf(">", pos);
178
179 if (pos >= 0) {
180
181
184 if (text.charAt(pos-1) != '/') {
185
186
188 for (;;) {
189 pos = text.indexOf("</", pos);
190
191 if (pos >= 0) {
192 if (isScriptTag(text, pos + 2)) {
193 y = pos;
194
195 break;
196 }
197 else {
198
199
201 pos += 2;
202 }
203 }
204 else {
205 break;
206 }
207 }
208 }
209 }
210 }
211
212 x = text.indexOf(">", y);
213
214 if (x == -1) {
215 break;
216 }
217
218 x++;
219
220 if (x < y) {
221
222
224 break;
225 }
226
227 y = text.indexOf("<", x);
228 }
229
230 if (y == -1) {
231 sb.append(text.substring(x, text.length()));
232 }
233
234 return sb.toString();
235 }
236
237 public String toInputSafe(String text) {
238 return StringUtil.replace(
239 text,
240 new String[] {"&", "\""},
241 new String[] {"&", """});
242 }
243
244 public String unescape(String text) {
245 if (text == null) {
246 return null;
247 }
248
249
251 text = StringUtil.replace(text, "<", "<");
252 text = StringUtil.replace(text, ">", ">");
253 text = StringUtil.replace(text, "&", "&");
254 text = StringUtil.replace(text, """, "\"");
255 text = StringUtil.replace(text, "'", "'");
256 text = StringUtil.replace(text, "(", "(");
257 text = StringUtil.replace(text, ")", ")");
258 text = StringUtil.replace(text, "#", "#");
259 text = StringUtil.replace(text, "%", "%");
260 text = StringUtil.replace(text, ";", ";");
261 text = StringUtil.replace(text, "+", "+");
262 text = StringUtil.replace(text, "-", "-");
263
264 return text;
265 }
266
267 protected boolean isScriptTag(String text, int pos) {
268 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
269 char item;
270
271 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
272 item = text.charAt(pos++);
273
274 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
275 return false;
276 }
277 }
278
279 item = text.charAt(pos);
280
281
283 return !Character.isLetter(item);
284 }
285 else {
286 return false;
287 }
288 }
289
290 private static final String[] _MS_WORD_UNICODE = new String[] {
291 "\u00ae", "\u2019", "\u201c", "\u201d"
292 };
293
294 private static final String[] _MS_WORD_HTML = new String[] {
295 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
296 };
297
298 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
299
300 }