1
14
15 package com.liferay.portal.util;
16
17 import com.liferay.portal.kernel.util.CharPool;
18 import com.liferay.portal.kernel.util.Html;
19 import com.liferay.portal.kernel.util.HttpUtil;
20 import com.liferay.portal.kernel.util.StringPool;
21 import com.liferay.portal.kernel.util.StringUtil;
22
23 import net.htmlparser.jericho.Source;
24
25
33 public class HtmlImpl implements Html {
34
35 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
36
37 public static final int ESCAPE_MODE_CSS = 2;
38
39 public static final int ESCAPE_MODE_JS = 3;
40
41 public static final int ESCAPE_MODE_TEXT = 4;
42
43 public static final int ESCAPE_MODE_URL = 5;
44
45 public String escape(String text) {
46 if (text == null) {
47 return null;
48 }
49
50 if (text.length() == 0) {
51 return StringPool.BLANK;
52 }
53
54
58 StringBuilder sb = new StringBuilder(text.length());
59
60 for (int i = 0; i < text.length(); i++) {
61 char c = text.charAt(i);
62
63 switch (c) {
64 case '<':
65 sb.append("<");
66
67 break;
68
69 case '>':
70 sb.append(">");
71
72 break;
73
74 case '&':
75 sb.append("&");
76
77 break;
78
79 case '"':
80 sb.append(""");
81
82 break;
83
84 case '\'':
85 sb.append("'");
86
87 break;
88
89 default:
90 sb.append(c);
91
92 break;
93 }
94 }
95
96 return sb.toString();
97 }
98
99 public String escape(String text, int type) {
100 if (text == null) {
101 return null;
102 }
103
104 if (text.length() == 0) {
105 return StringPool.BLANK;
106 }
107
108 String prefix = StringPool.BLANK;
109 String postfix = StringPool.BLANK;
110
111 if (type == ESCAPE_MODE_ATTRIBUTE) {
112 prefix = "&#x";
113 postfix = StringPool.SEMICOLON;
114 }
115 else if (type == ESCAPE_MODE_CSS) {
116 prefix = StringPool.BACK_SLASH;
117 }
118 else if (type == ESCAPE_MODE_JS) {
119 prefix = "\\x";
120 }
121 else if (type == ESCAPE_MODE_URL) {
122 return HttpUtil.encodeURL(text, true);
123 }
124 else {
125 return escape(text);
126 }
127
128 StringBuilder sb = new StringBuilder();
129
130 for (int i = 0; i < text.length(); i++) {
131 char c = text.charAt(i);
132
133 if ((Character.isLetterOrDigit(c)) ||
134 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
135
136 sb.append(c);
137 }
138 else {
139 sb.append(prefix);
140 sb.append(Integer.toHexString(c));
141 sb.append(postfix);
142 }
143 }
144
145 return sb.toString();
146 }
147
148 public String escapeAttribute(String attribute) {
149 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
150 }
151
152 public String escapeCSS(String css) {
153 return escape(css, ESCAPE_MODE_CSS);
154 }
155
156 public String escapeJS(String js) {
157 return escape(js, ESCAPE_MODE_JS);
158 }
159
160 public String escapeURL(String url) {
161 return escape(url, ESCAPE_MODE_URL);
162 }
163
164 public String extractText(String html) {
165 if (html == null) {
166 return null;
167 }
168
169 Source source = new Source(html);
170
171 return source.getTextExtractor().toString();
172 }
173
174 public String fromInputSafe(String text) {
175 return StringUtil.replace(text, "&", "&");
176 }
177
178 public String replaceMsWordCharacters(String text) {
179 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
180 }
181
182 public String stripBetween(String text, String tag) {
183 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
184 }
185
186 public String stripComments(String text) {
187 return StringUtil.stripBetween(text, "<!--", "-->");
188 }
189
190 public String stripHtml(String text) {
191 if (text == null) {
192 return null;
193 }
194
195 text = stripComments(text);
196
197 StringBuilder sb = new StringBuilder(text.length());
198
199 int x = 0;
200 int y = text.indexOf("<");
201
202 while (y != -1) {
203 sb.append(text.substring(x, y));
204 sb.append(StringPool.SPACE);
205
206
208 boolean scriptFound = isScriptTag(text, y + 1);
209
210 if (scriptFound) {
211 int pos = y + _TAG_SCRIPT.length;
212
213
215 pos = text.indexOf(">", pos);
216
217 if (pos >= 0) {
218
219
222 if (text.charAt(pos-1) != '/') {
223
224
226 for (;;) {
227 pos = text.indexOf("</", pos);
228
229 if (pos >= 0) {
230 if (isScriptTag(text, pos + 2)) {
231 y = pos;
232
233 break;
234 }
235 else {
236
237
239 pos += 2;
240 }
241 }
242 else {
243 break;
244 }
245 }
246 }
247 }
248 }
249
250 x = text.indexOf(">", y);
251
252 if (x == -1) {
253 break;
254 }
255
256 x++;
257
258 if (x < y) {
259
260
262 break;
263 }
264
265 y = text.indexOf("<", x);
266 }
267
268 if (y == -1) {
269 sb.append(text.substring(x, text.length()));
270 }
271
272 return sb.toString();
273 }
274
275 public String toInputSafe(String text) {
276 return StringUtil.replace(
277 text,
278 new String[] {"&", "\""},
279 new String[] {"&", """});
280 }
281
282 public String unescape(String text) {
283 if (text == null) {
284 return null;
285 }
286
287 if (text.length() == 0) {
288 return StringPool.BLANK;
289 }
290
291
293 text = StringUtil.replace(text, "<", "<");
294 text = StringUtil.replace(text, ">", ">");
295 text = StringUtil.replace(text, "&", "&");
296 text = StringUtil.replace(text, """, "\"");
297 text = StringUtil.replace(text, "'", "'");
298 text = StringUtil.replace(text, "(", "(");
299 text = StringUtil.replace(text, ")", ")");
300 text = StringUtil.replace(text, "#", "#");
301 text = StringUtil.replace(text, "%", "%");
302 text = StringUtil.replace(text, ";", ";");
303 text = StringUtil.replace(text, "+", "+");
304 text = StringUtil.replace(text, "-", "-");
305
306 return text;
307 }
308
309 protected boolean isScriptTag(String text, int pos) {
310 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
311 char item;
312
313 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
314 item = text.charAt(pos++);
315
316 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
317 return false;
318 }
319 }
320
321 item = text.charAt(pos);
322
323
325 return !Character.isLetter(item);
326 }
327 else {
328 return false;
329 }
330 }
331
332 private static final String[] _MS_WORD_UNICODE = new String[] {
333 "\u00ae", "\u2019", "\u201c", "\u201d"
334 };
335
336 private static final String[] _MS_WORD_HTML = new String[] {
337 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
338 };
339
340 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
341
342 }