1
19
20 package com.liferay.portal.util;
21
22 import au.id.jericho.lib.html.Source;
23
24 import com.liferay.portal.kernel.util.Html;
25 import com.liferay.portal.kernel.util.StringPool;
26 import com.liferay.portal.kernel.util.StringUtil;
27
28
36 public class HtmlImpl implements Html {
37
38 public String escape(String text) {
39 if (text == null) {
40 return null;
41 }
42
43
47 StringBuilder sb = new StringBuilder(text.length());
48
49 for (int i = 0; i < text.length(); i++) {
50 char c = text.charAt(i);
51
52 switch (c) {
53 case '<':
54 sb.append("<");
55
56 break;
57
58 case '>':
59 sb.append(">");
60
61 break;
62
63 case '&':
64 sb.append("&");
65
66 break;
67
68 case '"':
69 sb.append(""");
70
71 break;
72
73 case '\'':
74 sb.append("'");
75
76 break;
77
78 case '(':
79 sb.append("(");
80
81 break;
82
83 case ')':
84 sb.append(")");
85
86 break;
87
88 case '#':
89 sb.append("#");
90
91 break;
92
93 case '%':
94 sb.append("%");
95
96 break;
97
98 case ';':
99 sb.append(";");
100
101 break;
102
103 case '+':
104 sb.append("+");
105
106 break;
107
108 case '-':
109 sb.append("-");
110
111 break;
112
113 default:
114 sb.append(c);
115
116 break;
117 }
118 }
119
120 return sb.toString();
121 }
122
123 public String extractText(String html) {
124 if (html == null) {
125 return null;
126 }
127
128 Source source = new Source(html);
129
130 return source.getTextExtractor().toString();
131 }
132
133 public String fromInputSafe(String text) {
134 return StringUtil.replace(text, "&", "&");
135 }
136
137 public String replaceMsWordCharacters(String text) {
138 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
139 }
140
141 public String stripBetween(String text, String tag) {
142 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
143 }
144
145 public String stripComments(String text) {
146 return StringUtil.stripBetween(text, "<!--", "-->");
147 }
148
149 public String stripHtml(String text) {
150 if (text == null) {
151 return null;
152 }
153
154 text = stripComments(text);
155
156 StringBuilder sb = new StringBuilder(text.length());
157
158 int x = 0;
159 int y = text.indexOf("<");
160
161 while (y != -1) {
162 sb.append(text.substring(x, y));
163 sb.append(StringPool.SPACE);
164
165
167 boolean scriptFound = isScriptTag(text, y + 1);
168
169 if (scriptFound) {
170 int pos = y + _TAG_SCRIPT.length;
171
172
174 pos = text.indexOf(">", pos);
175
176 if (pos >= 0) {
177
178
181 if (text.charAt(pos-1) != '/') {
182
183
185 for (;;) {
186 pos = text.indexOf("</", pos);
187
188 if (pos >= 0) {
189 if (isScriptTag(text, pos + 2)) {
190 y = pos;
191
192 break;
193 }
194 else {
195
196
198 pos += 2;
199 }
200 }
201 else {
202 break;
203 }
204 }
205 }
206 }
207 }
208
209 x = text.indexOf(">", y);
210
211 if (x == -1) {
212 break;
213 }
214
215 x++;
216
217 if (x < y) {
218
219
221 break;
222 }
223
224 y = text.indexOf("<", x);
225 }
226
227 if (y == -1) {
228 sb.append(text.substring(x, text.length()));
229 }
230
231 return sb.toString();
232 }
233
234 public String toInputSafe(String text) {
235 return StringUtil.replace(
236 text,
237 new String[] {"&", "\""},
238 new String[] {"&", """});
239 }
240
241 public String unescape(String text) {
242 if (text == null) {
243 return null;
244 }
245
246
248 text = StringUtil.replace(text, "<", "<");
249 text = StringUtil.replace(text, ">", ">");
250 text = StringUtil.replace(text, "&", "&");
251 text = StringUtil.replace(text, """, "\"");
252 text = StringUtil.replace(text, "'", "'");
253 text = StringUtil.replace(text, "(", "(");
254 text = StringUtil.replace(text, ")", ")");
255 text = StringUtil.replace(text, "#", "#");
256 text = StringUtil.replace(text, "%", "%");
257 text = StringUtil.replace(text, ";", ";");
258 text = StringUtil.replace(text, "+", "+");
259 text = StringUtil.replace(text, "-", "-");
260
261 return text;
262 }
263
264 protected boolean isScriptTag(String text, int pos) {
265 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
266 char item;
267
268 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
269 item = text.charAt(pos++);
270
271 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
272 return false;
273 }
274 }
275
276 item = text.charAt(pos);
277
278
280 return !Character.isLetter(item);
281 }
282 else {
283 return false;
284 }
285 }
286
287 private static final String[] _MS_WORD_UNICODE = new String[] {
288 "\u00ae", "\u2019", "\u201c", "\u201d"
289 };
290
291 private static final String[] _MS_WORD_HTML = new String[] {
292 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
293 };
294
295 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
296
297 }