1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * This library is free software; you can redistribute it and/or modify it under
5    * the terms of the GNU Lesser General Public License as published by the Free
6    * Software Foundation; either version 2.1 of the License, or (at your option)
7    * any later version.
8    *
9    * This library is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11   * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12   * details.
13   */
14  
15  package com.liferay.portal.util;
16  
17  import com.liferay.portal.kernel.util.CharPool;
18  import com.liferay.portal.kernel.util.Html;
19  import com.liferay.portal.kernel.util.HttpUtil;
20  import com.liferay.portal.kernel.util.StringPool;
21  import com.liferay.portal.kernel.util.StringUtil;
22  
23  import net.htmlparser.jericho.Source;
24  
25  /**
26   * <a href="HtmlImpl.java.html"><b><i>View Source</i></b></a>
27   *
28   * @author Brian Wing Shun Chan
29   * @author Clarence Shen
30   * @author Harry Mark
31   * @author Samuel Kong
32   */
33  public class HtmlImpl implements Html {
34  
35      public static final int ESCAPE_MODE_ATTRIBUTE = 1;
36  
37      public static final int ESCAPE_MODE_CSS = 2;
38  
39      public static final int ESCAPE_MODE_JS = 3;
40  
41      public static final int ESCAPE_MODE_TEXT = 4;
42  
43      public static final int ESCAPE_MODE_URL = 5;
44  
45      public String escape(String text) {
46          if (text == null) {
47              return null;
48          }
49  
50          if (text.length() == 0) {
51              return StringPool.BLANK;
52          }
53  
54          // Escape using XSS recommendations from
55          // http://www.owasp.org/index.php/Cross_Site_Scripting
56          // #How_to_Protect_Yourself
57  
58          StringBuilder sb = new StringBuilder(text.length());
59  
60          for (int i = 0; i < text.length(); i++) {
61              char c = text.charAt(i);
62  
63              switch (c) {
64                  case '<':
65                      sb.append("&lt;");
66  
67                      break;
68  
69                  case '>':
70                      sb.append("&gt;");
71  
72                      break;
73  
74                  case '&':
75                      sb.append("&amp;");
76  
77                      break;
78  
79                  case '"':
80                      sb.append("&#034;");
81  
82                      break;
83  
84                  case '\'':
85                      sb.append("&#039;");
86  
87                      break;
88  
89                  default:
90                      sb.append(c);
91  
92                      break;
93              }
94          }
95  
96          return sb.toString();
97      }
98  
99      public String escape(String text, int type) {
100         if (text == null) {
101             return null;
102         }
103 
104         if (text.length() == 0) {
105             return StringPool.BLANK;
106         }
107 
108         String prefix = StringPool.BLANK;
109         String postfix = StringPool.BLANK;
110 
111         if (type == ESCAPE_MODE_ATTRIBUTE) {
112             prefix = "&#x";
113             postfix = StringPool.SEMICOLON;
114         }
115         else if (type == ESCAPE_MODE_CSS) {
116             prefix = StringPool.BACK_SLASH;
117         }
118         else if (type == ESCAPE_MODE_JS) {
119             prefix = "\\x";
120         }
121         else if (type == ESCAPE_MODE_URL) {
122             return HttpUtil.encodeURL(text, true);
123         }
124         else {
125             return escape(text);
126         }
127 
128         StringBuilder sb = new StringBuilder();
129 
130         for (int i = 0; i < text.length(); i++) {
131             char c = text.charAt(i);
132 
133             if ((Character.isLetterOrDigit(c)) ||
134                 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
135 
136                 sb.append(c);
137             }
138             else {
139                 sb.append(prefix);
140                 sb.append(Integer.toHexString(c));
141                 sb.append(postfix);
142             }
143         }
144 
145         return sb.toString();
146     }
147 
148     public String escapeAttribute(String attribute) {
149         return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
150     }
151 
152     public String escapeCSS(String css) {
153         return escape(css, ESCAPE_MODE_CSS);
154     }
155 
156     public String escapeJS(String js) {
157         return escape(js, ESCAPE_MODE_JS);
158     }
159 
160     public String escapeURL(String url) {
161         return escape(url, ESCAPE_MODE_URL);
162     }
163 
164     public String extractText(String html) {
165         if (html == null) {
166             return null;
167         }
168 
169         Source source = new Source(html);
170 
171         return source.getTextExtractor().toString();
172     }
173 
174     public String fromInputSafe(String text) {
175         return StringUtil.replace(text, "&amp;", "&");
176     }
177 
178     public String replaceMsWordCharacters(String text) {
179         return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
180     }
181 
182     public String stripBetween(String text, String tag) {
183         return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
184     }
185 
186     public String stripComments(String text) {
187         return StringUtil.stripBetween(text, "<!--", "-->");
188     }
189 
190     public String stripHtml(String text) {
191         if (text == null) {
192             return null;
193         }
194 
195         text = stripComments(text);
196 
197         StringBuilder sb = new StringBuilder(text.length());
198 
199         int x = 0;
200         int y = text.indexOf("<");
201 
202         while (y != -1) {
203             sb.append(text.substring(x, y));
204             sb.append(StringPool.SPACE);
205 
206             // Look for text enclosed by <script></script>
207 
208             boolean scriptFound = isScriptTag(text, y + 1);
209 
210             if (scriptFound) {
211                 int pos = y + _TAG_SCRIPT.length;
212 
213                 // Find end of the tag
214 
215                 pos = text.indexOf(">", pos);
216 
217                 if (pos >= 0) {
218 
219                     // Check if preceding character is / (i.e. is this instance
220                     // of <script/>)
221 
222                     if (text.charAt(pos-1) != '/') {
223 
224                         // Search for the ending </script> tag
225 
226                         for (;;) {
227                             pos = text.indexOf("</", pos);
228 
229                             if (pos >= 0) {
230                                 if (isScriptTag(text, pos + 2)) {
231                                     y = pos;
232 
233                                     break;
234                                 }
235                                 else {
236 
237                                     // Skip past "</"
238 
239                                     pos += 2;
240                                 }
241                             }
242                             else {
243                                 break;
244                             }
245                         }
246                     }
247                 }
248             }
249 
250             x = text.indexOf(">", y);
251 
252             if (x == -1) {
253                 break;
254             }
255 
256             x++;
257 
258             if (x < y) {
259 
260                 // <b>Hello</b
261 
262                 break;
263             }
264 
265             y = text.indexOf("<", x);
266         }
267 
268         if (y == -1) {
269             sb.append(text.substring(x, text.length()));
270         }
271 
272         return sb.toString();
273     }
274 
275     public String toInputSafe(String text) {
276         return StringUtil.replace(
277             text,
278             new String[] {"&", "\""},
279             new String[] {"&amp;", "&quot;"});
280     }
281 
282     public String unescape(String text) {
283         if (text == null) {
284             return null;
285         }
286 
287         if (text.length() == 0) {
288             return StringPool.BLANK;
289         }
290 
291         // Optimize this
292 
293         text = StringUtil.replace(text, "&lt;", "<");
294         text = StringUtil.replace(text, "&gt;", ">");
295         text = StringUtil.replace(text, "&amp;", "&");
296         text = StringUtil.replace(text, "&#034;", "\"");
297         text = StringUtil.replace(text, "&#039;", "'");
298         text = StringUtil.replace(text, "&#040;", "(");
299         text = StringUtil.replace(text, "&#041;", ")");
300         text = StringUtil.replace(text, "&#035;", "#");
301         text = StringUtil.replace(text, "&#037;", "%");
302         text = StringUtil.replace(text, "&#059;", ";");
303         text = StringUtil.replace(text, "&#043;", "+");
304         text = StringUtil.replace(text, "&#045;", "-");
305 
306         return text;
307     }
308 
309     protected boolean isScriptTag(String text, int pos) {
310         if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
311             char item;
312 
313             for (int i = 0; i < _TAG_SCRIPT.length; i++) {
314                 item = text.charAt(pos++);
315 
316                 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
317                     return false;
318                 }
319             }
320 
321             item = text.charAt(pos);
322 
323             // Check that char after "script" is not a letter (i.e. another tag)
324 
325             return !Character.isLetter(item);
326         }
327         else {
328             return false;
329         }
330     }
331 
332     private static final String[] _MS_WORD_UNICODE = new String[] {
333         "\u00ae", "\u2019", "\u201c", "\u201d"
334     };
335 
336     private static final String[] _MS_WORD_HTML = new String[] {
337         "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
338     };
339 
340     private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
341 
342 }