1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portal.util;
24  
25  import au.id.jericho.lib.html.Source;
26  
27  import com.liferay.portal.kernel.util.Html;
28  import com.liferay.portal.kernel.util.HttpUtil;
29  import com.liferay.portal.kernel.util.StringPool;
30  import com.liferay.portal.kernel.util.StringUtil;
31  
32  /**
33   * <a href="HtmlImpl.java.html"><b><i>View Source</i></b></a>
34   *
35   * @author Brian Wing Shun Chan
36   * @author Clarence Shen
37   * @author Harry Mark
38   * @author Samuel Kong
39   *
40   */
41  public class HtmlImpl implements Html {
42  
43      public static final int ESCAPE_MODE_ATTRIBUTE = 1;
44  
45      public static final int ESCAPE_MODE_CSS = 2;
46  
47      public static final int ESCAPE_MODE_JS = 3;
48  
49      public static final int ESCAPE_MODE_TEXT = 4;
50  
51      public static final int ESCAPE_MODE_URL = 5;
52  
53      public String escape(String text) {
54          if (text == null) {
55              return null;
56          }
57  
58          // Escape using XSS recommendations from
59          // http://www.owasp.org/index.php/Cross_Site_Scripting
60          // #How_to_Protect_Yourself
61  
62          StringBuilder sb = new StringBuilder(text.length());
63  
64          for (int i = 0; i < text.length(); i++) {
65              char c = text.charAt(i);
66  
67              switch (c) {
68                  case '<':
69                      sb.append("&lt;");
70  
71                      break;
72  
73                  case '>':
74                      sb.append("&gt;");
75  
76                      break;
77  
78                  case '&':
79                      sb.append("&amp;");
80  
81                      break;
82  
83                  case '"':
84                      sb.append("&#034;");
85  
86                      break;
87  
88                  case '\'':
89                      sb.append("&#039;");
90  
91                      break;
92  
93                  default:
94                      sb.append(c);
95  
96                      break;
97              }
98          }
99  
100         return sb.toString();
101     }
102 
103     public String escape(String text, int type) {
104         if (text == null){
105             return null;
106         }
107 
108         String prefix = StringPool.BLANK;
109         String postfix = StringPool.BLANK;
110 
111         if (type == ESCAPE_MODE_ATTRIBUTE) {
112             prefix = "&#x";
113             postfix = StringPool.SEMICOLON;
114         }
115         else if (type == ESCAPE_MODE_CSS) {
116             prefix = StringPool.BACK_SLASH;
117         }
118         else if (type == ESCAPE_MODE_JS) {
119             prefix = "\\x";
120         }
121         else if (type == ESCAPE_MODE_URL) {
122             return HttpUtil.encodeURL(text, true);
123         }
124         else {
125             return escape(text);
126         }
127 
128         StringBuilder sb = new StringBuilder();
129 
130         for (int i = 0; i < text.length(); i++) {
131             char c = text.charAt(i);
132 
133             if (Character.isLetterOrDigit(c)) {
134                 sb.append(c);
135             }
136             else {
137                 sb.append(prefix);
138                 sb.append(Integer.toHexString(c));
139                 sb.append(postfix);
140             }
141         }
142 
143         return sb.toString();
144     }
145 
146     public String escapeAttribute(String attribute) {
147         return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
148     }
149 
150     public String escapeCSS(String css) {
151         return escape(css, ESCAPE_MODE_CSS);
152     }
153 
154     public String escapeJS(String js) {
155         return escape(js, ESCAPE_MODE_JS);
156     }
157 
158     public String escapeURL(String url) {
159         return escape(url, ESCAPE_MODE_URL);
160     }
161 
162     public String extractText(String html) {
163         if (html == null) {
164             return null;
165         }
166 
167         Source source = new Source(html);
168 
169         return source.getTextExtractor().toString();
170     }
171 
172     public String fromInputSafe(String text) {
173         return StringUtil.replace(text, "&amp;", "&");
174     }
175 
176     public String replaceMsWordCharacters(String text) {
177         return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
178     }
179 
180     public String stripBetween(String text, String tag) {
181         return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
182     }
183 
184     public String stripComments(String text) {
185         return StringUtil.stripBetween(text, "<!--", "-->");
186     }
187 
188     public String stripHtml(String text) {
189         if (text == null) {
190             return null;
191         }
192 
193         text = stripComments(text);
194 
195         StringBuilder sb = new StringBuilder(text.length());
196 
197         int x = 0;
198         int y = text.indexOf("<");
199 
200         while (y != -1) {
201             sb.append(text.substring(x, y));
202             sb.append(StringPool.SPACE);
203 
204             // Look for text enclosed by <script></script>
205 
206             boolean scriptFound = isScriptTag(text, y + 1);
207 
208             if (scriptFound) {
209                 int pos = y + _TAG_SCRIPT.length;
210 
211                 // Find end of the tag
212 
213                 pos = text.indexOf(">", pos);
214 
215                 if (pos >= 0) {
216 
217                     // Check if preceding character is / (i.e. is this instance
218                     // of <script/>)
219 
220                     if (text.charAt(pos-1) != '/') {
221 
222                         // Search for the ending </script> tag
223 
224                         for (;;) {
225                             pos = text.indexOf("</", pos);
226 
227                             if (pos >= 0) {
228                                 if (isScriptTag(text, pos + 2)) {
229                                     y = pos;
230 
231                                     break;
232                                 }
233                                 else {
234 
235                                     // Skip past "</"
236 
237                                     pos += 2;
238                                 }
239                             }
240                             else {
241                                 break;
242                             }
243                         }
244                     }
245                 }
246             }
247 
248             x = text.indexOf(">", y);
249 
250             if (x == -1) {
251                 break;
252             }
253 
254             x++;
255 
256             if (x < y) {
257 
258                 // <b>Hello</b
259 
260                 break;
261             }
262 
263             y = text.indexOf("<", x);
264         }
265 
266         if (y == -1) {
267             sb.append(text.substring(x, text.length()));
268         }
269 
270         return sb.toString();
271     }
272 
273     public String toInputSafe(String text) {
274         return StringUtil.replace(
275             text,
276             new String[] {"&", "\""},
277             new String[] {"&amp;", "&quot;"});
278     }
279 
280     public String unescape(String text) {
281         if (text == null) {
282             return null;
283         }
284 
285         // Optimize this
286 
287         text = StringUtil.replace(text, "&lt;", "<");
288         text = StringUtil.replace(text, "&gt;", ">");
289         text = StringUtil.replace(text, "&amp;", "&");
290         text = StringUtil.replace(text, "&#034;", "\"");
291         text = StringUtil.replace(text, "&#039;", "'");
292         text = StringUtil.replace(text, "&#040;", "(");
293         text = StringUtil.replace(text, "&#041;", ")");
294         text = StringUtil.replace(text, "&#035;", "#");
295         text = StringUtil.replace(text, "&#037;", "%");
296         text = StringUtil.replace(text, "&#059;", ";");
297         text = StringUtil.replace(text, "&#043;", "+");
298         text = StringUtil.replace(text, "&#045;", "-");
299 
300         return text;
301     }
302 
303     protected boolean isScriptTag(String text, int pos) {
304         if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
305             char item;
306 
307             for (int i = 0; i < _TAG_SCRIPT.length; i++) {
308                 item = text.charAt(pos++);
309 
310                 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
311                     return false;
312                 }
313             }
314 
315             item = text.charAt(pos);
316 
317             // Check that char after "script" is not a letter (i.e. another tag)
318 
319             return !Character.isLetter(item);
320         }
321         else {
322             return false;
323         }
324     }
325 
326     private static final String[] _MS_WORD_UNICODE = new String[] {
327         "\u00ae", "\u2019", "\u201c", "\u201d"
328     };
329 
330     private static final String[] _MS_WORD_HTML = new String[] {
331         "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
332     };
333 
334     private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
335 
336 }