View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.http;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.io.OutputStream;
23  
/**
 * This class is responsible for quoting HTML characters.
 *
 * <p>Five characters are considered "active" and are replaced by named
 * entities when quoting: {@code &}, {@code <}, {@code >}, {@code '} and
 * {@code "}. {@link #unquoteHtmlChars(String)} reverses exactly those five
 * entities and rejects any other entity reference.
 *
 * <p>String-based methods convert to and from bytes using UTF-8. Every byte
 * of a multi-byte UTF-8 sequence has its high bit set, so a byte-wise scan
 * can never mistake part of a non-ASCII character for an active character.
 */
public class HtmlQuoting {
  /** Charset used for every String/byte conversion so results do not depend on the platform. */
  private static final java.nio.charset.Charset UTF_8 =
      java.nio.charset.StandardCharsets.UTF_8;

  private static final byte[] AMP_BYTES = "&amp;".getBytes(UTF_8);
  private static final byte[] APOS_BYTES = "&apos;".getBytes(UTF_8);
  private static final byte[] GT_BYTES = "&gt;".getBytes(UTF_8);
  private static final byte[] LT_BYTES = "&lt;".getBytes(UTF_8);
  private static final byte[] QUOT_BYTES = "&quot;".getBytes(UTF_8);

  /**
   * Look up the entity that replaces an active HTML character.
   * @param b the byte to examine
   * @return the entity bytes, or {@code null} if the byte needs no quoting
   */
  private static byte[] entityFor(byte b) {
    switch (b) {
      case '&':
        return AMP_BYTES;
      case '<':
        return LT_BYTES;
      case '>':
        return GT_BYTES;
      case '\'':
        return APOS_BYTES;
      case '"':
        return QUOT_BYTES;
      default:
        return null;
    }
  }

  /**
   * Does the given string need to be quoted?
   * @param data the string to check
   * @param off the starting position
   * @param len the number of bytes to check
   * @return does the string contain any of the active html characters?
   * @throws IllegalStateException if {@code off + len} exceeds the array length
   */
  public static boolean needsQuoting(byte[] data, int off, int len) {
    if (off + len > data.length) {
      // Parenthesized so the message reports the sum, not the two numbers
      // concatenated as text.
      throw new IllegalStateException("off+len=" + (off + len) + " should be lower"
          + " than data length=" + data.length);
    }
    for (int i = off; i < off + len; ++i) {
      if (entityFor(data[i]) != null) {
        return true;
      }
    }
    return false;
  }

  /**
   * Does the given string need to be quoted?
   * @param str the string to check, may be {@code null}
   * @return does the string contain any of the active html characters?
   *         {@code false} for a {@code null} string
   */
  public static boolean needsQuoting(String str) {
    if (str == null) {
      return false;
    }
    byte[] bytes = str.getBytes(UTF_8);
    return needsQuoting(bytes, 0, bytes.length);
  }

  /**
   * Quote all of the active HTML characters in the given string as they
   * are added to the buffer.
   * @param output the stream to write the output to
   * @param buffer the byte array to take the characters from
   * @param off the index of the first byte to quote
   * @param len the number of bytes to quote
   * @throws IOException if writing to {@code output} fails
   */
  public static void quoteHtmlChars(OutputStream output, byte[] buffer,
                                    int off, int len) throws IOException {
    for (int i = off; i < off + len; i++) {
      byte[] entity = entityFor(buffer[i]);
      if (entity != null) {
        output.write(entity);
      } else {
        output.write(buffer, i, 1);
      }
    }
  }

  /**
   * Quote the given item to make it html-safe.
   * @param item the string to quote, may be {@code null}
   * @return the quoted string; the same instance if nothing needed quoting,
   *         or {@code null} if {@code item} was {@code null}
   */
  public static String quoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    byte[] bytes = item.getBytes(UTF_8);
    if (!needsQuoting(bytes, 0, bytes.length)) {
      return item;
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(bytes.length + 16);
    try {
      quoteHtmlChars(buffer, bytes, 0, bytes.length);
    } catch (IOException ioe) {
      // An in-memory stream does not fail; if it ever does, surface it
      // instead of returning silently truncated output.
      throw new IllegalStateException(
          "Unexpected I/O failure while writing to an in-memory buffer", ioe);
    }
    return new String(buffer.toByteArray(), UTF_8);
  }

  /**
   * Return an output stream that quotes all of the output.
   * Flushing or closing the returned stream flushes or closes {@code out}.
   * @param out the stream to write the quoted output to
   * @return a new stream that the application should write to
   * @throws IOException if the underlying output fails
   */
  public static OutputStream quoteOutputStream(final OutputStream out
                                               ) throws IOException {
    return new OutputStream() {
      // Reused one-byte scratch buffer for write(int).
      private final byte[] single = new byte[1];

      @Override
      public void write(byte[] data, int off, int len) throws IOException {
        quoteHtmlChars(out, data, off, len);
      }

      @Override
      public void write(int b) throws IOException {
        single[0] = (byte) b;
        quoteHtmlChars(out, single, 0, 1);
      }

      @Override
      public void flush() throws IOException {
        out.flush();
      }

      @Override
      public void close() throws IOException {
        out.close();
      }
    };
  }

  /**
   * Remove HTML quoting from a string.
   * @param item the string to unquote, may be {@code null}
   * @return the unquoted string; the same instance if it contains no
   *         {@code &}, or {@code null} if {@code item} was {@code null}
   * @throws IllegalArgumentException if an {@code &} does not start one of
   *         the five entities produced by {@link #quoteHtmlChars(String)}
   */
  public static String unquoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    int next = item.indexOf('&');
    // nothing was quoted
    if (next == -1) {
      return item;
    }
    int len = item.length();
    int posn = 0;
    StringBuilder buffer = new StringBuilder(len);
    while (next != -1) {
      // Copy the literal text between the previous entity and this one.
      buffer.append(item, posn, next);
      if (item.startsWith("&amp;", next)) {
        buffer.append('&');
        next += 5;
      } else if (item.startsWith("&apos;", next)) {
        buffer.append('\'');
        next += 6;
      } else if (item.startsWith("&gt;", next)) {
        buffer.append('>');
        next += 4;
      } else if (item.startsWith("&lt;", next)) {
        buffer.append('<');
        next += 4;
      } else if (item.startsWith("&quot;", next)) {
        buffer.append('"');
        next += 6;
      } else {
        // Report the offending entity up to its ';', or the rest of the
        // string when there is no terminator.
        int end = item.indexOf(';', next) + 1;
        if (end == 0) {
          end = len;
        }
        throw new IllegalArgumentException("Bad HTML quoting for " +
                                           item.substring(next, end));
      }
      posn = next;
      next = item.indexOf('&', posn);
    }
    buffer.append(item, posn, len);
    return buffer.toString();
  }

  /**
   * Command-line demo: prints each argument in original, quoted and
   * unquoted form.
   * @param args the strings to round-trip; at least one is required
   * @throws Exception if quoting or unquoting fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      throw new IllegalArgumentException("Please provide some arguments");
    }
    for (String arg : args) {
      System.out.println("Original: " + arg);
      String quoted = quoteHtmlChars(arg);
      System.out.println("Quoted: " + quoted);
      String unquoted = unquoteHtmlChars(quoted);
      System.out.println("Unquoted: " + unquoted);
      System.out.println();
    }
  }

}