001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.http;
019
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
025
026/**
027 * This class is responsible for quoting HTML characters.
028 */
029@InterfaceAudience.Private
030public final class HtmlQuoting {
031  private static final byte[] ampBytes = Bytes.toBytes("&");
032  private static final byte[] aposBytes = Bytes.toBytes("'");
033  private static final byte[] gtBytes = Bytes.toBytes(">");
034  private static final byte[] ltBytes = Bytes.toBytes("<");
035  private static final byte[] quotBytes = Bytes.toBytes(""");
036
037  /**
038   * Does the given string need to be quoted?
039   * @param data the string to check
040   * @param off  the starting position
041   * @param len  the number of bytes to check
042   * @return does the string contain any of the active html characters?
043   */
044  public static boolean needsQuoting(byte[] data, int off, int len) {
045    if (off + len > data.length) {
046      throw new IllegalStateException(
047        "off+len=" + off + len + " should be lower" + " than data length=" + data.length);
048    }
049    for (int i = off; i < off + len; ++i) {
050      switch (data[i]) {
051        case '&':
052        case '<':
053        case '>':
054        case '\'':
055        case '"':
056          return true;
057        default:
058          break;
059      }
060    }
061    return false;
062  }
063
064  /**
065   * Does the given string need to be quoted?
066   * @param str the string to check
067   * @return does the string contain any of the active html characters?
068   */
069  public static boolean needsQuoting(String str) {
070    if (str == null) {
071      return false;
072    }
073    byte[] bytes = Bytes.toBytes(str);
074    return needsQuoting(bytes, 0, bytes.length);
075  }
076
077  /**
078   * Quote all of the active HTML characters in the given string as they are added to the buffer.
079   * @param output the stream to write the output to
080   * @param buffer the byte array to take the characters from
081   * @param off    the index of the first byte to quote
082   * @param len    the number of bytes to quote
083   */
084  public static void quoteHtmlChars(OutputStream output, byte[] buffer, int off, int len)
085    throws IOException {
086    for (int i = off; i < off + len; i++) {
087      switch (buffer[i]) {
088        case '&':
089          output.write(ampBytes);
090          break;
091        case '<':
092          output.write(ltBytes);
093          break;
094        case '>':
095          output.write(gtBytes);
096          break;
097        case '\'':
098          output.write(aposBytes);
099          break;
100        case '"':
101          output.write(quotBytes);
102          break;
103        default:
104          output.write(buffer, i, 1);
105          break;
106      }
107    }
108  }
109
110  /**
111   * Quote the given item to make it html-safe.
112   * @param item the string to quote
113   * @return the quoted string
114   */
115  public static String quoteHtmlChars(String item) {
116    if (item == null) {
117      return null;
118    }
119    byte[] bytes = Bytes.toBytes(item);
120    if (needsQuoting(bytes, 0, bytes.length)) {
121      ByteArrayOutputStream buffer = new ByteArrayOutputStream();
122      try {
123        quoteHtmlChars(buffer, bytes, 0, bytes.length);
124      } catch (IOException ioe) {
125        // Won't happen, since it is a bytearrayoutputstream
126      }
127      return buffer.toString();
128    } else {
129      return item;
130    }
131  }
132
133  /**
134   * Return an output stream that quotes all of the output.
135   * @param out the stream to write the quoted output to
136   * @return a new stream that the application show write to
137   */
138  public static OutputStream quoteOutputStream(final OutputStream out) {
139    return new OutputStream() {
140      private byte[] data = new byte[1];
141
142      @Override
143      public void write(byte[] data, int off, int len) throws IOException {
144        quoteHtmlChars(out, data, off, len);
145      }
146
147      @Override
148      public void write(int b) throws IOException {
149        data[0] = (byte) b;
150        quoteHtmlChars(out, data, 0, 1);
151      }
152
153      @Override
154      public void flush() throws IOException {
155        out.flush();
156      }
157
158      @Override
159      public void close() throws IOException {
160        out.close();
161      }
162    };
163  }
164
165  /**
166   * Remove HTML quoting from a string.
167   * @param item the string to unquote
168   * @return the unquoted string
169   */
170  public static String unquoteHtmlChars(String item) {
171    if (item == null) {
172      return null;
173    }
174    int next = item.indexOf('&');
175    // nothing was quoted
176    if (next == -1) {
177      return item;
178    }
179    int len = item.length();
180    int posn = 0;
181    StringBuilder buffer = new StringBuilder();
182    while (next != -1) {
183      buffer.append(item.substring(posn, next));
184      if (item.startsWith("&amp;", next)) {
185        buffer.append('&');
186        next += 5;
187      } else if (item.startsWith("&apos;", next)) {
188        buffer.append('\'');
189        next += 6;
190      } else if (item.startsWith("&gt;", next)) {
191        buffer.append('>');
192        next += 4;
193      } else if (item.startsWith("&lt;", next)) {
194        buffer.append('<');
195        next += 4;
196      } else if (item.startsWith("&quot;", next)) {
197        buffer.append('"');
198        next += 6;
199      } else {
200        int end = item.indexOf(';', next) + 1;
201        if (end == 0) {
202          end = len;
203        }
204        throw new IllegalArgumentException("Bad HTML quoting for " + item.substring(next, end));
205      }
206      posn = next;
207      next = item.indexOf('&', posn);
208    }
209    buffer.append(item.substring(posn, len));
210    return buffer.toString();
211  }
212
213  public static void main(String[] args) {
214    if (args.length == 0) {
215      throw new IllegalArgumentException("Please provide some arguments");
216    }
217    for (String arg : args) {
218      System.out.println("Original: " + arg);
219      String quoted = quoteHtmlChars(arg);
220      System.out.println("Quoted: " + quoted);
221      String unquoted = unquoteHtmlChars(quoted);
222      System.out.println("Unquoted: " + unquoted);
223      System.out.println();
224    }
225  }
226
227  private HtmlQuoting() {
228  }
229}