001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.http;
019
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.yetus.audience.InterfaceAudience;
024
025/**
026 * This class is responsible for quoting HTML characters.
027 */
028@InterfaceAudience.Private
public final class HtmlQuoting {
  // Replacement entities for the five active HTML characters. They must stay in
  // sync with the entity names recognised by unquoteHtmlChars(String).
  private static final byte[] AMP_BYTES = "&amp;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] APOS_BYTES = "&apos;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] GT_BYTES = "&gt;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] LT_BYTES = "&lt;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] QUOT_BYTES = "&quot;".getBytes(StandardCharsets.UTF_8);

  /**
   * Look up the HTML entity that replaces a single byte.
   * @param b the byte to inspect
   * @return the entity bytes if {@code b} is one of {@code & < > ' "},
   *         otherwise {@code null}
   */
  private static byte[] entityFor(byte b) {
    switch (b) {
      case '&':
        return AMP_BYTES;
      case '<':
        return LT_BYTES;
      case '>':
        return GT_BYTES;
      case '\'':
        return APOS_BYTES;
      case '"':
        return QUOT_BYTES;
      default:
        return null;
    }
  }

  /**
   * Does the given string need to be quoted?
   * @param data the string to check
   * @param off the starting position
   * @param len the number of bytes to check
   * @return does the string contain any of the active html characters?
   * @throws IllegalStateException if {@code off + len} exceeds the length of {@code data}
   */
  public static boolean needsQuoting(byte[] data, int off, int len) {
    if (off + len > data.length) {
      // Parenthesised so the message reports the sum, not the two numbers
      // glued together as text.
      throw new IllegalStateException("off+len=" + (off + len) + " should be lower"
              + " than data length=" + data.length);
    }
    final int end = off + len;
    for (int i = off; i < end; ++i) {
      if (entityFor(data[i]) != null) {
        return true;
      }
    }
    return false;
  }

  /**
   * Does the given string need to be quoted?
   * @param str the string to check, may be {@code null}
   * @return does the string contain any of the active html characters?
   *         Always {@code false} for {@code null}.
   */
  public static boolean needsQuoting(String str) {
    if (str == null) {
      return false;
    }
    // UTF-8 keeps the result independent of the platform default charset; its
    // multi-byte sequences never contain bytes equal to the ASCII specials.
    byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
    return needsQuoting(bytes, 0, bytes.length);
  }

  /**
   * Quote all of the active HTML characters in the given string as they
   * are added to the buffer.
   * @param output the stream to write the output to
   * @param buffer the byte array to take the characters from
   * @param off the index of the first byte to quote
   * @param len the number of bytes to quote
   * @throws IOException if writing to {@code output} fails
   */
  public static void quoteHtmlChars(OutputStream output, byte[] buffer, int off, int len)
          throws IOException {
    final int end = off + len;
    // Start of the current run of bytes that need no quoting; the run is
    // written with one call instead of one call per byte.
    int runStart = off;
    for (int i = off; i < end; i++) {
      final byte[] entity = entityFor(buffer[i]);
      if (entity != null) {
        if (i > runStart) {
          output.write(buffer, runStart, i - runStart);
        }
        output.write(entity);
        runStart = i + 1;
      }
    }
    if (end > runStart) {
      output.write(buffer, runStart, end - runStart);
    }
  }

  /**
   * Quote the given item to make it html-safe.
   * @param item the string to quote, may be {@code null}
   * @return the quoted string, the same instance if nothing needed quoting,
   *         or {@code null} if {@code item} was {@code null}
   */
  public static String quoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    byte[] bytes = item.getBytes(StandardCharsets.UTF_8);
    if (!needsQuoting(bytes, 0, bytes.length)) {
      return item;
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(bytes.length + 16);
    try {
      quoteHtmlChars(buffer, bytes, 0, bytes.length);
    } catch (IOException ioe) {
      // An in-memory stream does not fail on write; if that ever changes, fail
      // loudly rather than hand back partially quoted text.
      throw new IllegalStateException("Unexpected I/O error on in-memory stream", ioe);
    }
    return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
  }

  /**
   * Return an output stream that quotes all of the output.
   * @param out the stream to write the quoted output to
   * @return a new stream that the application should write to
   */
  public static OutputStream quoteOutputStream(final OutputStream out) {
    return new OutputStream() {
      // Scratch buffer reused by write(int) so single-byte writes do not allocate.
      private final byte[] single = new byte[1];

      @Override
      public void write(byte[] data, int off, int len) throws IOException {
        quoteHtmlChars(out, data, off, len);
      }

      @Override
      public void write(int b) throws IOException {
        single[0] = (byte) b;
        quoteHtmlChars(out, single, 0, 1);
      }

      @Override
      public void flush() throws IOException {
        out.flush();
      }

      @Override
      public void close() throws IOException {
        out.close();
      }
    };
  }

  /**
   * Remove HTML quoting from a string.
   * @param item the string to unquote, may be {@code null}
   * @return the unquoted string, or {@code null} if {@code item} was {@code null}
   * @throws IllegalArgumentException if an {@code &} does not start one of the
   *         five entities produced by {@link #quoteHtmlChars(String)}
   */
  public static String unquoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    int next = item.indexOf('&');
    // nothing was quoted
    if (next == -1) {
      return item;
    }
    final int len = item.length();
    int posn = 0;
    StringBuilder buffer = new StringBuilder(len);
    while (next != -1) {
      // Copy the literal text between the previous entity and this one.
      buffer.append(item, posn, next);
      if (item.startsWith("&amp;", next)) {
        buffer.append('&');
        next += 5;
      } else if (item.startsWith("&apos;", next)) {
        buffer.append('\'');
        next += 6;
      } else if (item.startsWith("&gt;", next)) {
        buffer.append('>');
        next += 4;
      } else if (item.startsWith("&lt;", next)) {
        buffer.append('<');
        next += 4;
      } else if (item.startsWith("&quot;", next)) {
        buffer.append('"');
        next += 6;
      } else {
        // Report the offending entity up to its ';', or to the end of the
        // string when no terminator is present.
        int end = item.indexOf(';', next) + 1;
        if (end == 0) {
          end = len;
        }
        throw new IllegalArgumentException("Bad HTML quoting for " +
                                           item.substring(next, end));
      }
      posn = next;
      next = item.indexOf('&', posn);
    }
    buffer.append(item, posn, len);
    return buffer.toString();
  }

  /**
   * Command-line check: prints each argument in original, quoted and
   * round-tripped (unquoted) form.
   * @param args the strings to quote; at least one is required
   * @throws IllegalArgumentException if no arguments are given
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      throw new IllegalArgumentException("Please provide some arguments");
    }
    for (String arg : args) {
      System.out.println("Original: " + arg);
      String quoted = quoteHtmlChars(arg);
      System.out.println("Quoted: " + quoted);
      String unquoted = unquoteHtmlChars(quoted);
      System.out.println("Unquoted: " + unquoted);
      System.out.println();
    }
  }

  private HtmlQuoting() {}
}