001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.http;
019
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.yetus.audience.InterfaceAudience;
024
025/**
026 * This class is responsible for quoting HTML characters.
027 */
028@InterfaceAudience.Private
public final class HtmlQuoting {
  // Entity text for each active HTML character. The same literals are used for
  // quoting and unquoting so the two directions cannot drift apart.
  private static final String AMP_ENTITY = "&amp;";
  private static final String APOS_ENTITY = "&apos;";
  private static final String GT_ENTITY = "&gt;";
  private static final String LT_ENTITY = "&lt;";
  private static final String QUOT_ENTITY = "&quot;";

  // Pre-encoded entities. They are pure ASCII, so the UTF-8 encoding is also
  // valid inside any ASCII-compatible byte stream.
  private static final byte[] AMP_BYTES = AMP_ENTITY.getBytes(StandardCharsets.UTF_8);
  private static final byte[] APOS_BYTES = APOS_ENTITY.getBytes(StandardCharsets.UTF_8);
  private static final byte[] GT_BYTES = GT_ENTITY.getBytes(StandardCharsets.UTF_8);
  private static final byte[] LT_BYTES = LT_ENTITY.getBytes(StandardCharsets.UTF_8);
  private static final byte[] QUOT_BYTES = QUOT_ENTITY.getBytes(StandardCharsets.UTF_8);

  /**
   * Look up the entity that replaces an active HTML character.
   * @param b the byte to inspect
   * @return the encoded entity for {@code b}, or {@code null} if {@code b}
   *         does not need quoting
   */
  private static byte[] entityFor(byte b) {
    switch (b) {
      case '&':
        return AMP_BYTES;
      case '<':
        return LT_BYTES;
      case '>':
        return GT_BYTES;
      case '\'':
        return APOS_BYTES;
      case '"':
        return QUOT_BYTES;
      default:
        return null;
    }
  }

  /**
   * Does the given string need to be quoted?
   * @param data the string to check
   * @param off the starting position
   * @param len the number of bytes to check
   * @return does the string contain any of the active html characters?
   * @throws IllegalStateException if {@code off + len} exceeds the length of
   *         {@code data}
   */
  public static boolean needsQuoting(byte[] data, int off, int len) {
    // Sum in long arithmetic so that a huge off/len pair cannot wrap around to
    // a negative int and slip past the bounds check.
    long end = (long) off + len;
    if (end > data.length) {
      throw new IllegalStateException("off+len=" + end + " should be lower"
          + " than data length=" + data.length);
    }
    for (int i = off; i < end; ++i) {
      if (entityFor(data[i]) != null) {
        return true;
      }
    }
    return false;
  }

  /**
   * Does the given string need to be quoted?
   * @param str the string to check, may be {@code null}
   * @return does the string contain any of the active html characters?
   *         {@code false} for a {@code null} string
   */
  public static boolean needsQuoting(String str) {
    if (str == null) {
      return false;
    }
    byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
    return needsQuoting(bytes, 0, bytes.length);
  }

  /**
   * Quote all of the active HTML characters in the given string as they
   * are added to the buffer.
   * @param output the stream to write the output to
   * @param buffer the byte array to take the characters from
   * @param off the index of the first byte to quote
   * @param len the number of bytes to quote
   * @throws IOException if writing to {@code output} fails
   */
  public static void quoteHtmlChars(OutputStream output, byte[] buffer,
                                    int off, int len) throws IOException {
    int end = off + len;
    // Start of the current run of bytes that can be copied through unchanged.
    int runStart = off;
    for (int i = off; i < end; i++) {
      byte[] entity = entityFor(buffer[i]);
      if (entity != null) {
        // Flush the pending plain run in one call, then emit the entity.
        if (i > runStart) {
          output.write(buffer, runStart, i - runStart);
        }
        output.write(entity);
        runStart = i + 1;
      }
    }
    if (end > runStart) {
      output.write(buffer, runStart, end - runStart);
    }
  }

  /**
   * Quote the given item to make it html-safe.
   * @param item the string to quote, may be {@code null}
   * @return the quoted string, the same instance if nothing needed quoting,
   *         or {@code null} if {@code item} was {@code null}
   */
  public static String quoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    // Encode and decode with an explicit charset so the round trip does not
    // depend on the platform default. The active characters are ASCII and
    // UTF-8 never uses ASCII byte values inside a multi-byte sequence, so
    // scanning byte by byte cannot split a character.
    byte[] bytes = item.getBytes(StandardCharsets.UTF_8);
    if (!needsQuoting(bytes, 0, bytes.length)) {
      return item;
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(bytes.length + 16);
    try {
      quoteHtmlChars(buffer, bytes, 0, bytes.length);
    } catch (IOException ioe) {
      // An in-memory stream does not perform I/O; surface the impossible
      // instead of silently returning truncated output.
      throw new IllegalStateException("Unexpected I/O failure on in-memory buffer", ioe);
    }
    return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
  }

  /**
   * Return an output stream that quotes all of the output.
   * @param out the stream to write the quoted output to
   * @return a new stream that the application should write to
   * @throws IOException if the underlying output fails
   */
  public static OutputStream quoteOutputStream(final OutputStream out
                                               ) throws IOException {
    return new OutputStream() {
      // Scratch buffer that lets single-byte writes reuse the array-based quoting.
      private final byte[] single = new byte[1];

      @Override
      public void write(byte[] bytes, int off, int len) throws IOException {
        quoteHtmlChars(out, bytes, off, len);
      }

      @Override
      public void write(int b) throws IOException {
        single[0] = (byte) b;
        quoteHtmlChars(out, single, 0, 1);
      }

      @Override
      public void flush() throws IOException {
        out.flush();
      }

      @Override
      public void close() throws IOException {
        out.close();
      }
    };
  }

  /**
   * Remove HTML quoting from a string.
   * @param item the string to unquote, may be {@code null}
   * @return the unquoted string, or {@code null} if {@code item} was
   *         {@code null}
   * @throws IllegalArgumentException if an ampersand does not start one of
   *         the five entities produced by {@link #quoteHtmlChars(String)}
   */
  public static String unquoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    int next = item.indexOf('&');
    // nothing was quoted
    if (next == -1) {
      return item;
    }
    int len = item.length();
    int posn = 0;
    StringBuilder buffer = new StringBuilder(len);
    while (next != -1) {
      // Copy the literal text between the previous entity and this one.
      buffer.append(item, posn, next);
      if (item.startsWith(AMP_ENTITY, next)) {
        buffer.append('&');
        next += AMP_ENTITY.length();
      } else if (item.startsWith(APOS_ENTITY, next)) {
        buffer.append('\'');
        next += APOS_ENTITY.length();
      } else if (item.startsWith(GT_ENTITY, next)) {
        buffer.append('>');
        next += GT_ENTITY.length();
      } else if (item.startsWith(LT_ENTITY, next)) {
        buffer.append('<');
        next += LT_ENTITY.length();
      } else if (item.startsWith(QUOT_ENTITY, next)) {
        buffer.append('"');
        next += QUOT_ENTITY.length();
      } else {
        // Report the offending token: up to and including the next ';', or
        // the rest of the string when there is none.
        int end = item.indexOf(';', next) + 1;
        if (end == 0) {
          end = len;
        }
        throw new IllegalArgumentException("Bad HTML quoting for " +
                                           item.substring(next, end));
      }
      posn = next;
      next = item.indexOf('&', posn);
    }
    buffer.append(item, posn, len);
    return buffer.toString();
  }

  /**
   * Command-line demo: prints each argument in original, quoted and
   * unquoted form.
   * @param args the strings to quote; at least one is required
   * @throws Exception if quoting or unquoting fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      throw new IllegalArgumentException("Please provide some arguments");
    }
    for (String arg : args) {
      System.out.println("Original: " + arg);
      String quoted = quoteHtmlChars(arg);
      System.out.println("Quoted: " + quoted);
      String unquoted = unquoteHtmlChars(quoted);
      System.out.println("Unquoted: " + unquoted);
      System.out.println();
    }
  }

  /** Utility class; not instantiable. */
  private HtmlQuoting() {}
}