001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.http;
019
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.yetus.audience.InterfaceAudience;
024
025/**
026 * This class is responsible for quoting HTML characters.
027 */
028@InterfaceAudience.Private
public final class HtmlQuoting {
  // Entity text substituted for each active HTML character. The byte forms are encoded explicitly
  // as UTF-8 so the output never depends on the platform default charset.
  private static final String AMP_ENTITY = "&amp;";
  private static final String APOS_ENTITY = "&apos;";
  private static final String GT_ENTITY = "&gt;";
  private static final String LT_ENTITY = "&lt;";
  private static final String QUOT_ENTITY = "&quot;";

  private static final byte[] ampBytes = AMP_ENTITY.getBytes(StandardCharsets.UTF_8);
  private static final byte[] aposBytes = APOS_ENTITY.getBytes(StandardCharsets.UTF_8);
  private static final byte[] gtBytes = GT_ENTITY.getBytes(StandardCharsets.UTF_8);
  private static final byte[] ltBytes = LT_ENTITY.getBytes(StandardCharsets.UTF_8);
  private static final byte[] quotBytes = QUOT_ENTITY.getBytes(StandardCharsets.UTF_8);

  /**
   * Look up the entity that replaces an active HTML character.
   * @param ch the character to inspect
   * @return the entity text, or {@code null} if the character does not need quoting
   */
  private static String entityFor(char ch) {
    switch (ch) {
      case '&':
        return AMP_ENTITY;
      case '<':
        return LT_ENTITY;
      case '>':
        return GT_ENTITY;
      case '\'':
        return APOS_ENTITY;
      case '"':
        return QUOT_ENTITY;
      default:
        return null;
    }
  }

  /**
   * Look up the encoded entity that replaces an active HTML byte.
   * @param b the byte to inspect
   * @return the entity bytes, or {@code null} if the byte does not need quoting
   */
  private static byte[] entityBytesFor(byte b) {
    switch (b) {
      case '&':
        return ampBytes;
      case '<':
        return ltBytes;
      case '>':
        return gtBytes;
      case '\'':
        return aposBytes;
      case '"':
        return quotBytes;
      default:
        return null;
    }
  }

  /**
   * Does the given byte range need to be quoted?
   * @param data the bytes to check
   * @param off  the starting position
   * @param len  the number of bytes to check
   * @return does the range contain any of the active html characters?
   * @throws IllegalStateException if {@code off + len} is larger than {@code data.length}
   */
  public static boolean needsQuoting(byte[] data, int off, int len) {
    // Computed as a long so that a huge off/len pair cannot overflow past the bounds check.
    final long end = (long) off + len;
    if (end > data.length) {
      throw new IllegalStateException(
        "off+len=" + end + " should be lower than data length=" + data.length);
    }
    for (int i = off; i < end; ++i) {
      if (entityBytesFor(data[i]) != null) {
        return true;
      }
    }
    return false;
  }

  /**
   * Does the given string need to be quoted?
   * @param str the string to check, may be {@code null}
   * @return does the string contain any of the active html characters? Always {@code false} for
   *         {@code null}.
   */
  public static boolean needsQuoting(String str) {
    if (str == null) {
      return false;
    }
    // Inspect chars directly; no charset round trip is needed to find ASCII markup characters.
    for (int i = 0; i < str.length(); i++) {
      if (entityFor(str.charAt(i)) != null) {
        return true;
      }
    }
    return false;
  }

  /**
   * Quote all of the active HTML characters in the given byte range as they are written to the
   * stream. Bytes that need no quoting are copied through unchanged.
   * @param output the stream to write the output to
   * @param buffer the byte array to take the characters from
   * @param off    the index of the first byte to quote
   * @param len    the number of bytes to quote
   * @throws IOException if writing to {@code output} fails
   */
  public static void quoteHtmlChars(OutputStream output, byte[] buffer, int off, int len)
    throws IOException {
    final int end = off + len;
    // Start of the current run of bytes that can be copied verbatim.
    int runStart = off;
    for (int i = off; i < end; i++) {
      final byte[] entity = entityBytesFor(buffer[i]);
      if (entity == null) {
        continue;
      }
      // Flush the pending unquoted run in one write, then emit the entity.
      if (i > runStart) {
        output.write(buffer, runStart, i - runStart);
      }
      output.write(entity);
      runStart = i + 1;
    }
    if (end > runStart) {
      output.write(buffer, runStart, end - runStart);
    }
  }

  /**
   * Quote the given item to make it html-safe.
   * @param item the string to quote, may be {@code null}
   * @return the quoted string; {@code null} for {@code null} input, and the same instance when
   *         nothing needed quoting
   */
  public static String quoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    if (!needsQuoting(item)) {
      return item;
    }
    final int length = item.length();
    StringBuilder quoted = new StringBuilder(length + 16);
    for (int i = 0; i < length; i++) {
      final char ch = item.charAt(i);
      final String entity = entityFor(ch);
      if (entity == null) {
        quoted.append(ch);
      } else {
        quoted.append(entity);
      }
    }
    return quoted.toString();
  }

  /**
   * Return an output stream that quotes all of the output. The returned stream reuses a single
   * scratch byte for {@link OutputStream#write(int)} and performs no synchronization of its own.
   * Flushing or closing it flushes or closes {@code out}.
   * @param out the stream to write the quoted output to
   * @return a new stream that the application should write to
   */
  public static OutputStream quoteOutputStream(final OutputStream out) {
    return new OutputStream() {
      // Scratch buffer so single-byte writes can go through the array-based quoting routine.
      private final byte[] single = new byte[1];

      @Override
      public void write(byte[] bytes, int off, int len) throws IOException {
        quoteHtmlChars(out, bytes, off, len);
      }

      @Override
      public void write(int b) throws IOException {
        single[0] = (byte) b;
        quoteHtmlChars(out, single, 0, 1);
      }

      @Override
      public void flush() throws IOException {
        out.flush();
      }

      @Override
      public void close() throws IOException {
        out.close();
      }
    };
  }

  /**
   * Remove HTML quoting from a string.
   * @param item the string to unquote, may be {@code null}
   * @return the unquoted string; {@code null} for {@code null} input, and the same instance when
   *         the input contains no {@code '&'}
   * @throws IllegalArgumentException if an {@code '&'} does not start one of the five entities
   *                                  produced by {@link #quoteHtmlChars(String)}
   */
  public static String unquoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    int next = item.indexOf('&');
    // nothing was quoted
    if (next == -1) {
      return item;
    }
    final int len = item.length();
    int posn = 0;
    StringBuilder buffer = new StringBuilder(len);
    while (next != -1) {
      // Copy the literal text between the previous entity and this one.
      buffer.append(item, posn, next);
      if (item.startsWith("&amp;", next)) {
        buffer.append('&');
        next += 5;
      } else if (item.startsWith("&apos;", next)) {
        buffer.append('\'');
        next += 6;
      } else if (item.startsWith("&gt;", next)) {
        buffer.append('>');
        next += 4;
      } else if (item.startsWith("&lt;", next)) {
        buffer.append('<');
        next += 4;
      } else if (item.startsWith("&quot;", next)) {
        buffer.append('"');
        next += 6;
      } else {
        // Report the offending token up to and including its ';', or to end of input if none.
        int end = item.indexOf(';', next) + 1;
        if (end == 0) {
          end = len;
        }
        throw new IllegalArgumentException("Bad HTML quoting for " + item.substring(next, end));
      }
      posn = next;
      next = item.indexOf('&', posn);
    }
    buffer.append(item, posn, len);
    return buffer.toString();
  }

  /**
   * Command-line helper: prints each argument in original, quoted, and unquoted form.
   * @param args the strings to round-trip
   * @throws IllegalArgumentException if no arguments are given
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      throw new IllegalArgumentException("Please provide some arguments");
    }
    for (String arg : args) {
      System.out.println("Original: " + arg);
      String quoted = quoteHtmlChars(arg);
      System.out.println("Quoted: " + quoted);
      String unquoted = unquoteHtmlChars(quoted);
      System.out.println("Unquoted: " + unquoted);
      System.out.println();
    }
  }

  private HtmlQuoting() {
  }
}