View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.filter;
21  
22  import org.apache.hadoop.hbase.HConstants;
23  import org.apache.hadoop.hbase.util.Bytes;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  
28  import java.io.DataInput;
29  import java.io.DataOutput;
30  import java.io.IOException;
31  import java.nio.charset.Charset;
32  import java.nio.charset.IllegalCharsetNameException;
33  import java.util.Arrays;
34  import java.util.regex.Pattern;
35  
36  /**
37   * This comparator is for use with {@link CompareFilter} implementations, such
38   * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
39   * filtering based on the value of a given column. Use it to test if a given
40   * regular expression matches a cell value in the column.
41   * <p>
42   * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
43   * <p>
44   * For example:
45   * <p>
46   * <pre>
47   * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
48   *     new RegexStringComparator(
49   *       // v4 IP address
50   *       "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
51   *         "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
52   *         "|" +
53   *       // v6 IP address
54   *       "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
55   *         "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
56   * </pre>
57   */
58  public class RegexStringComparator extends WritableByteArrayComparable {
59  
60    private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
61  
62    private Charset charset = Charset.forName(HConstants.UTF8_ENCODING);
63  
64    private Pattern pattern;
65  
66    /** Nullary constructor for Writable, do not use */
67    public RegexStringComparator() { }
68  
69    /**
70     * Constructor
71     * @param expr a valid regular expression
72     */
73    public RegexStringComparator(String expr) {
74      super(Bytes.toBytes(expr));
75      this.pattern = Pattern.compile(expr, Pattern.DOTALL);
76    }
77  
78    /**
79     * Specifies the {@link Charset} to use to convert the row key to a String.
80     * <p>
81     * The row key needs to be converted to a String in order to be matched
82     * against the regular expression.  This method controls which charset is
83     * used to do this conversion.
84     * <p>
85     * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
86     * is recommended.
87     * @param charset The charset to use.
88     */
89    public void setCharset(final Charset charset) {
90      this.charset = charset;
91    }
92  
93    @Override
94    public int compareTo(byte[] value, int offset, int length) {
95      // Use find() for subsequence match instead of matches() (full sequence
96      // match) to adhere to the principle of least surprise.
97      String tmp;
98      if (length < value.length / 2) {
99        // See HBASE-9428. Make a copy of the relevant part of the byte[],
100       // or the JDK will copy the entire byte[] during String decode
101       tmp = new String(Arrays.copyOfRange(value, offset, offset + length), charset);
102     } else {
103       tmp = new String(value, offset, length, charset);
104     }
105     return pattern.matcher(tmp).find() ? 0 : 1;
106   }
107 
108   @Override
109   public void readFields(DataInput in) throws IOException {
110     final String expr = in.readUTF();
111     this.value = Bytes.toBytes(expr);
112     this.pattern = Pattern.compile(expr);
113     final String charset = in.readUTF();
114     if (charset.length() > 0) {
115       try {
116         this.charset = Charset.forName(charset);
117       } catch (IllegalCharsetNameException e) {
118         LOG.error("invalid charset", e);
119       }
120     }
121   }
122 
123   @Override
124   public void write(DataOutput out) throws IOException {
125     out.writeUTF(pattern.toString());
126     out.writeUTF(charset.name());
127   }
128 
129 }