View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.filter;
20  
21  import com.google.protobuf.InvalidProtocolBufferException;
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.classification.InterfaceStability;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.exceptions.DeserializationException;
28  import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  import java.nio.charset.Charset;
32  import java.nio.charset.IllegalCharsetNameException;
33  import java.util.Arrays;
34  import java.util.regex.Pattern;
35  
36  /**
37   * This comparator is for use with {@link CompareFilter} implementations, such
38   * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
39   * filtering based on the value of a given column. Use it to test if a given
40   * regular expression matches a cell value in the column.
41   * <p>
42   * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
43   * <p>
44   * For example:
45   * <p>
46   * <pre>
47   * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
48   *     new RegexStringComparator(
49   *       // v4 IP address
50   *       "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
51   *         "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
52   *         "|" +
53   *       // v6 IP address
54   *       "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
55   *         "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
56   * </pre>
57   * <p>
58   * Supports {@link java.util.regex.Pattern} flags as well:
59   * <p>
60   * <pre>
61   * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
62   *     new RegexStringComparator("regex", Pattern.CASE_INSENSITIVE | Pattern.DOTALL));
63   * </pre>
64   * @see java.util.regex.Pattern
65   */
66  @InterfaceAudience.Public
67  @InterfaceStability.Stable
68  public class RegexStringComparator extends ByteArrayComparable {
69  
70    private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
71  
72    private Charset charset = HConstants.UTF8_CHARSET;
73  
74    private Pattern pattern;
75  
76    /**
77     * Constructor
78     * Adds Pattern.DOTALL to the underlying Pattern
79     * @param expr a valid regular expression
80     */
81    public RegexStringComparator(String expr) {
82      this(expr, Pattern.DOTALL);
83    }
84  
85    /**
86     * Constructor
87     * @param expr a valid regular expression
88     * @param flags java.util.regex.Pattern flags
89     */
90    public RegexStringComparator(String expr, int flags) {
91      super(Bytes.toBytes(expr));
92      this.pattern = Pattern.compile(expr, flags);
93    }
94  
95    /**
96     * Specifies the {@link Charset} to use to convert the row key to a String.
97     * <p>
98     * The row key needs to be converted to a String in order to be matched
99     * against the regular expression.  This method controls which charset is
100    * used to do this conversion.
101    * <p>
102    * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
103    * is recommended.
104    * @param charset The charset to use.
105    */
106   public void setCharset(final Charset charset) {
107     this.charset = charset;
108   }
109 
110   @Override
111   public int compareTo(byte[] value, int offset, int length) {
112     // Use find() for subsequence match instead of matches() (full sequence
113     // match) to adhere to the principle of least surprise.
114     String tmp;
115     if (length < value.length / 2) {
116       // See HBASE-9428. Make a copy of the relevant part of the byte[],
117       // or the JDK will copy the entire byte[] during String decode
118       tmp = new String(Arrays.copyOfRange(value, offset, offset + length), charset);
119     } else {
120       tmp = new String(value, offset, length, charset);
121     }
122     return pattern.matcher(tmp).find() ? 0 : 1;
123   }
124 
125   /**
126    * @return The comparator serialized using pb
127    */
128   public byte [] toByteArray() {
129     ComparatorProtos.RegexStringComparator.Builder builder =
130       ComparatorProtos.RegexStringComparator.newBuilder();
131     builder.setPattern(pattern.toString());
132     builder.setPatternFlags(pattern.flags());
133     builder.setCharset(charset.name());
134     return builder.build().toByteArray();
135   }
136 
137   /**
138    * @param pbBytes A pb serialized {@link RegexStringComparator} instance
139    * @return An instance of {@link RegexStringComparator} made from <code>bytes</code>
140    * @throws DeserializationException
141    * @see #toByteArray
142    */
143   public static RegexStringComparator parseFrom(final byte [] pbBytes)
144   throws DeserializationException {
145     ComparatorProtos.RegexStringComparator proto;
146     try {
147       proto = ComparatorProtos.RegexStringComparator.parseFrom(pbBytes);
148     } catch (InvalidProtocolBufferException e) {
149       throw new DeserializationException(e);
150     }
151 
152     RegexStringComparator comparator =
153       new RegexStringComparator(proto.getPattern(), proto.getPatternFlags());
154     final String charset = proto.getCharset();
155     if (charset.length() > 0) {
156       try {
157         comparator.setCharset(Charset.forName(charset));
158       } catch (IllegalCharsetNameException e) {
159         LOG.error("invalid charset", e);
160       }
161     }
162     return comparator;
163   }
164 
165   /**
166    * @param other
167    * @return true if and only if the fields of the comparator that are serialized
168    * are equal to the corresponding fields in other.  Used for testing.
169    */
170   boolean areSerializedFieldsEqual(ByteArrayComparable other) {
171     if (other == this) return true;
172     if (!(other instanceof RegexStringComparator)) return false;
173 
174     RegexStringComparator comparator = (RegexStringComparator)other;
175     return super.areSerializedFieldsEqual(comparator)
176       && this.pattern.toString().equals(comparator.pattern.toString())
177       && this.pattern.flags() == comparator.pattern.flags()
178       && this.charset.equals(comparator.charset);
179   }
180 }