View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  
25  import com.google.protobuf.HBaseZeroCopyByteString;
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.Cell;
31  import org.apache.hadoop.hbase.KeyValue;
32  import org.apache.hadoop.hbase.KeyValueUtil;
33  import org.apache.hadoop.hbase.client.Scan;
34  import org.apache.hadoop.hbase.exceptions.DeserializationException;
35  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
36  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
37  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
38  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
39  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.CompareType;
40  import org.apache.hadoop.hbase.util.Bytes;
41  
42  import com.google.common.base.Preconditions;
43  import com.google.protobuf.InvalidProtocolBufferException;
44  
45  /**
46   * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp}
47   * operator (equal, greater, not equal, etc), and either a byte [] value or
48   * a ByteArrayComparable.
49   * <p>
 * If we have a byte [] value then we just do a lexicographic compare. The
 * operator reads as "cell value OP passed value": for example, if passed
 * value is 'b' and cell has 'a' and the compare operator is LESS, then the
 * cell passes (it is not filtered out), whereas with GREATER it would be
 * filtered out.  If this is not sufficient (eg you want to deserialize a
 * long and then compare it to a fixed long value), then you can pass in
 * your own comparator instead.
55   * <p>
56   * You must also specify a family and qualifier.  Only the value of this column
57   * will be tested. When using this filter on a {@link Scan} with specified
58   * inputs, the column to be tested should also be added as input (otherwise
59   * the filter will regard the column as missing).
60   * <p>
61   * To prevent the entire row from being emitted if the column is not found
62   * on a row, use {@link #setFilterIfMissing}.
63   * Otherwise, if the column is found, the entire row will be emitted only if
64   * the value passes.  If the value fails, the row will be filtered out.
65   * <p>
66   * In order to test values of previous versions (timestamps), set
67   * {@link #setLatestVersionOnly} to false. The default is true, meaning that
68   * only the latest version's value is tested and all previous versions are ignored.
69   * <p>
70   * To filter based on the value of all scanned columns, use {@link ValueFilter}.
71   */
72  @InterfaceAudience.Public
73  @InterfaceStability.Stable
74  public class SingleColumnValueFilter extends FilterBase {
75    static final Log LOG = LogFactory.getLog(SingleColumnValueFilter.class);
76  
77    protected byte [] columnFamily;
78    protected byte [] columnQualifier;
79    protected CompareOp compareOp;
80    protected ByteArrayComparable comparator;
81    protected boolean foundColumn = false;
82    protected boolean matchedColumn = false;
83    protected boolean filterIfMissing = false;
84    protected boolean latestVersionOnly = true;
85  
86    /**
87     * Constructor for binary compare of the value of a single column.  If the
88     * column is found and the condition passes, all columns of the row will be
89     * emitted.  If the condition fails, the row will not be emitted.
90     * <p>
91     * Use the filterIfColumnMissing flag to set whether the rest of the columns
92     * in a row will be emitted if the specified column to check is not found in
93     * the row.
94     *
95     * @param family name of column family
96     * @param qualifier name of column qualifier
97     * @param compareOp operator
98     * @param value value to compare column values against
99     */
100   public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
101       final CompareOp compareOp, final byte[] value) {
102     this(family, qualifier, compareOp, new BinaryComparator(value));
103   }
104 
105   /**
106    * Constructor for binary compare of the value of a single column.  If the
107    * column is found and the condition passes, all columns of the row will be
108    * emitted.  If the condition fails, the row will not be emitted.
109    * <p>
110    * Use the filterIfColumnMissing flag to set whether the rest of the columns
111    * in a row will be emitted if the specified column to check is not found in
112    * the row.
113    *
114    * @param family name of column family
115    * @param qualifier name of column qualifier
116    * @param compareOp operator
117    * @param comparator Comparator to use.
118    */
119   public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
120       final CompareOp compareOp, final ByteArrayComparable comparator) {
121     this.columnFamily = family;
122     this.columnQualifier = qualifier;
123     this.compareOp = compareOp;
124     this.comparator = comparator;
125   }
126 
127   /**
128    * Constructor for protobuf deserialization only.
129    * @param family
130    * @param qualifier
131    * @param compareOp
132    * @param comparator
133    * @param filterIfMissing
134    * @param latestVersionOnly
135    */
136   protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
137       final CompareOp compareOp, ByteArrayComparable comparator, final boolean filterIfMissing,
138       final boolean latestVersionOnly) {
139     this(family, qualifier, compareOp, comparator);
140     this.filterIfMissing = filterIfMissing;
141     this.latestVersionOnly = latestVersionOnly;
142   }
143 
144   /**
145    * @return operator
146    */
147   public CompareOp getOperator() {
148     return compareOp;
149   }
150 
151   /**
152    * @return the comparator
153    */
154   public ByteArrayComparable getComparator() {
155     return comparator;
156   }
157 
158   /**
159    * @return the family
160    */
161   public byte[] getFamily() {
162     return columnFamily;
163   }
164 
165   /**
166    * @return the qualifier
167    */
168   public byte[] getQualifier() {
169     return columnQualifier;
170   }
171 
172   @Override
173   public ReturnCode filterKeyValue(Cell c) {
174     // TODO get rid of this.
175     KeyValue keyValue = KeyValueUtil.ensureKeyValue(c);
176     
177     // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue()));
178     if (this.matchedColumn) {
179       // We already found and matched the single column, all keys now pass
180       return ReturnCode.INCLUDE;
181     } else if (this.latestVersionOnly && this.foundColumn) {
182       // We found but did not match the single column, skip to next row
183       return ReturnCode.NEXT_ROW;
184     }
185     if (!keyValue.matchingColumn(this.columnFamily, this.columnQualifier)) {
186       return ReturnCode.INCLUDE;
187     }
188     foundColumn = true;
189     if (filterColumnValue(keyValue.getValueArray(),
190         keyValue.getValueOffset(), keyValue.getValueLength())) {
191       return this.latestVersionOnly? ReturnCode.NEXT_ROW: ReturnCode.INCLUDE;
192     }
193     this.matchedColumn = true;
194     return ReturnCode.INCLUDE;
195   }
196 
197   private boolean filterColumnValue(final byte [] data, final int offset,
198       final int length) {
199     int compareResult = this.comparator.compareTo(data, offset, length);
200     switch (this.compareOp) {
201     case LESS:
202       return compareResult <= 0;
203     case LESS_OR_EQUAL:
204       return compareResult < 0;
205     case EQUAL:
206       return compareResult != 0;
207     case NOT_EQUAL:
208       return compareResult == 0;
209     case GREATER_OR_EQUAL:
210       return compareResult > 0;
211     case GREATER:
212       return compareResult >= 0;
213     default:
214       throw new RuntimeException("Unknown Compare op " + compareOp.name());
215     }
216   }
217 
218   public boolean filterRow() {
219     // If column was found, return false if it was matched, true if it was not
220     // If column not found, return true if we filter if missing, false if not
221     return this.foundColumn? !this.matchedColumn: this.filterIfMissing;
222   }
223   
224   public boolean hasFilterRow() {
225     return true;
226   }
227 
228   public void reset() {
229     foundColumn = false;
230     matchedColumn = false;
231   }
232 
233   /**
234    * Get whether entire row should be filtered if column is not found.
235    * @return true if row should be skipped if column not found, false if row
236    * should be let through anyways
237    */
238   public boolean getFilterIfMissing() {
239     return filterIfMissing;
240   }
241 
242   /**
243    * Set whether entire row should be filtered if column is not found.
244    * <p>
245    * If true, the entire row will be skipped if the column is not found.
246    * <p>
247    * If false, the row will pass if the column is not found.  This is default.
248    * @param filterIfMissing flag
249    */
250   public void setFilterIfMissing(boolean filterIfMissing) {
251     this.filterIfMissing = filterIfMissing;
252   }
253 
254   /**
255    * Get whether only the latest version of the column value should be compared.
256    * If true, the row will be returned if only the latest version of the column
257    * value matches. If false, the row will be returned if any version of the
258    * column value matches. The default is true.
259    * @return return value
260    */
261   public boolean getLatestVersionOnly() {
262     return latestVersionOnly;
263   }
264 
265   /**
266    * Set whether only the latest version of the column value should be compared.
267    * If true, the row will be returned if only the latest version of the column
268    * value matches. If false, the row will be returned if any version of the
269    * column value matches. The default is true.
270    * @param latestVersionOnly flag
271    */
272   public void setLatestVersionOnly(boolean latestVersionOnly) {
273     this.latestVersionOnly = latestVersionOnly;
274   }
275 
276   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
277     Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
278                                 "Expected 4 or 6 but got: %s", filterArguments.size());
279     byte [] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
280     byte [] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
281     CompareOp compareOp = ParseFilter.createCompareOp(filterArguments.get(2));
282     ByteArrayComparable comparator = ParseFilter.createComparator(
283       ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
284 
285     if (comparator instanceof RegexStringComparator ||
286         comparator instanceof SubstringComparator) {
287       if (compareOp != CompareOp.EQUAL &&
288           compareOp != CompareOp.NOT_EQUAL) {
289         throw new IllegalArgumentException ("A regexstring comparator and substring comparator " +
290                                             "can only be used with EQUAL and NOT_EQUAL");
291       }
292     }
293 
294     SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier,
295                                                                  compareOp, comparator);
296 
297     if (filterArguments.size() == 6) {
298       boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
299       boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
300       filter.setFilterIfMissing(filterIfMissing);
301       filter.setLatestVersionOnly(latestVersionOnly);
302     }
303     return filter;
304   }
305 
306   FilterProtos.SingleColumnValueFilter convert() {
307     FilterProtos.SingleColumnValueFilter.Builder builder =
308       FilterProtos.SingleColumnValueFilter.newBuilder();
309     if (this.columnFamily != null) {
310       builder.setColumnFamily(HBaseZeroCopyByteString.wrap(this.columnFamily));
311     }
312     if (this.columnQualifier != null) {
313       builder.setColumnQualifier(HBaseZeroCopyByteString.wrap(this.columnQualifier));
314     }
315     HBaseProtos.CompareType compareOp = CompareType.valueOf(this.compareOp.name());
316     builder.setCompareOp(compareOp);
317     builder.setComparator(ProtobufUtil.toComparator(this.comparator));
318     builder.setFilterIfMissing(this.filterIfMissing);
319     builder.setLatestVersionOnly(this.latestVersionOnly);
320 
321     return builder.build();
322   }
323 
324   /**
325    * @return The filter serialized using pb
326    */
327   public byte [] toByteArray() {
328     return convert().toByteArray();
329   }
330 
331   /**
332    * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
333    * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
334    * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
335    * @see #toByteArray
336    */
337   public static SingleColumnValueFilter parseFrom(final byte [] pbBytes)
338   throws DeserializationException {
339     FilterProtos.SingleColumnValueFilter proto;
340     try {
341       proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
342     } catch (InvalidProtocolBufferException e) {
343       throw new DeserializationException(e);
344     }
345 
346     final CompareOp compareOp =
347       CompareOp.valueOf(proto.getCompareOp().name());
348     final ByteArrayComparable comparator;
349     try {
350       comparator = ProtobufUtil.toComparator(proto.getComparator());
351     } catch (IOException ioe) {
352       throw new DeserializationException(ioe);
353     }
354 
355     return new SingleColumnValueFilter(proto.hasColumnFamily() ? proto.getColumnFamily()
356         .toByteArray() : null, proto.hasColumnQualifier() ? proto.getColumnQualifier()
357         .toByteArray() : null, compareOp, comparator, proto.getFilterIfMissing(), proto
358         .getLatestVersionOnly());
359   }
360 
361   /**
362    * @param other
363    * @return true if and only if the fields of the filter that are serialized
364    * are equal to the corresponding fields in other.  Used for testing.
365    */
366   boolean areSerializedFieldsEqual(Filter o) {
367     if (o == this) return true;
368     if (!(o instanceof SingleColumnValueFilter)) return false;
369 
370     SingleColumnValueFilter other = (SingleColumnValueFilter)o;
371     return Bytes.equals(this.getFamily(), other.getFamily())
372       && Bytes.equals(this.getQualifier(), other.getQualifier())
373       && this.compareOp.equals(other.compareOp)
374       && this.getComparator().areSerializedFieldsEqual(other.getComparator())
375       && this.getFilterIfMissing() == other.getFilterIfMissing()
376       && this.getLatestVersionOnly() == other.getLatestVersionOnly();
377   }
378 
379   /**
380    * The only CF this filter needs is given column family. So, it's the only essential
381    * column in whole scan. If filterIfMissing == false, all families are essential,
382    * because of possibility of skipping the rows without any data in filtered CF.
383    */
384   public boolean isFamilyEssential(byte[] name) {
385     return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
386   }
387 
388   @Override
389   public String toString() {
390     return String.format("%s (%s, %s, %s, %s)",
391         this.getClass().getSimpleName(), Bytes.toStringBinary(this.columnFamily),
392         Bytes.toStringBinary(this.columnQualifier), this.compareOp.name(),
393         Bytes.toStringBinary(this.comparator.getValue()));
394   }
395 }