View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  
25  import org.apache.hadoop.hbase.util.ByteStringer;
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.Cell;
31  import org.apache.hadoop.hbase.CellUtil;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.exceptions.DeserializationException;
34  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
35  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
36  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
37  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
38  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.CompareType;
39  import org.apache.hadoop.hbase.util.Bytes;
40  
41  import com.google.common.base.Preconditions;
42  import com.google.protobuf.InvalidProtocolBufferException;
43  
44  /**
45   * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp}
46   * operator (equal, greater, not equal, etc), and either a byte [] value or
47   * a ByteArrayComparable.
48   * <p>
49   * If we have a byte [] value then we just do a lexicographic compare. For
50   * example, if passed value is 'b' and cell has 'a' and the compare operator
51   * is LESS, then we will filter out this cell (return true).  If this is not
52   * sufficient (eg you want to deserialize a long and then compare it to a fixed
53   * long value), then you can pass in your own comparator instead.
54   * <p>
55   * You must also specify a family and qualifier.  Only the value of this column
56   * will be tested. When using this filter on a {@link Scan} with specified
57   * inputs, the column to be tested should also be added as input (otherwise
58   * the filter will regard the column as missing).
59   * <p>
60   * To prevent the entire row from being emitted if the column is not found
61   * on a row, use {@link #setFilterIfMissing}.
62   * Otherwise, if the column is found, the entire row will be emitted only if
63   * the value passes.  If the value fails, the row will be filtered out.
64   * <p>
65   * In order to test values of previous versions (timestamps), set
66   * {@link #setLatestVersionOnly} to false. The default is true, meaning that
67   * only the latest version's value is tested and all previous versions are ignored.
68   * <p>
69   * To filter based on the value of all scanned columns, use {@link ValueFilter}.
70   */
71  @InterfaceAudience.Public
72  @InterfaceStability.Stable
73  public class SingleColumnValueFilter extends FilterBase {
74    static final Log LOG = LogFactory.getLog(SingleColumnValueFilter.class);
75  
76    protected byte [] columnFamily;
77    protected byte [] columnQualifier;
78    protected CompareOp compareOp;
79    protected ByteArrayComparable comparator;
80    protected boolean foundColumn = false;
81    protected boolean matchedColumn = false;
82    protected boolean filterIfMissing = false;
83    protected boolean latestVersionOnly = true;
84  
85    /**
86     * Constructor for binary compare of the value of a single column.  If the
87     * column is found and the condition passes, all columns of the row will be
88     * emitted.  If the condition fails, the row will not be emitted.
89     * <p>
90     * Use the filterIfColumnMissing flag to set whether the rest of the columns
91     * in a row will be emitted if the specified column to check is not found in
92     * the row.
93     *
94     * @param family name of column family
95     * @param qualifier name of column qualifier
96     * @param compareOp operator
97     * @param value value to compare column values against
98     */
99    public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
100       final CompareOp compareOp, final byte[] value) {
101     this(family, qualifier, compareOp, new BinaryComparator(value));
102   }
103 
104   /**
105    * Constructor for binary compare of the value of a single column.  If the
106    * column is found and the condition passes, all columns of the row will be
107    * emitted.  If the condition fails, the row will not be emitted.
108    * <p>
109    * Use the filterIfColumnMissing flag to set whether the rest of the columns
110    * in a row will be emitted if the specified column to check is not found in
111    * the row.
112    *
113    * @param family name of column family
114    * @param qualifier name of column qualifier
115    * @param compareOp operator
116    * @param comparator Comparator to use.
117    */
118   public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
119       final CompareOp compareOp, final ByteArrayComparable comparator) {
120     this.columnFamily = family;
121     this.columnQualifier = qualifier;
122     this.compareOp = compareOp;
123     this.comparator = comparator;
124   }
125 
126   /**
127    * Constructor for protobuf deserialization only.
128    * @param family
129    * @param qualifier
130    * @param compareOp
131    * @param comparator
132    * @param filterIfMissing
133    * @param latestVersionOnly
134    */
135   protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
136       final CompareOp compareOp, ByteArrayComparable comparator, final boolean filterIfMissing,
137       final boolean latestVersionOnly) {
138     this(family, qualifier, compareOp, comparator);
139     this.filterIfMissing = filterIfMissing;
140     this.latestVersionOnly = latestVersionOnly;
141   }
142 
143   /**
144    * @return operator
145    */
146   public CompareOp getOperator() {
147     return compareOp;
148   }
149 
150   /**
151    * @return the comparator
152    */
153   public ByteArrayComparable getComparator() {
154     return comparator;
155   }
156 
157   /**
158    * @return the family
159    */
160   public byte[] getFamily() {
161     return columnFamily;
162   }
163 
164   /**
165    * @return the qualifier
166    */
167   public byte[] getQualifier() {
168     return columnQualifier;
169   }
170 
171   @Override
172   public ReturnCode filterKeyValue(Cell c) {
173     // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue()));
174     if (this.matchedColumn) {
175       // We already found and matched the single column, all keys now pass
176       return ReturnCode.INCLUDE;
177     } else if (this.latestVersionOnly && this.foundColumn) {
178       // We found but did not match the single column, skip to next row
179       return ReturnCode.NEXT_ROW;
180     }
181     if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) {
182       return ReturnCode.INCLUDE;
183     }
184     foundColumn = true;
185     if (filterColumnValue(c.getValueArray(),
186         c.getValueOffset(), c.getValueLength())) {
187       return this.latestVersionOnly? ReturnCode.NEXT_ROW: ReturnCode.INCLUDE;
188     }
189     this.matchedColumn = true;
190     return ReturnCode.INCLUDE;
191   }
192 
193   private boolean filterColumnValue(final byte [] data, final int offset,
194       final int length) {
195     int compareResult = this.comparator.compareTo(data, offset, length);
196     switch (this.compareOp) {
197     case LESS:
198       return compareResult <= 0;
199     case LESS_OR_EQUAL:
200       return compareResult < 0;
201     case EQUAL:
202       return compareResult != 0;
203     case NOT_EQUAL:
204       return compareResult == 0;
205     case GREATER_OR_EQUAL:
206       return compareResult > 0;
207     case GREATER:
208       return compareResult >= 0;
209     default:
210       throw new RuntimeException("Unknown Compare op " + compareOp.name());
211     }
212   }
213 
214   public boolean filterRow() {
215     // If column was found, return false if it was matched, true if it was not
216     // If column not found, return true if we filter if missing, false if not
217     return this.foundColumn? !this.matchedColumn: this.filterIfMissing;
218   }
219   
220   public boolean hasFilterRow() {
221     return true;
222   }
223 
224   public void reset() {
225     foundColumn = false;
226     matchedColumn = false;
227   }
228 
229   /**
230    * Get whether entire row should be filtered if column is not found.
231    * @return true if row should be skipped if column not found, false if row
232    * should be let through anyways
233    */
234   public boolean getFilterIfMissing() {
235     return filterIfMissing;
236   }
237 
238   /**
239    * Set whether entire row should be filtered if column is not found.
240    * <p>
241    * If true, the entire row will be skipped if the column is not found.
242    * <p>
243    * If false, the row will pass if the column is not found.  This is default.
244    * @param filterIfMissing flag
245    */
246   public void setFilterIfMissing(boolean filterIfMissing) {
247     this.filterIfMissing = filterIfMissing;
248   }
249 
250   /**
251    * Get whether only the latest version of the column value should be compared.
252    * If true, the row will be returned if only the latest version of the column
253    * value matches. If false, the row will be returned if any version of the
254    * column value matches. The default is true.
255    * @return return value
256    */
257   public boolean getLatestVersionOnly() {
258     return latestVersionOnly;
259   }
260 
261   /**
262    * Set whether only the latest version of the column value should be compared.
263    * If true, the row will be returned if only the latest version of the column
264    * value matches. If false, the row will be returned if any version of the
265    * column value matches. The default is true.
266    * @param latestVersionOnly flag
267    */
268   public void setLatestVersionOnly(boolean latestVersionOnly) {
269     this.latestVersionOnly = latestVersionOnly;
270   }
271 
272   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
273     Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
274                                 "Expected 4 or 6 but got: %s", filterArguments.size());
275     byte [] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
276     byte [] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
277     CompareOp compareOp = ParseFilter.createCompareOp(filterArguments.get(2));
278     ByteArrayComparable comparator = ParseFilter.createComparator(
279       ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
280 
281     if (comparator instanceof RegexStringComparator ||
282         comparator instanceof SubstringComparator) {
283       if (compareOp != CompareOp.EQUAL &&
284           compareOp != CompareOp.NOT_EQUAL) {
285         throw new IllegalArgumentException ("A regexstring comparator and substring comparator " +
286                                             "can only be used with EQUAL and NOT_EQUAL");
287       }
288     }
289 
290     SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier,
291                                                                  compareOp, comparator);
292 
293     if (filterArguments.size() == 6) {
294       boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
295       boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
296       filter.setFilterIfMissing(filterIfMissing);
297       filter.setLatestVersionOnly(latestVersionOnly);
298     }
299     return filter;
300   }
301 
302   FilterProtos.SingleColumnValueFilter convert() {
303     FilterProtos.SingleColumnValueFilter.Builder builder =
304       FilterProtos.SingleColumnValueFilter.newBuilder();
305     if (this.columnFamily != null) {
306       builder.setColumnFamily(ByteStringer.wrap(this.columnFamily));
307     }
308     if (this.columnQualifier != null) {
309       builder.setColumnQualifier(ByteStringer.wrap(this.columnQualifier));
310     }
311     HBaseProtos.CompareType compareOp = CompareType.valueOf(this.compareOp.name());
312     builder.setCompareOp(compareOp);
313     builder.setComparator(ProtobufUtil.toComparator(this.comparator));
314     builder.setFilterIfMissing(this.filterIfMissing);
315     builder.setLatestVersionOnly(this.latestVersionOnly);
316 
317     return builder.build();
318   }
319 
320   /**
321    * @return The filter serialized using pb
322    */
323   public byte [] toByteArray() {
324     return convert().toByteArray();
325   }
326 
327   /**
328    * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
329    * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
330    * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
331    * @see #toByteArray
332    */
333   public static SingleColumnValueFilter parseFrom(final byte [] pbBytes)
334   throws DeserializationException {
335     FilterProtos.SingleColumnValueFilter proto;
336     try {
337       proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
338     } catch (InvalidProtocolBufferException e) {
339       throw new DeserializationException(e);
340     }
341 
342     final CompareOp compareOp =
343       CompareOp.valueOf(proto.getCompareOp().name());
344     final ByteArrayComparable comparator;
345     try {
346       comparator = ProtobufUtil.toComparator(proto.getComparator());
347     } catch (IOException ioe) {
348       throw new DeserializationException(ioe);
349     }
350 
351     return new SingleColumnValueFilter(proto.hasColumnFamily() ? proto.getColumnFamily()
352         .toByteArray() : null, proto.hasColumnQualifier() ? proto.getColumnQualifier()
353         .toByteArray() : null, compareOp, comparator, proto.getFilterIfMissing(), proto
354         .getLatestVersionOnly());
355   }
356 
357   /**
358    * @param other
359    * @return true if and only if the fields of the filter that are serialized
360    * are equal to the corresponding fields in other.  Used for testing.
361    */
362   boolean areSerializedFieldsEqual(Filter o) {
363     if (o == this) return true;
364     if (!(o instanceof SingleColumnValueFilter)) return false;
365 
366     SingleColumnValueFilter other = (SingleColumnValueFilter)o;
367     return Bytes.equals(this.getFamily(), other.getFamily())
368       && Bytes.equals(this.getQualifier(), other.getQualifier())
369       && this.compareOp.equals(other.compareOp)
370       && this.getComparator().areSerializedFieldsEqual(other.getComparator())
371       && this.getFilterIfMissing() == other.getFilterIfMissing()
372       && this.getLatestVersionOnly() == other.getLatestVersionOnly();
373   }
374 
375   /**
376    * The only CF this filter needs is given column family. So, it's the only essential
377    * column in whole scan. If filterIfMissing == false, all families are essential,
378    * because of possibility of skipping the rows without any data in filtered CF.
379    */
380   public boolean isFamilyEssential(byte[] name) {
381     return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
382   }
383 
384   @Override
385   public String toString() {
386     return String.format("%s (%s, %s, %s, %s)",
387         this.getClass().getSimpleName(), Bytes.toStringBinary(this.columnFamily),
388         Bytes.toStringBinary(this.columnQualifier), this.compareOp.name(),
389         Bytes.toStringBinary(this.comparator.getValue()));
390   }
391 }