View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.CellComparator;
27  import org.apache.hadoop.hbase.CellUtil;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.exceptions.DeserializationException;
31  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
32  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
33  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
34  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
35  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.CompareType;
36  import org.apache.hadoop.hbase.util.ByteStringer;
37  import org.apache.hadoop.hbase.util.Bytes;
38  
39  import com.google.common.base.Preconditions;
40  import com.google.protobuf.InvalidProtocolBufferException;
41  
42  /**
43   * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp}
44   * operator (equal, greater, not equal, etc), and either a byte [] value or
45   * a ByteArrayComparable.
46   * <p>
47   * If we have a byte [] value then we just do a lexicographic compare. For
48   * example, if passed value is 'b' and cell has 'a' and the compare operator
49   * is LESS, then we will filter out this cell (return true).  If this is not
50   * sufficient (eg you want to deserialize a long and then compare it to a fixed
51   * long value), then you can pass in your own comparator instead.
52   * <p>
53   * You must also specify a family and qualifier.  Only the value of this column
54   * will be tested. When using this filter on a 
55   * {@link org.apache.hadoop.hbase.CellScanner} with specified
56   * inputs, the column to be tested should also be added as input (otherwise
57   * the filter will regard the column as missing).
58   * <p>
59   * To prevent the entire row from being emitted if the column is not found
60   * on a row, use {@link #setFilterIfMissing}.
61   * Otherwise, if the column is found, the entire row will be emitted only if
62   * the value passes.  If the value fails, the row will be filtered out.
63   * <p>
64   * In order to test values of previous versions (timestamps), set
65   * {@link #setLatestVersionOnly} to false. The default is true, meaning that
66   * only the latest version's value is tested and all previous versions are ignored.
67   * <p>
68   * To filter based on the value of all scanned columns, use {@link ValueFilter}.
69   */
70  @InterfaceAudience.Public
71  @InterfaceStability.Stable
72  public class SingleColumnValueFilter extends FilterBase {
73  
74    protected byte [] columnFamily;
75    protected byte [] columnQualifier;
76    protected CompareOp compareOp;
77    protected ByteArrayComparable comparator;
78    protected boolean foundColumn = false;
79    protected boolean matchedColumn = false;
80    protected boolean filterIfMissing = false;
81    protected boolean latestVersionOnly = true;
82  
83    /**
84     * Constructor for binary compare of the value of a single column.  If the
85     * column is found and the condition passes, all columns of the row will be
86     * emitted.  If the condition fails, the row will not be emitted.
87     * <p>
88     * Use the filterIfColumnMissing flag to set whether the rest of the columns
89     * in a row will be emitted if the specified column to check is not found in
90     * the row.
91     *
92     * @param family name of column family
93     * @param qualifier name of column qualifier
94     * @param compareOp operator
95     * @param value value to compare column values against
96     */
97    public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
98        final CompareOp compareOp, final byte[] value) {
99      this(family, qualifier, compareOp, new BinaryComparator(value));
100   }
101 
102   /**
103    * Constructor for binary compare of the value of a single column.  If the
104    * column is found and the condition passes, all columns of the row will be
105    * emitted.  If the condition fails, the row will not be emitted.
106    * <p>
107    * Use the filterIfColumnMissing flag to set whether the rest of the columns
108    * in a row will be emitted if the specified column to check is not found in
109    * the row.
110    *
111    * @param family name of column family
112    * @param qualifier name of column qualifier
113    * @param compareOp operator
114    * @param comparator Comparator to use.
115    */
116   public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
117       final CompareOp compareOp, final ByteArrayComparable comparator) {
118     this.columnFamily = family;
119     this.columnQualifier = qualifier;
120     this.compareOp = compareOp;
121     this.comparator = comparator;
122   }
123 
124   /**
125    * Constructor for protobuf deserialization only.
126    * @param family
127    * @param qualifier
128    * @param compareOp
129    * @param comparator
130    * @param filterIfMissing
131    * @param latestVersionOnly
132    */
133   protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
134       final CompareOp compareOp, ByteArrayComparable comparator, final boolean filterIfMissing,
135       final boolean latestVersionOnly) {
136     this(family, qualifier, compareOp, comparator);
137     this.filterIfMissing = filterIfMissing;
138     this.latestVersionOnly = latestVersionOnly;
139   }
140 
141   /**
142    * @return operator
143    */
144   public CompareOp getOperator() {
145     return compareOp;
146   }
147 
148   /**
149    * @return the comparator
150    */
151   public ByteArrayComparable getComparator() {
152     return comparator;
153   }
154 
155   /**
156    * @return the family
157    */
158   public byte[] getFamily() {
159     return columnFamily;
160   }
161 
162   /**
163    * @return the qualifier
164    */
165   public byte[] getQualifier() {
166     return columnQualifier;
167   }
168 
169   @Override
170   public boolean filterRowKey(Cell cell) throws IOException {
171     // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
172     return false;
173   }
174 
175   @Override
176   public ReturnCode filterKeyValue(Cell c) {
177     // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue()));
178     if (this.matchedColumn) {
179       // We already found and matched the single column, all keys now pass
180       return ReturnCode.INCLUDE;
181     } else if (this.latestVersionOnly && this.foundColumn) {
182       // We found but did not match the single column, skip to next row
183       return ReturnCode.NEXT_ROW;
184     }
185     if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) {
186       return ReturnCode.INCLUDE;
187     }
188     foundColumn = true;
189     if (filterColumnValue(c)) {
190       return this.latestVersionOnly? ReturnCode.NEXT_ROW: ReturnCode.INCLUDE;
191     }
192     this.matchedColumn = true;
193     return ReturnCode.INCLUDE;
194   }
195 
196   private boolean filterColumnValue(final Cell cell) {
197     int compareResult = CellComparator.compareValue(cell, this.comparator);
198     switch (this.compareOp) {
199     case LESS:
200       return compareResult <= 0;
201     case LESS_OR_EQUAL:
202       return compareResult < 0;
203     case EQUAL:
204       return compareResult != 0;
205     case NOT_EQUAL:
206       return compareResult == 0;
207     case GREATER_OR_EQUAL:
208       return compareResult > 0;
209     case GREATER:
210       return compareResult >= 0;
211     default:
212       throw new RuntimeException("Unknown Compare op " + compareOp.name());
213     }
214   }
215 
216   public boolean filterRow() {
217     // If column was found, return false if it was matched, true if it was not
218     // If column not found, return true if we filter if missing, false if not
219     return this.foundColumn? !this.matchedColumn: this.filterIfMissing;
220   }
221   
222   public boolean hasFilterRow() {
223     return true;
224   }
225 
226   public void reset() {
227     foundColumn = false;
228     matchedColumn = false;
229   }
230 
231   /**
232    * Get whether entire row should be filtered if column is not found.
233    * @return true if row should be skipped if column not found, false if row
234    * should be let through anyways
235    */
236   public boolean getFilterIfMissing() {
237     return filterIfMissing;
238   }
239 
240   /**
241    * Set whether entire row should be filtered if column is not found.
242    * <p>
243    * If true, the entire row will be skipped if the column is not found.
244    * <p>
245    * If false, the row will pass if the column is not found.  This is default.
246    * @param filterIfMissing flag
247    */
248   public void setFilterIfMissing(boolean filterIfMissing) {
249     this.filterIfMissing = filterIfMissing;
250   }
251 
252   /**
253    * Get whether only the latest version of the column value should be compared.
254    * If true, the row will be returned if only the latest version of the column
255    * value matches. If false, the row will be returned if any version of the
256    * column value matches. The default is true.
257    * @return return value
258    */
259   public boolean getLatestVersionOnly() {
260     return latestVersionOnly;
261   }
262 
263   /**
264    * Set whether only the latest version of the column value should be compared.
265    * If true, the row will be returned if only the latest version of the column
266    * value matches. If false, the row will be returned if any version of the
267    * column value matches. The default is true.
268    * @param latestVersionOnly flag
269    */
270   public void setLatestVersionOnly(boolean latestVersionOnly) {
271     this.latestVersionOnly = latestVersionOnly;
272   }
273 
274   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
275     Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
276                                 "Expected 4 or 6 but got: %s", filterArguments.size());
277     byte [] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
278     byte [] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
279     CompareOp compareOp = ParseFilter.createCompareOp(filterArguments.get(2));
280     ByteArrayComparable comparator = ParseFilter.createComparator(
281       ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
282 
283     if (comparator instanceof RegexStringComparator ||
284         comparator instanceof SubstringComparator) {
285       if (compareOp != CompareOp.EQUAL &&
286           compareOp != CompareOp.NOT_EQUAL) {
287         throw new IllegalArgumentException ("A regexstring comparator and substring comparator " +
288                                             "can only be used with EQUAL and NOT_EQUAL");
289       }
290     }
291 
292     SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier,
293                                                                  compareOp, comparator);
294 
295     if (filterArguments.size() == 6) {
296       boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
297       boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
298       filter.setFilterIfMissing(filterIfMissing);
299       filter.setLatestVersionOnly(latestVersionOnly);
300     }
301     return filter;
302   }
303 
304   FilterProtos.SingleColumnValueFilter convert() {
305     FilterProtos.SingleColumnValueFilter.Builder builder =
306       FilterProtos.SingleColumnValueFilter.newBuilder();
307     if (this.columnFamily != null) {
308       builder.setColumnFamily(ByteStringer.wrap(this.columnFamily));
309     }
310     if (this.columnQualifier != null) {
311       builder.setColumnQualifier(ByteStringer.wrap(this.columnQualifier));
312     }
313     HBaseProtos.CompareType compareOp = CompareType.valueOf(this.compareOp.name());
314     builder.setCompareOp(compareOp);
315     builder.setComparator(ProtobufUtil.toComparator(this.comparator));
316     builder.setFilterIfMissing(this.filterIfMissing);
317     builder.setLatestVersionOnly(this.latestVersionOnly);
318 
319     return builder.build();
320   }
321 
322   /**
323    * @return The filter serialized using pb
324    */
325   public byte [] toByteArray() {
326     return convert().toByteArray();
327   }
328 
329   /**
330    * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
331    * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
332    * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
333    * @see #toByteArray
334    */
335   public static SingleColumnValueFilter parseFrom(final byte [] pbBytes)
336   throws DeserializationException {
337     FilterProtos.SingleColumnValueFilter proto;
338     try {
339       proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
340     } catch (InvalidProtocolBufferException e) {
341       throw new DeserializationException(e);
342     }
343 
344     final CompareOp compareOp =
345       CompareOp.valueOf(proto.getCompareOp().name());
346     final ByteArrayComparable comparator;
347     try {
348       comparator = ProtobufUtil.toComparator(proto.getComparator());
349     } catch (IOException ioe) {
350       throw new DeserializationException(ioe);
351     }
352 
353     return new SingleColumnValueFilter(proto.hasColumnFamily() ? proto.getColumnFamily()
354         .toByteArray() : null, proto.hasColumnQualifier() ? proto.getColumnQualifier()
355         .toByteArray() : null, compareOp, comparator, proto.getFilterIfMissing(), proto
356         .getLatestVersionOnly());
357   }
358 
359   /**
360    * @param other
361    * @return true if and only if the fields of the filter that are serialized
362    * are equal to the corresponding fields in other.  Used for testing.
363    */
364   boolean areSerializedFieldsEqual(Filter o) {
365     if (o == this) return true;
366     if (!(o instanceof SingleColumnValueFilter)) return false;
367 
368     SingleColumnValueFilter other = (SingleColumnValueFilter)o;
369     return Bytes.equals(this.getFamily(), other.getFamily())
370       && Bytes.equals(this.getQualifier(), other.getQualifier())
371       && this.compareOp.equals(other.compareOp)
372       && this.getComparator().areSerializedFieldsEqual(other.getComparator())
373       && this.getFilterIfMissing() == other.getFilterIfMissing()
374       && this.getLatestVersionOnly() == other.getLatestVersionOnly();
375   }
376 
377   /**
378    * The only CF this filter needs is given column family. So, it's the only essential
379    * column in whole scan. If filterIfMissing == false, all families are essential,
380    * because of possibility of skipping the rows without any data in filtered CF.
381    */
382   public boolean isFamilyEssential(byte[] name) {
383     return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
384   }
385 
386   @Override
387   public String toString() {
388     return String.format("%s (%s, %s, %s, %s)",
389         this.getClass().getSimpleName(), Bytes.toStringBinary(this.columnFamily),
390         Bytes.toStringBinary(this.columnQualifier), this.compareOp.name(),
391         Bytes.toStringBinary(this.comparator.getValue()));
392   }
393 }