View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.CellUtil;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.classification.InterfaceStability;
29  import org.apache.hadoop.hbase.exceptions.DeserializationException;
30  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
31  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
32  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
33  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
34  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.CompareType;
35  import org.apache.hadoop.hbase.util.ByteStringer;
36  import org.apache.hadoop.hbase.util.Bytes;
37  
38  import com.google.common.base.Preconditions;
39  import com.google.protobuf.InvalidProtocolBufferException;
40  
41  /**
42   * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp}
43   * operator (equal, greater, not equal, etc), and either a byte [] value or
44   * a ByteArrayComparable.
45   * <p>
46   * If we have a byte [] value then we just do a lexicographic compare. For
47   * example, if passed value is 'b' and cell has 'a' and the compare operator
48   * is LESS, then we will filter out this cell (return true).  If this is not
49   * sufficient (eg you want to deserialize a long and then compare it to a fixed
50   * long value), then you can pass in your own comparator instead.
51   * <p>
52   * You must also specify a family and qualifier.  Only the value of this column
53   * will be tested. When using this filter on a 
54   * {@link org.apache.hadoop.hbase.CellScanner} with specified
55   * inputs, the column to be tested should also be added as input (otherwise
56   * the filter will regard the column as missing).
57   * <p>
58   * To prevent the entire row from being emitted if the column is not found
59   * on a row, use {@link #setFilterIfMissing}.
60   * Otherwise, if the column is found, the entire row will be emitted only if
61   * the value passes.  If the value fails, the row will be filtered out.
62   * <p>
63   * In order to test values of previous versions (timestamps), set
64   * {@link #setLatestVersionOnly} to false. The default is true, meaning that
65   * only the latest version's value is tested and all previous versions are ignored.
66   * <p>
67   * To filter based on the value of all scanned columns, use {@link ValueFilter}.
68   */
69  @InterfaceAudience.Public
70  @InterfaceStability.Stable
71  public class SingleColumnValueFilter extends FilterBase {
72  
73    protected byte [] columnFamily;
74    protected byte [] columnQualifier;
75    protected CompareOp compareOp;
76    protected ByteArrayComparable comparator;
77    protected boolean foundColumn = false;
78    protected boolean matchedColumn = false;
79    protected boolean filterIfMissing = false;
80    protected boolean latestVersionOnly = true;
81  
82    /**
83     * Constructor for binary compare of the value of a single column.  If the
84     * column is found and the condition passes, all columns of the row will be
85     * emitted.  If the condition fails, the row will not be emitted.
86     * <p>
87     * Use the filterIfColumnMissing flag to set whether the rest of the columns
88     * in a row will be emitted if the specified column to check is not found in
89     * the row.
90     *
91     * @param family name of column family
92     * @param qualifier name of column qualifier
93     * @param compareOp operator
94     * @param value value to compare column values against
95     */
96    public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
97        final CompareOp compareOp, final byte[] value) {
98      this(family, qualifier, compareOp, new BinaryComparator(value));
99    }
100 
101   /**
102    * Constructor for binary compare of the value of a single column.  If the
103    * column is found and the condition passes, all columns of the row will be
104    * emitted.  If the condition fails, the row will not be emitted.
105    * <p>
106    * Use the filterIfColumnMissing flag to set whether the rest of the columns
107    * in a row will be emitted if the specified column to check is not found in
108    * the row.
109    *
110    * @param family name of column family
111    * @param qualifier name of column qualifier
112    * @param compareOp operator
113    * @param comparator Comparator to use.
114    */
115   public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
116       final CompareOp compareOp, final ByteArrayComparable comparator) {
117     this.columnFamily = family;
118     this.columnQualifier = qualifier;
119     this.compareOp = compareOp;
120     this.comparator = comparator;
121   }
122 
123   /**
124    * Constructor for protobuf deserialization only.
125    * @param family
126    * @param qualifier
127    * @param compareOp
128    * @param comparator
129    * @param filterIfMissing
130    * @param latestVersionOnly
131    */
132   protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
133       final CompareOp compareOp, ByteArrayComparable comparator, final boolean filterIfMissing,
134       final boolean latestVersionOnly) {
135     this(family, qualifier, compareOp, comparator);
136     this.filterIfMissing = filterIfMissing;
137     this.latestVersionOnly = latestVersionOnly;
138   }
139 
140   /**
141    * @return operator
142    */
143   public CompareOp getOperator() {
144     return compareOp;
145   }
146 
147   /**
148    * @return the comparator
149    */
150   public ByteArrayComparable getComparator() {
151     return comparator;
152   }
153 
154   /**
155    * @return the family
156    */
157   public byte[] getFamily() {
158     return columnFamily;
159   }
160 
161   /**
162    * @return the qualifier
163    */
164   public byte[] getQualifier() {
165     return columnQualifier;
166   }
167 
168   @Override
169   public boolean filterRowKey(Cell cell) throws IOException {
170     // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
171     return false;
172   }
173 
174   @Override
175   public ReturnCode filterKeyValue(Cell c) {
176     // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue()));
177     if (this.matchedColumn) {
178       // We already found and matched the single column, all keys now pass
179       return ReturnCode.INCLUDE;
180     } else if (this.latestVersionOnly && this.foundColumn) {
181       // We found but did not match the single column, skip to next row
182       return ReturnCode.NEXT_ROW;
183     }
184     if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) {
185       return ReturnCode.INCLUDE;
186     }
187     foundColumn = true;
188     if (filterColumnValue(c.getValueArray(),
189         c.getValueOffset(), c.getValueLength())) {
190       return this.latestVersionOnly? ReturnCode.NEXT_ROW: ReturnCode.INCLUDE;
191     }
192     this.matchedColumn = true;
193     return ReturnCode.INCLUDE;
194   }
195 
196   private boolean filterColumnValue(final byte [] data, final int offset,
197       final int length) {
198     int compareResult = this.comparator.compareTo(data, offset, length);
199     switch (this.compareOp) {
200     case LESS:
201       return compareResult <= 0;
202     case LESS_OR_EQUAL:
203       return compareResult < 0;
204     case EQUAL:
205       return compareResult != 0;
206     case NOT_EQUAL:
207       return compareResult == 0;
208     case GREATER_OR_EQUAL:
209       return compareResult > 0;
210     case GREATER:
211       return compareResult >= 0;
212     default:
213       throw new RuntimeException("Unknown Compare op " + compareOp.name());
214     }
215   }
216 
217   public boolean filterRow() {
218     // If column was found, return false if it was matched, true if it was not
219     // If column not found, return true if we filter if missing, false if not
220     return this.foundColumn? !this.matchedColumn: this.filterIfMissing;
221   }
222   
223   public boolean hasFilterRow() {
224     return true;
225   }
226 
227   public void reset() {
228     foundColumn = false;
229     matchedColumn = false;
230   }
231 
232   /**
233    * Get whether entire row should be filtered if column is not found.
234    * @return true if row should be skipped if column not found, false if row
235    * should be let through anyways
236    */
237   public boolean getFilterIfMissing() {
238     return filterIfMissing;
239   }
240 
241   /**
242    * Set whether entire row should be filtered if column is not found.
243    * <p>
244    * If true, the entire row will be skipped if the column is not found.
245    * <p>
246    * If false, the row will pass if the column is not found.  This is default.
247    * @param filterIfMissing flag
248    */
249   public void setFilterIfMissing(boolean filterIfMissing) {
250     this.filterIfMissing = filterIfMissing;
251   }
252 
253   /**
254    * Get whether only the latest version of the column value should be compared.
255    * If true, the row will be returned if only the latest version of the column
256    * value matches. If false, the row will be returned if any version of the
257    * column value matches. The default is true.
258    * @return return value
259    */
260   public boolean getLatestVersionOnly() {
261     return latestVersionOnly;
262   }
263 
264   /**
265    * Set whether only the latest version of the column value should be compared.
266    * If true, the row will be returned if only the latest version of the column
267    * value matches. If false, the row will be returned if any version of the
268    * column value matches. The default is true.
269    * @param latestVersionOnly flag
270    */
271   public void setLatestVersionOnly(boolean latestVersionOnly) {
272     this.latestVersionOnly = latestVersionOnly;
273   }
274 
275   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
276     Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
277                                 "Expected 4 or 6 but got: %s", filterArguments.size());
278     byte [] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
279     byte [] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
280     CompareOp compareOp = ParseFilter.createCompareOp(filterArguments.get(2));
281     ByteArrayComparable comparator = ParseFilter.createComparator(
282       ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
283 
284     if (comparator instanceof RegexStringComparator ||
285         comparator instanceof SubstringComparator) {
286       if (compareOp != CompareOp.EQUAL &&
287           compareOp != CompareOp.NOT_EQUAL) {
288         throw new IllegalArgumentException ("A regexstring comparator and substring comparator " +
289                                             "can only be used with EQUAL and NOT_EQUAL");
290       }
291     }
292 
293     SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier,
294                                                                  compareOp, comparator);
295 
296     if (filterArguments.size() == 6) {
297       boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
298       boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
299       filter.setFilterIfMissing(filterIfMissing);
300       filter.setLatestVersionOnly(latestVersionOnly);
301     }
302     return filter;
303   }
304 
305   FilterProtos.SingleColumnValueFilter convert() {
306     FilterProtos.SingleColumnValueFilter.Builder builder =
307       FilterProtos.SingleColumnValueFilter.newBuilder();
308     if (this.columnFamily != null) {
309       builder.setColumnFamily(ByteStringer.wrap(this.columnFamily));
310     }
311     if (this.columnQualifier != null) {
312       builder.setColumnQualifier(ByteStringer.wrap(this.columnQualifier));
313     }
314     HBaseProtos.CompareType compareOp = CompareType.valueOf(this.compareOp.name());
315     builder.setCompareOp(compareOp);
316     builder.setComparator(ProtobufUtil.toComparator(this.comparator));
317     builder.setFilterIfMissing(this.filterIfMissing);
318     builder.setLatestVersionOnly(this.latestVersionOnly);
319 
320     return builder.build();
321   }
322 
323   /**
324    * @return The filter serialized using pb
325    */
326   public byte [] toByteArray() {
327     return convert().toByteArray();
328   }
329 
330   /**
331    * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
332    * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
333    * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
334    * @see #toByteArray
335    */
336   public static SingleColumnValueFilter parseFrom(final byte [] pbBytes)
337   throws DeserializationException {
338     FilterProtos.SingleColumnValueFilter proto;
339     try {
340       proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
341     } catch (InvalidProtocolBufferException e) {
342       throw new DeserializationException(e);
343     }
344 
345     final CompareOp compareOp =
346       CompareOp.valueOf(proto.getCompareOp().name());
347     final ByteArrayComparable comparator;
348     try {
349       comparator = ProtobufUtil.toComparator(proto.getComparator());
350     } catch (IOException ioe) {
351       throw new DeserializationException(ioe);
352     }
353 
354     return new SingleColumnValueFilter(proto.hasColumnFamily() ? proto.getColumnFamily()
355         .toByteArray() : null, proto.hasColumnQualifier() ? proto.getColumnQualifier()
356         .toByteArray() : null, compareOp, comparator, proto.getFilterIfMissing(), proto
357         .getLatestVersionOnly());
358   }
359 
360   /**
361    * @param other
362    * @return true if and only if the fields of the filter that are serialized
363    * are equal to the corresponding fields in other.  Used for testing.
364    */
365   boolean areSerializedFieldsEqual(Filter o) {
366     if (o == this) return true;
367     if (!(o instanceof SingleColumnValueFilter)) return false;
368 
369     SingleColumnValueFilter other = (SingleColumnValueFilter)o;
370     return Bytes.equals(this.getFamily(), other.getFamily())
371       && Bytes.equals(this.getQualifier(), other.getQualifier())
372       && this.compareOp.equals(other.compareOp)
373       && this.getComparator().areSerializedFieldsEqual(other.getComparator())
374       && this.getFilterIfMissing() == other.getFilterIfMissing()
375       && this.getLatestVersionOnly() == other.getLatestVersionOnly();
376   }
377 
378   /**
379    * The only CF this filter needs is given column family. So, it's the only essential
380    * column in whole scan. If filterIfMissing == false, all families are essential,
381    * because of possibility of skipping the rows without any data in filtered CF.
382    */
383   public boolean isFamilyEssential(byte[] name) {
384     return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
385   }
386 
387   @Override
388   public String toString() {
389     return String.format("%s (%s, %s, %s, %s)",
390         this.getClass().getSimpleName(), Bytes.toStringBinary(this.columnFamily),
391         Bytes.toStringBinary(this.columnQualifier), this.compareOp.name(),
392         Bytes.toStringBinary(this.comparator.getValue()));
393   }
394 }