View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.filter;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.client.Scan;
27  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
28  import org.apache.hadoop.hbase.io.HbaseObjectWritable;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  import java.io.DataInput;
32  import java.io.DataOutput;
33  import java.io.IOException;
34  import java.util.Arrays;
35  import java.util.List;
36  import java.util.ArrayList;
37  
38  import com.google.common.base.Preconditions;
39  
40  /**
41   * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp}
42   * operator (equal, greater, not equal, etc), and either a byte [] value or
43   * a WritableByteArrayComparable.
44   * <p>
45   * If we have a byte [] value then we just do a lexicographic compare. For
46   * example, if passed value is 'b' and cell has 'a' and the compare operator
47   * is LESS, then we will filter out this cell (return true).  If this is not
48   * sufficient (eg you want to deserialize a long and then compare it to a fixed
49   * long value), then you can pass in your own comparator instead.
50   * <p>
51   * You must also specify a family and qualifier.  Only the value of this column
52   * will be tested. When using this filter on a {@link Scan} with specified
53   * inputs, the column to be tested should also be added as input (otherwise
54   * the filter will regard the column as missing).
55   * <p>
56   * To prevent the entire row from being emitted if the column is not found
57   * on a row, use {@link #setFilterIfMissing}.
58   * Otherwise, if the column is found, the entire row will be emitted only if
59   * the value passes.  If the value fails, the row will be filtered out.
60   * <p>
61   * In order to test values of previous versions (timestamps), set
62   * {@link #setLatestVersionOnly} to false. The default is true, meaning that
63   * only the latest version's value is tested and all previous versions are ignored.
64   * <p>
65   * To filter based on the value of all scanned columns, use {@link ValueFilter}.
66   */
67  public class SingleColumnValueFilter extends FilterBase {
68    static final Log LOG = LogFactory.getLog(SingleColumnValueFilter.class);
69  
70    protected byte [] columnFamily;
71    protected byte [] columnQualifier;
72    private CompareOp compareOp;
73    private WritableByteArrayComparable comparator;
74    private boolean foundColumn = false;
75    private boolean matchedColumn = false;
76    private boolean filterIfMissing = false;
77    private boolean latestVersionOnly = true;
78  
79    /**
80     * Writable constructor, do not use.
81     */
82    public SingleColumnValueFilter() {
83    }
84  
85    /**
86     * Constructor for binary compare of the value of a single column.  If the
87     * column is found and the condition passes, all columns of the row will be
88     * emitted.  If the condition fails, the row will not be emitted.
89     * <p>
90     * Use the filterIfColumnMissing flag to set whether the rest of the columns
91     * in a row will be emitted if the specified column to check is not found in
92     * the row.
93     *
94     * @param family name of column family
95     * @param qualifier name of column qualifier
96     * @param compareOp operator
97     * @param value value to compare column values against
98     */
99    public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
100       final CompareOp compareOp, final byte[] value) {
101     this(family, qualifier, compareOp, new BinaryComparator(value));
102   }
103 
104   /**
105    * Constructor for binary compare of the value of a single column.  If the
106    * column is found and the condition passes, all columns of the row will be
107    * emitted.  If the condition fails, the row will not be emitted.
108    * <p>
109    * Use the filterIfColumnMissing flag to set whether the rest of the columns
110    * in a row will be emitted if the specified column to check is not found in
111    * the row.
112    *
113    * @param family name of column family
114    * @param qualifier name of column qualifier
115    * @param compareOp operator
116    * @param comparator Comparator to use.
117    */
118   public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
119       final CompareOp compareOp, final WritableByteArrayComparable comparator) {
120     this.columnFamily = family;
121     this.columnQualifier = qualifier;
122     this.compareOp = compareOp;
123     this.comparator = comparator;
124   }
125 
126   /**
127    * @return operator
128    */
129   public CompareOp getOperator() {
130     return compareOp;
131   }
132 
133   /**
134    * @return the comparator
135    */
136   public WritableByteArrayComparable getComparator() {
137     return comparator;
138   }
139 
140   /**
141    * @return the family
142    */
143   public byte[] getFamily() {
144     return columnFamily;
145   }
146 
147   /**
148    * @return the qualifier
149    */
150   public byte[] getQualifier() {
151     return columnQualifier;
152   }
153 
154   public ReturnCode filterKeyValue(KeyValue keyValue) {
155     // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue()));
156     if (this.matchedColumn) {
157       // We already found and matched the single column, all keys now pass
158       return ReturnCode.INCLUDE;
159     } else if (this.latestVersionOnly && this.foundColumn) {
160       // We found but did not match the single column, skip to next row
161       return ReturnCode.NEXT_ROW;
162     }
163     if (!keyValue.matchingColumn(this.columnFamily, this.columnQualifier)) {
164       return ReturnCode.INCLUDE;
165     }
166     foundColumn = true;
167     if (filterColumnValue(keyValue.getBuffer(),
168         keyValue.getValueOffset(), keyValue.getValueLength())) {
169       return this.latestVersionOnly? ReturnCode.NEXT_ROW: ReturnCode.INCLUDE;
170     }
171     this.matchedColumn = true;
172     return ReturnCode.INCLUDE;
173   }
174 
175   private boolean filterColumnValue(final byte [] data, final int offset,
176       final int length) {
177     int compareResult = this.comparator.compareTo(data, offset, length);
178     switch (this.compareOp) {
179     case LESS:
180       return compareResult <= 0;
181     case LESS_OR_EQUAL:
182       return compareResult < 0;
183     case EQUAL:
184       return compareResult != 0;
185     case NOT_EQUAL:
186       return compareResult == 0;
187     case GREATER_OR_EQUAL:
188       return compareResult > 0;
189     case GREATER:
190       return compareResult >= 0;
191     default:
192       throw new RuntimeException("Unknown Compare op " + compareOp.name());
193     }
194   }
195 
196   public boolean filterRow() {
197     // If column was found, return false if it was matched, true if it was not
198     // If column not found, return true if we filter if missing, false if not
199     return this.foundColumn? !this.matchedColumn: this.filterIfMissing;
200   }
201 
202   public void reset() {
203     foundColumn = false;
204     matchedColumn = false;
205   }
206 
207   /**
208    * Get whether entire row should be filtered if column is not found.
209    * @return true if row should be skipped if column not found, false if row
210    * should be let through anyways
211    */
212   public boolean getFilterIfMissing() {
213     return filterIfMissing;
214   }
215 
216   /**
217    * Set whether entire row should be filtered if column is not found.
218    * <p>
219    * If true, the entire row will be skipped if the column is not found.
220    * <p>
221    * If false, the row will pass if the column is not found.  This is default.
222    * @param filterIfMissing flag
223    */
224   public void setFilterIfMissing(boolean filterIfMissing) {
225     this.filterIfMissing = filterIfMissing;
226   }
227 
228   /**
229    * Get whether only the latest version of the column value should be compared.
230    * If true, the row will be returned if only the latest version of the column
231    * value matches. If false, the row will be returned if any version of the
232    * column value matches. The default is true.
233    * @return return value
234    */
235   public boolean getLatestVersionOnly() {
236     return latestVersionOnly;
237   }
238 
239   /**
240    * Set whether only the latest version of the column value should be compared.
241    * If true, the row will be returned if only the latest version of the column
242    * value matches. If false, the row will be returned if any version of the
243    * column value matches. The default is true.
244    * @param latestVersionOnly flag
245    */
246   public void setLatestVersionOnly(boolean latestVersionOnly) {
247     this.latestVersionOnly = latestVersionOnly;
248   }
249 
250   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
251     Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
252                                 "Expected 4 or 6 but got: %s", filterArguments.size());
253     byte [] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
254     byte [] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
255     CompareOp compareOp = ParseFilter.createCompareOp(filterArguments.get(2));
256     WritableByteArrayComparable comparator = ParseFilter.createComparator(
257       ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
258 
259     if (comparator instanceof RegexStringComparator ||
260         comparator instanceof SubstringComparator) {
261       if (compareOp != CompareOp.EQUAL &&
262           compareOp != CompareOp.NOT_EQUAL) {
263         throw new IllegalArgumentException ("A regexstring comparator and substring comparator " +
264                                             "can only be used with EQUAL and NOT_EQUAL");
265       }
266     }
267 
268     SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier,
269                                                                  compareOp, comparator);
270 
271     if (filterArguments.size() == 6) {
272       boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
273       boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
274       filter.setFilterIfMissing(filterIfMissing);
275       filter.setLatestVersionOnly(latestVersionOnly);
276     }
277     return filter;
278   }
279 
280   public void readFields(final DataInput in) throws IOException {
281     this.columnFamily = Bytes.readByteArray(in);
282     if(this.columnFamily.length == 0) {
283       this.columnFamily = null;
284     }
285     this.columnQualifier = Bytes.readByteArray(in);
286     if(this.columnQualifier.length == 0) {
287       this.columnQualifier = null;
288     }
289     this.compareOp = CompareOp.valueOf(in.readUTF());
290     this.comparator =
291       (WritableByteArrayComparable)HbaseObjectWritable.readObject(in, null);
292     this.foundColumn = in.readBoolean();
293     this.matchedColumn = in.readBoolean();
294     this.filterIfMissing = in.readBoolean();
295     this.latestVersionOnly = in.readBoolean();
296   }
297 
298   public void write(final DataOutput out) throws IOException {
299     Bytes.writeByteArray(out, this.columnFamily);
300     Bytes.writeByteArray(out, this.columnQualifier);
301     out.writeUTF(compareOp.name());
302     HbaseObjectWritable.writeObject(out, comparator,
303         WritableByteArrayComparable.class, null);
304     out.writeBoolean(foundColumn);
305     out.writeBoolean(matchedColumn);
306     out.writeBoolean(filterIfMissing);
307     out.writeBoolean(latestVersionOnly);
308   }
309 
310   /**
311    * The only CF this filter needs is given column family. So, it's the only essential
312    * column in whole scan. If filterIfMissing == false, all families are essential,
313    * because of possibility of skipping the rows without any data in filtered CF.
314    */
315   public boolean isFamilyEssential(byte[] name) {
316     return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
317   }
318 
319   @Override
320   public String toString() {
321     return String.format("%s (%s, %s, %s, %s)",
322         this.getClass().getSimpleName(), Bytes.toStringBinary(this.columnFamily),
323         Bytes.toStringBinary(this.columnQualifier), this.compareOp.name(),
324         Bytes.toStringBinary(this.comparator.getValue()));
325   }
326 }