001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Objects;
023import org.apache.hadoop.hbase.Cell;
024import org.apache.hadoop.hbase.CellUtil;
025import org.apache.hadoop.hbase.CompareOperator;
026import org.apache.hadoop.hbase.PrivateCellUtil;
027import org.apache.hadoop.hbase.exceptions.DeserializationException;
028import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
029import org.apache.hadoop.hbase.util.Bytes;
030import org.apache.yetus.audience.InterfaceAudience;
031
032import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
033import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
034import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
035
036import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
037import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
038import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
039import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.CompareType;
040
041/**
042 * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp}
043 * operator (equal, greater, not equal, etc), and either a byte [] value or a ByteArrayComparable.
044 * <p>
045 * If we have a byte [] value then we just do a lexicographic compare. For example, if passed value
046 * is 'b' and cell has 'a' and the compare operator is LESS, then we will filter out this cell
047 * (return true). If this is not sufficient (eg you want to deserialize a long and then compare it
048 * to a fixed long value), then you can pass in your own comparator instead.
049 * <p>
050 * You must also specify a family and qualifier. Only the value of this column will be tested. When
051 * using this filter on a {@link org.apache.hadoop.hbase.CellScanner} with specified inputs, the
052 * column to be tested should also be added as input (otherwise the filter will regard the column as
053 * missing).
054 * <p>
055 * To prevent the entire row from being emitted if the column is not found on a row, use
056 * {@link #setFilterIfMissing}. Otherwise, if the column is found, the entire row will be emitted
057 * only if the value passes. If the value fails, the row will be filtered out.
058 * <p>
059 * In order to test values of previous versions (timestamps), set {@link #setLatestVersionOnly} to
060 * false. The default is true, meaning that only the latest version's value is tested and all
061 * previous versions are ignored.
062 * <p>
063 * To filter based on the value of all scanned columns, use {@link ValueFilter}.
064 */
065@InterfaceAudience.Public
066public class SingleColumnValueFilter extends FilterBase {
067
068  protected byte[] columnFamily;
069  protected byte[] columnQualifier;
070  protected CompareOperator op;
071  protected org.apache.hadoop.hbase.filter.ByteArrayComparable comparator;
072  protected boolean foundColumn = false;
073  protected boolean matchedColumn = false;
074  protected boolean filterIfMissing = false;
075  protected boolean latestVersionOnly = true;
076
077  /**
078   * Constructor for binary compare of the value of a single column. If the column is found and the
079   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
080   * not be emitted.
081   * <p>
082   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
083   * emitted if the specified column to check is not found in the row.
084   * @param family    name of column family
085   * @param qualifier name of column qualifier
086   * @param compareOp operator
087   * @param value     value to compare column values against
088   * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use
089   *             {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, byte[])} instead.
090   */
091  @Deprecated
092  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
093    final CompareOp compareOp, final byte[] value) {
094    this(family, qualifier, CompareOperator.valueOf(compareOp.name()),
095      new org.apache.hadoop.hbase.filter.BinaryComparator(value));
096  }
097
098  /**
099   * Constructor for binary compare of the value of a single column. If the column is found and the
100   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
101   * not be emitted.
102   * <p>
103   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
104   * emitted if the specified column to check is not found in the row.
105   * @param family    name of column family
106   * @param qualifier name of column qualifier
107   * @param op        operator
108   * @param value     value to compare column values against
109   */
110  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
111    final CompareOperator op, final byte[] value) {
112    this(family, qualifier, op, new org.apache.hadoop.hbase.filter.BinaryComparator(value));
113  }
114
115  /**
116   * Constructor for binary compare of the value of a single column. If the column is found and the
117   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
118   * not be emitted.
119   * <p>
120   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
121   * emitted if the specified column to check is not found in the row.
122   * @param family     name of column family
123   * @param qualifier  name of column qualifier
124   * @param compareOp  operator
125   * @param comparator Comparator to use.
126   * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use
127   *             {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, ByteArrayComparable)}
128   *             instead.
129   */
130  @Deprecated
131  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
132    final CompareOp compareOp,
133    final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) {
134    this(family, qualifier, CompareOperator.valueOf(compareOp.name()), comparator);
135  }
136
137  /**
138   * Constructor for binary compare of the value of a single column. If the column is found and the
139   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
140   * not be emitted.
141   * <p>
142   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
143   * emitted if the specified column to check is not found in the row.
144   * @param family     name of column family
145   * @param qualifier  name of column qualifier
146   * @param op         operator
147   * @param comparator Comparator to use.
148   */
149  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
150    final CompareOperator op, final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) {
151    this.columnFamily = family;
152    this.columnQualifier = qualifier;
153    this.op = op;
154    this.comparator = comparator;
155  }
156
157  /**
158   * Constructor for protobuf deserialization only.
159   * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use
160   *             {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, ByteArrayComparable, boolean, boolean)}
161   *             instead.
162   */
163  @Deprecated
164  protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
165    final CompareOp compareOp, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator,
166    final boolean filterIfMissing, final boolean latestVersionOnly) {
167    this(family, qualifier, CompareOperator.valueOf(compareOp.name()), comparator, filterIfMissing,
168      latestVersionOnly);
169  }
170
171  /**
172   * Constructor for protobuf deserialization only.
173   */
174  protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
175    final CompareOperator op, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator,
176    final boolean filterIfMissing, final boolean latestVersionOnly) {
177    this(family, qualifier, op, comparator);
178    this.filterIfMissing = filterIfMissing;
179    this.latestVersionOnly = latestVersionOnly;
180  }
181
182  /**
183   * @deprecated since 2.0.0. Will be removed in 3.0.0. Use {@link #getCompareOperator()} instead.
184   */
185  @Deprecated
186  public CompareOp getOperator() {
187    return CompareOp.valueOf(op.name());
188  }
189
190  public CompareOperator getCompareOperator() {
191    return op;
192  }
193
194  /** Returns the comparator */
195  public org.apache.hadoop.hbase.filter.ByteArrayComparable getComparator() {
196    return comparator;
197  }
198
199  /** Returns the family */
200  public byte[] getFamily() {
201    return columnFamily;
202  }
203
204  /** Returns the qualifier */
205  public byte[] getQualifier() {
206    return columnQualifier;
207  }
208
209  @Override
210  public boolean filterRowKey(Cell cell) throws IOException {
211    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
212    return false;
213  }
214
215  @Deprecated
216  @Override
217  public ReturnCode filterKeyValue(final Cell c) {
218    return filterCell(c);
219  }
220
221  @Override
222  public ReturnCode filterCell(final Cell c) {
223    // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" +
224    // Bytes.toString(keyValue.getValue()));
225    if (this.matchedColumn) {
226      // We already found and matched the single column, all keys now pass
227      return ReturnCode.INCLUDE;
228    } else if (this.latestVersionOnly && this.foundColumn) {
229      // We found but did not match the single column, skip to next row
230      return ReturnCode.NEXT_ROW;
231    }
232    if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) {
233      return ReturnCode.INCLUDE;
234    }
235    foundColumn = true;
236    if (filterColumnValue(c)) {
237      return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE;
238    }
239    this.matchedColumn = true;
240    return ReturnCode.INCLUDE;
241  }
242
243  private boolean filterColumnValue(final Cell cell) {
244    int compareResult = PrivateCellUtil.compareValue(cell, this.comparator);
245    return CompareFilter.compare(this.op, compareResult);
246  }
247
248  @Override
249  public boolean filterRow() {
250    // If column was found, return false if it was matched, true if it was not
251    // If column not found, return true if we filter if missing, false if not
252    return this.foundColumn ? !this.matchedColumn : this.filterIfMissing;
253  }
254
255  @Override
256  public boolean hasFilterRow() {
257    return true;
258  }
259
260  @Override
261  public void reset() {
262    foundColumn = false;
263    matchedColumn = false;
264  }
265
266  /**
267   * Get whether entire row should be filtered if column is not found.
268   * @return true if row should be skipped if column not found, false if row should be let through
269   *         anyways
270   */
271  public boolean getFilterIfMissing() {
272    return filterIfMissing;
273  }
274
275  /**
276   * Set whether entire row should be filtered if column is not found.
277   * <p>
278   * If true, the entire row will be skipped if the column is not found.
279   * <p>
280   * If false, the row will pass if the column is not found. This is default.
281   * @param filterIfMissing flag
282   */
283  public void setFilterIfMissing(boolean filterIfMissing) {
284    this.filterIfMissing = filterIfMissing;
285  }
286
287  /**
288   * Get whether only the latest version of the column value should be compared. If true, the row
289   * will be returned if only the latest version of the column value matches. If false, the row will
290   * be returned if any version of the column value matches. The default is true.
291   * @return return value
292   */
293  public boolean getLatestVersionOnly() {
294    return latestVersionOnly;
295  }
296
297  /**
298   * Set whether only the latest version of the column value should be compared. If true, the row
299   * will be returned if only the latest version of the column value matches. If false, the row will
300   * be returned if any version of the column value matches. The default is true.
301   * @param latestVersionOnly flag
302   */
303  public void setLatestVersionOnly(boolean latestVersionOnly) {
304    this.latestVersionOnly = latestVersionOnly;
305  }
306
307  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
308    Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
309      "Expected 4 or 6 but got: %s", filterArguments.size());
310    byte[] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
311    byte[] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
312    CompareOperator op = ParseFilter.createCompareOperator(filterArguments.get(2));
313    org.apache.hadoop.hbase.filter.ByteArrayComparable comparator =
314      ParseFilter.createComparator(ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
315
316    if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) {
317      if (op != CompareOperator.EQUAL && op != CompareOperator.NOT_EQUAL) {
318        throw new IllegalArgumentException("A regexstring comparator and substring comparator "
319          + "can only be used with EQUAL and NOT_EQUAL");
320      }
321    }
322
323    SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, comparator);
324
325    if (filterArguments.size() == 6) {
326      boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
327      boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
328      filter.setFilterIfMissing(filterIfMissing);
329      filter.setLatestVersionOnly(latestVersionOnly);
330    }
331    return filter;
332  }
333
334  FilterProtos.SingleColumnValueFilter convert() {
335    FilterProtos.SingleColumnValueFilter.Builder builder =
336      FilterProtos.SingleColumnValueFilter.newBuilder();
337    if (this.columnFamily != null) {
338      builder.setColumnFamily(UnsafeByteOperations.unsafeWrap(this.columnFamily));
339    }
340    if (this.columnQualifier != null) {
341      builder.setColumnQualifier(UnsafeByteOperations.unsafeWrap(this.columnQualifier));
342    }
343    HBaseProtos.CompareType compareOp = CompareType.valueOf(this.op.name());
344    builder.setCompareOp(compareOp);
345    builder.setComparator(ProtobufUtil.toComparator(this.comparator));
346    builder.setFilterIfMissing(this.filterIfMissing);
347    builder.setLatestVersionOnly(this.latestVersionOnly);
348
349    return builder.build();
350  }
351
352  /** Returns The filter serialized using pb */
353  @Override
354  public byte[] toByteArray() {
355    return convert().toByteArray();
356  }
357
358  /**
359   * Parse a serialized representation of {@link SingleColumnValueFilter}
360   * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
361   * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
362   * @throws DeserializationException if an error occurred
363   * @see #toByteArray
364   */
365  public static SingleColumnValueFilter parseFrom(final byte[] pbBytes)
366    throws DeserializationException {
367    FilterProtos.SingleColumnValueFilter proto;
368    try {
369      proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
370    } catch (InvalidProtocolBufferException e) {
371      throw new DeserializationException(e);
372    }
373
374    final CompareOperator compareOp = CompareOperator.valueOf(proto.getCompareOp().name());
375    final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator;
376    try {
377      comparator = ProtobufUtil.toComparator(proto.getComparator());
378    } catch (IOException ioe) {
379      throw new DeserializationException(ioe);
380    }
381
382    return new SingleColumnValueFilter(
383      proto.hasColumnFamily() ? proto.getColumnFamily().toByteArray() : null,
384      proto.hasColumnQualifier() ? proto.getColumnQualifier().toByteArray() : null, compareOp,
385      comparator, proto.getFilterIfMissing(), proto.getLatestVersionOnly());
386  }
387
388  /**
389   * Returns true if and only if the fields of the filter that are serialized are equal to the
390   * corresponding fields in other. Used for testing.
391   */
392  @Override
393  boolean areSerializedFieldsEqual(Filter o) {
394    if (o == this) return true;
395    if (!(o instanceof SingleColumnValueFilter)) return false;
396
397    SingleColumnValueFilter other = (SingleColumnValueFilter) o;
398    return Bytes.equals(this.getFamily(), other.getFamily())
399      && Bytes.equals(this.getQualifier(), other.getQualifier()) && this.op.equals(other.op)
400      && this.getComparator().areSerializedFieldsEqual(other.getComparator())
401      && this.getFilterIfMissing() == other.getFilterIfMissing()
402      && this.getLatestVersionOnly() == other.getLatestVersionOnly();
403  }
404
405  /**
406   * The only CF this filter needs is given column family. So, it's the only essential column in
407   * whole scan. If filterIfMissing == false, all families are essential, because of possibility of
408   * skipping the rows without any data in filtered CF.
409   */
410  @Override
411  public boolean isFamilyEssential(byte[] name) {
412    return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
413  }
414
415  @Override
416  public String toString() {
417    return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(),
418      Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier),
419      this.op.name(), Bytes.toStringBinary(this.comparator.getValue()));
420  }
421
422  @Override
423  public boolean equals(Object obj) {
424    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
425  }
426
427  @Override
428  public int hashCode() {
429    return Objects.hash(Bytes.hashCode(getFamily()), Bytes.hashCode(getQualifier()), this.op,
430      getComparator(), getFilterIfMissing(), getLatestVersionOnly());
431  }
432}