001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Objects;
023import org.apache.hadoop.hbase.Cell;
024import org.apache.hadoop.hbase.CellUtil;
025import org.apache.hadoop.hbase.CompareOperator;
026import org.apache.hadoop.hbase.PrivateCellUtil;
027import org.apache.hadoop.hbase.exceptions.DeserializationException;
028import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
029import org.apache.hadoop.hbase.util.Bytes;
030import org.apache.yetus.audience.InterfaceAudience;
031
032import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
033import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
034import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
035
036import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
037import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
038import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
039import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.CompareType;
040
041/**
 * This filter is used to filter cells based on value. It takes a {@link CompareOperator} operator
 * (equal, greater, not equal, etc), and either a byte [] value or a ByteArrayComparable.
044 * <p>
045 * If we have a byte [] value then we just do a lexicographic compare. For example, if passed value
046 * is 'b' and cell has 'a' and the compare operator is LESS, then we will filter out this cell
047 * (return true). If this is not sufficient (eg you want to deserialize a long and then compare it
048 * to a fixed long value), then you can pass in your own comparator instead.
049 * <p>
050 * You must also specify a family and qualifier. Only the value of this column will be tested. When
051 * using this filter on a {@link org.apache.hadoop.hbase.CellScanner} with specified inputs, the
052 * column to be tested should also be added as input (otherwise the filter will regard the column as
053 * missing).
054 * <p>
055 * To prevent the entire row from being emitted if the column is not found on a row, use
056 * {@link #setFilterIfMissing}. Otherwise, if the column is found, the entire row will be emitted
057 * only if the value passes. If the value fails, the row will be filtered out.
058 * <p>
059 * In order to test values of previous versions (timestamps), set {@link #setLatestVersionOnly} to
060 * false. The default is true, meaning that only the latest version's value is tested and all
061 * previous versions are ignored.
062 * <p>
063 * To filter based on the value of all scanned columns, use {@link ValueFilter}.
064 */
065@InterfaceAudience.Public
066public class SingleColumnValueFilter extends FilterBase {
067
068  protected byte[] columnFamily;
069  protected byte[] columnQualifier;
070  protected CompareOperator op;
071  protected org.apache.hadoop.hbase.filter.ByteArrayComparable comparator;
072  protected boolean foundColumn = false;
073  protected boolean matchedColumn = false;
074  protected boolean filterIfMissing = false;
075  protected boolean latestVersionOnly = true;
076
077  /**
078   * Constructor for binary compare of the value of a single column. If the column is found and the
079   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
080   * not be emitted.
081   * <p>
082   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
083   * emitted if the specified column to check is not found in the row.
084   * @param family    name of column family
085   * @param qualifier name of column qualifier
086   * @param compareOp operator
087   * @param value     value to compare column values against
088   * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use
089   *             {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, byte[])} instead.
090   */
091  @Deprecated
092  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
093    final CompareOp compareOp, final byte[] value) {
094    this(family, qualifier, CompareOperator.valueOf(compareOp.name()),
095      new org.apache.hadoop.hbase.filter.BinaryComparator(value));
096  }
097
098  /**
099   * Constructor for binary compare of the value of a single column. If the column is found and the
100   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
101   * not be emitted.
102   * <p>
103   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
104   * emitted if the specified column to check is not found in the row.
105   * @param family    name of column family
106   * @param qualifier name of column qualifier
107   * @param op        operator
108   * @param value     value to compare column values against
109   */
110  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
111    final CompareOperator op, final byte[] value) {
112    this(family, qualifier, op, new org.apache.hadoop.hbase.filter.BinaryComparator(value));
113  }
114
115  /**
116   * Constructor for binary compare of the value of a single column. If the column is found and the
117   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
118   * not be emitted.
119   * <p>
120   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
121   * emitted if the specified column to check is not found in the row.
122   * @param family     name of column family
123   * @param qualifier  name of column qualifier
124   * @param compareOp  operator
125   * @param comparator Comparator to use.
126   * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use
127   *             {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, ByteArrayComparable)}
128   *             instead.
129   */
130  @Deprecated
131  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
132    final CompareOp compareOp,
133    final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) {
134    this(family, qualifier, CompareOperator.valueOf(compareOp.name()), comparator);
135  }
136
137  /**
138   * Constructor for binary compare of the value of a single column. If the column is found and the
139   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
140   * not be emitted.
141   * <p>
142   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
143   * emitted if the specified column to check is not found in the row.
144   * @param family     name of column family
145   * @param qualifier  name of column qualifier
146   * @param op         operator
147   * @param comparator Comparator to use.
148   */
149  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
150    final CompareOperator op, final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) {
151    this.columnFamily = family;
152    this.columnQualifier = qualifier;
153    this.op = op;
154    this.comparator = comparator;
155  }
156
157  /**
158   * Constructor for protobuf deserialization only. nnnnnn * @deprecated Since 2.0.0. Will be
159   * removed in 3.0.0. Use
160   * {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, ByteArrayComparable, boolean, boolean)}
161   * instead.
162   */
163  @Deprecated
164  protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
165    final CompareOp compareOp, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator,
166    final boolean filterIfMissing, final boolean latestVersionOnly) {
167    this(family, qualifier, CompareOperator.valueOf(compareOp.name()), comparator, filterIfMissing,
168      latestVersionOnly);
169  }
170
171  /**
172   * Constructor for protobuf deserialization only. nnnnnn
173   */
174  protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
175    final CompareOperator op, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator,
176    final boolean filterIfMissing, final boolean latestVersionOnly) {
177    this(family, qualifier, op, comparator);
178    this.filterIfMissing = filterIfMissing;
179    this.latestVersionOnly = latestVersionOnly;
180  }
181
182  /**
183   * n * @deprecated since 2.0.0. Will be removed in 3.0.0. Use {@link #getCompareOperator()}
184   * instead.
185   */
186  @Deprecated
187  public CompareOp getOperator() {
188    return CompareOp.valueOf(op.name());
189  }
190
191  public CompareOperator getCompareOperator() {
192    return op;
193  }
194
195  /** Returns the comparator */
196  public org.apache.hadoop.hbase.filter.ByteArrayComparable getComparator() {
197    return comparator;
198  }
199
200  /** Returns the family */
201  public byte[] getFamily() {
202    return columnFamily;
203  }
204
205  /** Returns the qualifier */
206  public byte[] getQualifier() {
207    return columnQualifier;
208  }
209
210  @Override
211  public boolean filterRowKey(Cell cell) throws IOException {
212    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
213    return false;
214  }
215
216  @Deprecated
217  @Override
218  public ReturnCode filterKeyValue(final Cell c) {
219    return filterCell(c);
220  }
221
222  @Override
223  public ReturnCode filterCell(final Cell c) {
224    // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" +
225    // Bytes.toString(keyValue.getValue()));
226    if (this.matchedColumn) {
227      // We already found and matched the single column, all keys now pass
228      return ReturnCode.INCLUDE;
229    } else if (this.latestVersionOnly && this.foundColumn) {
230      // We found but did not match the single column, skip to next row
231      return ReturnCode.NEXT_ROW;
232    }
233    if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) {
234      return ReturnCode.INCLUDE;
235    }
236    foundColumn = true;
237    if (filterColumnValue(c)) {
238      return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE;
239    }
240    this.matchedColumn = true;
241    return ReturnCode.INCLUDE;
242  }
243
244  private boolean filterColumnValue(final Cell cell) {
245    int compareResult = PrivateCellUtil.compareValue(cell, this.comparator);
246    return CompareFilter.compare(this.op, compareResult);
247  }
248
249  @Override
250  public boolean filterRow() {
251    // If column was found, return false if it was matched, true if it was not
252    // If column not found, return true if we filter if missing, false if not
253    return this.foundColumn ? !this.matchedColumn : this.filterIfMissing;
254  }
255
256  @Override
257  public boolean hasFilterRow() {
258    return true;
259  }
260
261  @Override
262  public void reset() {
263    foundColumn = false;
264    matchedColumn = false;
265  }
266
267  /**
268   * Get whether entire row should be filtered if column is not found.
269   * @return true if row should be skipped if column not found, false if row should be let through
270   *         anyways
271   */
272  public boolean getFilterIfMissing() {
273    return filterIfMissing;
274  }
275
276  /**
277   * Set whether entire row should be filtered if column is not found.
278   * <p>
279   * If true, the entire row will be skipped if the column is not found.
280   * <p>
281   * If false, the row will pass if the column is not found. This is default.
282   * @param filterIfMissing flag
283   */
284  public void setFilterIfMissing(boolean filterIfMissing) {
285    this.filterIfMissing = filterIfMissing;
286  }
287
288  /**
289   * Get whether only the latest version of the column value should be compared. If true, the row
290   * will be returned if only the latest version of the column value matches. If false, the row will
291   * be returned if any version of the column value matches. The default is true.
292   * @return return value
293   */
294  public boolean getLatestVersionOnly() {
295    return latestVersionOnly;
296  }
297
298  /**
299   * Set whether only the latest version of the column value should be compared. If true, the row
300   * will be returned if only the latest version of the column value matches. If false, the row will
301   * be returned if any version of the column value matches. The default is true.
302   * @param latestVersionOnly flag
303   */
304  public void setLatestVersionOnly(boolean latestVersionOnly) {
305    this.latestVersionOnly = latestVersionOnly;
306  }
307
308  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
309    Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
310      "Expected 4 or 6 but got: %s", filterArguments.size());
311    byte[] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
312    byte[] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
313    CompareOperator op = ParseFilter.createCompareOperator(filterArguments.get(2));
314    org.apache.hadoop.hbase.filter.ByteArrayComparable comparator =
315      ParseFilter.createComparator(ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
316
317    if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) {
318      if (op != CompareOperator.EQUAL && op != CompareOperator.NOT_EQUAL) {
319        throw new IllegalArgumentException("A regexstring comparator and substring comparator "
320          + "can only be used with EQUAL and NOT_EQUAL");
321      }
322    }
323
324    SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, comparator);
325
326    if (filterArguments.size() == 6) {
327      boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
328      boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
329      filter.setFilterIfMissing(filterIfMissing);
330      filter.setLatestVersionOnly(latestVersionOnly);
331    }
332    return filter;
333  }
334
335  FilterProtos.SingleColumnValueFilter convert() {
336    FilterProtos.SingleColumnValueFilter.Builder builder =
337      FilterProtos.SingleColumnValueFilter.newBuilder();
338    if (this.columnFamily != null) {
339      builder.setColumnFamily(UnsafeByteOperations.unsafeWrap(this.columnFamily));
340    }
341    if (this.columnQualifier != null) {
342      builder.setColumnQualifier(UnsafeByteOperations.unsafeWrap(this.columnQualifier));
343    }
344    HBaseProtos.CompareType compareOp = CompareType.valueOf(this.op.name());
345    builder.setCompareOp(compareOp);
346    builder.setComparator(ProtobufUtil.toComparator(this.comparator));
347    builder.setFilterIfMissing(this.filterIfMissing);
348    builder.setLatestVersionOnly(this.latestVersionOnly);
349
350    return builder.build();
351  }
352
353  /** Returns The filter serialized using pb */
354  @Override
355  public byte[] toByteArray() {
356    return convert().toByteArray();
357  }
358
359  /**
360   * Parse a serialized representation of {@link SingleColumnValueFilter}
361   * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
362   * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
363   * @throws DeserializationException if an error occurred
364   * @see #toByteArray
365   */
366  public static SingleColumnValueFilter parseFrom(final byte[] pbBytes)
367    throws DeserializationException {
368    FilterProtos.SingleColumnValueFilter proto;
369    try {
370      proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
371    } catch (InvalidProtocolBufferException e) {
372      throw new DeserializationException(e);
373    }
374
375    final CompareOperator compareOp = CompareOperator.valueOf(proto.getCompareOp().name());
376    final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator;
377    try {
378      comparator = ProtobufUtil.toComparator(proto.getComparator());
379    } catch (IOException ioe) {
380      throw new DeserializationException(ioe);
381    }
382
383    return new SingleColumnValueFilter(
384      proto.hasColumnFamily() ? proto.getColumnFamily().toByteArray() : null,
385      proto.hasColumnQualifier() ? proto.getColumnQualifier().toByteArray() : null, compareOp,
386      comparator, proto.getFilterIfMissing(), proto.getLatestVersionOnly());
387  }
388
389  /**
390   * Returns true if and only if the fields of the filter that are serialized are equal to the
391   * corresponding fields in other. Used for testing.
392   */
393  @Override
394  boolean areSerializedFieldsEqual(Filter o) {
395    if (o == this) return true;
396    if (!(o instanceof SingleColumnValueFilter)) return false;
397
398    SingleColumnValueFilter other = (SingleColumnValueFilter) o;
399    return Bytes.equals(this.getFamily(), other.getFamily())
400      && Bytes.equals(this.getQualifier(), other.getQualifier()) && this.op.equals(other.op)
401      && this.getComparator().areSerializedFieldsEqual(other.getComparator())
402      && this.getFilterIfMissing() == other.getFilterIfMissing()
403      && this.getLatestVersionOnly() == other.getLatestVersionOnly();
404  }
405
406  /**
407   * The only CF this filter needs is given column family. So, it's the only essential column in
408   * whole scan. If filterIfMissing == false, all families are essential, because of possibility of
409   * skipping the rows without any data in filtered CF.
410   */
411  @Override
412  public boolean isFamilyEssential(byte[] name) {
413    return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
414  }
415
416  @Override
417  public String toString() {
418    return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(),
419      Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier),
420      this.op.name(), Bytes.toStringBinary(this.comparator.getValue()));
421  }
422
423  @Override
424  public boolean equals(Object obj) {
425    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
426  }
427
428  @Override
429  public int hashCode() {
430    return Objects.hash(Bytes.hashCode(getFamily()), Bytes.hashCode(getQualifier()), this.op,
431      getComparator(), getFilterIfMissing(), getLatestVersionOnly());
432  }
433}