001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Objects;
023import org.apache.hadoop.hbase.Cell;
024import org.apache.hadoop.hbase.CellUtil;
025import org.apache.hadoop.hbase.CompareOperator;
026import org.apache.hadoop.hbase.PrivateCellUtil;
027import org.apache.hadoop.hbase.exceptions.DeserializationException;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.yetus.audience.InterfaceAudience;
030
031import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
032import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
033import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
034
035import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
036import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
037import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
038import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.CompareType;
039
040/**
041 * This filter is used to filter cells based on value. It takes a {@link CompareOperator} operator
042 * (equal, greater, not equal, etc), and either a byte [] value or a ByteArrayComparable.
043 * <p>
044 * If we have a byte [] value then we just do a lexicographic compare. For example, if passed value
045 * is 'b' and cell has 'a' and the compare operator is LESS, then we will filter out this cell
046 * (return true). If this is not sufficient (eg you want to deserialize a long and then compare it
047 * to a fixed long value), then you can pass in your own comparator instead.
048 * <p>
049 * You must also specify a family and qualifier. Only the value of this column will be tested. When
050 * using this filter on a {@link org.apache.hadoop.hbase.CellScanner} with specified inputs, the
051 * column to be tested should also be added as input (otherwise the filter will regard the column as
052 * missing).
053 * <p>
054 * To prevent the entire row from being emitted if the column is not found on a row, use
055 * {@link #setFilterIfMissing}. Otherwise, if the column is found, the entire row will be emitted
056 * only if the value passes. If the value fails, the row will be filtered out.
057 * <p>
058 * In order to test values of previous versions (timestamps), set {@link #setLatestVersionOnly} to
059 * false. The default is true, meaning that only the latest version's value is tested and all
060 * previous versions are ignored.
061 * <p>
062 * To filter based on the value of all scanned columns, use {@link ValueFilter}.
063 */
064@InterfaceAudience.Public
065public class SingleColumnValueFilter extends FilterBase {
066
067  protected byte[] columnFamily;
068  protected byte[] columnQualifier;
069  protected CompareOperator op;
070  protected org.apache.hadoop.hbase.filter.ByteArrayComparable comparator;
071  protected boolean foundColumn = false;
072  protected boolean matchedColumn = false;
073  protected boolean filterIfMissing = false;
074  protected boolean latestVersionOnly = true;
075
076  /**
077   * Constructor for binary compare of the value of a single column. If the column is found and the
078   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
079   * not be emitted.
080   * <p>
081   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
082   * emitted if the specified column to check is not found in the row.
083   * @param family    name of column family
084   * @param qualifier name of column qualifier
085   * @param op        operator
086   * @param value     value to compare column values against
087   */
088  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
089    final CompareOperator op, final byte[] value) {
090    this(family, qualifier, op, new org.apache.hadoop.hbase.filter.BinaryComparator(value));
091  }
092
093  /**
094   * Constructor for binary compare of the value of a single column. If the column is found and the
095   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
096   * not be emitted.
097   * <p>
098   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
099   * emitted if the specified column to check is not found in the row.
100   * @param family     name of column family
101   * @param qualifier  name of column qualifier
102   * @param op         operator
103   * @param comparator Comparator to use.
104   */
105  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
106    final CompareOperator op, final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) {
107    this.columnFamily = family;
108    this.columnQualifier = qualifier;
109    this.op = op;
110    this.comparator = comparator;
111  }
112
113  /**
114   * Constructor for protobuf deserialization only.
115   */
116  protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
117    final CompareOperator op, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator,
118    final boolean filterIfMissing, final boolean latestVersionOnly) {
119    this(family, qualifier, op, comparator);
120    this.filterIfMissing = filterIfMissing;
121    this.latestVersionOnly = latestVersionOnly;
122  }
123
124  public CompareOperator getCompareOperator() {
125    return op;
126  }
127
128  /** Returns the comparator */
129  public org.apache.hadoop.hbase.filter.ByteArrayComparable getComparator() {
130    return comparator;
131  }
132
133  /** Returns the family */
134  public byte[] getFamily() {
135    return columnFamily;
136  }
137
138  /** Returns the qualifier */
139  public byte[] getQualifier() {
140    return columnQualifier;
141  }
142
143  @Override
144  public boolean filterRowKey(Cell cell) throws IOException {
145    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
146    return false;
147  }
148
149  @Override
150  public ReturnCode filterCell(final Cell c) {
151    // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" +
152    // Bytes.toString(keyValue.getValue()));
153    if (this.matchedColumn) {
154      // We already found and matched the single column, all keys now pass
155      return ReturnCode.INCLUDE;
156    } else if (this.latestVersionOnly && this.foundColumn) {
157      // We found but did not match the single column, skip to next row
158      return ReturnCode.NEXT_ROW;
159    }
160    if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) {
161      return ReturnCode.INCLUDE;
162    }
163    foundColumn = true;
164    if (filterColumnValue(c)) {
165      return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE;
166    }
167    this.matchedColumn = true;
168    return ReturnCode.INCLUDE;
169  }
170
171  private boolean filterColumnValue(final Cell cell) {
172    int compareResult = PrivateCellUtil.compareValue(cell, this.comparator);
173    return CompareFilter.compare(this.op, compareResult);
174  }
175
176  @Override
177  public boolean filterRow() {
178    // If column was found, return false if it was matched, true if it was not
179    // If column not found, return true if we filter if missing, false if not
180    return this.foundColumn ? !this.matchedColumn : this.filterIfMissing;
181  }
182
183  @Override
184  public boolean hasFilterRow() {
185    return true;
186  }
187
188  @Override
189  public void reset() {
190    foundColumn = false;
191    matchedColumn = false;
192  }
193
194  /**
195   * Get whether entire row should be filtered if column is not found.
196   * @return true if row should be skipped if column not found, false if row should be let through
197   *         anyways
198   */
199  public boolean getFilterIfMissing() {
200    return filterIfMissing;
201  }
202
203  /**
204   * Set whether entire row should be filtered if column is not found.
205   * <p>
206   * If true, the entire row will be skipped if the column is not found.
207   * <p>
208   * If false, the row will pass if the column is not found. This is default.
209   * @param filterIfMissing flag
210   */
211  public void setFilterIfMissing(boolean filterIfMissing) {
212    this.filterIfMissing = filterIfMissing;
213  }
214
215  /**
216   * Get whether only the latest version of the column value should be compared. If true, the row
217   * will be returned if only the latest version of the column value matches. If false, the row will
218   * be returned if any version of the column value matches. The default is true.
219   * @return return value
220   */
221  public boolean getLatestVersionOnly() {
222    return latestVersionOnly;
223  }
224
225  /**
226   * Set whether only the latest version of the column value should be compared. If true, the row
227   * will be returned if only the latest version of the column value matches. If false, the row will
228   * be returned if any version of the column value matches. The default is true.
229   * @param latestVersionOnly flag
230   */
231  public void setLatestVersionOnly(boolean latestVersionOnly) {
232    this.latestVersionOnly = latestVersionOnly;
233  }
234
235  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
236    Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
237      "Expected 4 or 6 but got: %s", filterArguments.size());
238    byte[] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
239    byte[] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
240    CompareOperator op = ParseFilter.createCompareOperator(filterArguments.get(2));
241    org.apache.hadoop.hbase.filter.ByteArrayComparable comparator =
242      ParseFilter.createComparator(ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
243
244    if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) {
245      if (op != CompareOperator.EQUAL && op != CompareOperator.NOT_EQUAL) {
246        throw new IllegalArgumentException("A regexstring comparator and substring comparator "
247          + "can only be used with EQUAL and NOT_EQUAL");
248      }
249    }
250
251    SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, comparator);
252
253    if (filterArguments.size() == 6) {
254      boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
255      boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
256      filter.setFilterIfMissing(filterIfMissing);
257      filter.setLatestVersionOnly(latestVersionOnly);
258    }
259    return filter;
260  }
261
262  FilterProtos.SingleColumnValueFilter convert() {
263    FilterProtos.SingleColumnValueFilter.Builder builder =
264      FilterProtos.SingleColumnValueFilter.newBuilder();
265    if (this.columnFamily != null) {
266      builder.setColumnFamily(UnsafeByteOperations.unsafeWrap(this.columnFamily));
267    }
268    if (this.columnQualifier != null) {
269      builder.setColumnQualifier(UnsafeByteOperations.unsafeWrap(this.columnQualifier));
270    }
271    HBaseProtos.CompareType compareOp = CompareType.valueOf(this.op.name());
272    builder.setCompareOp(compareOp);
273    builder.setComparator(ProtobufUtil.toComparator(this.comparator));
274    builder.setFilterIfMissing(this.filterIfMissing);
275    builder.setLatestVersionOnly(this.latestVersionOnly);
276
277    return builder.build();
278  }
279
280  /** Returns The filter serialized using pb */
281  @Override
282  public byte[] toByteArray() {
283    return convert().toByteArray();
284  }
285
286  /**
287   * Parse a serialized representation of {@link SingleColumnValueFilter}
288   * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
289   * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
290   * @throws DeserializationException if an error occurred
291   * @see #toByteArray
292   */
293  public static SingleColumnValueFilter parseFrom(final byte[] pbBytes)
294    throws DeserializationException {
295    FilterProtos.SingleColumnValueFilter proto;
296    try {
297      proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
298    } catch (InvalidProtocolBufferException e) {
299      throw new DeserializationException(e);
300    }
301
302    final CompareOperator compareOp = CompareOperator.valueOf(proto.getCompareOp().name());
303    final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator;
304    try {
305      comparator = ProtobufUtil.toComparator(proto.getComparator());
306    } catch (IOException ioe) {
307      throw new DeserializationException(ioe);
308    }
309
310    return new SingleColumnValueFilter(
311      proto.hasColumnFamily() ? proto.getColumnFamily().toByteArray() : null,
312      proto.hasColumnQualifier() ? proto.getColumnQualifier().toByteArray() : null, compareOp,
313      comparator, proto.getFilterIfMissing(), proto.getLatestVersionOnly());
314  }
315
316  /**
317   * Returns true if and only if the fields of the filter that are serialized are equal to the
318   * corresponding fields in other. Used for testing.
319   */
320  @Override
321  boolean areSerializedFieldsEqual(Filter o) {
322    if (o == this) return true;
323    if (!(o instanceof SingleColumnValueFilter)) return false;
324
325    SingleColumnValueFilter other = (SingleColumnValueFilter) o;
326    return Bytes.equals(this.getFamily(), other.getFamily())
327      && Bytes.equals(this.getQualifier(), other.getQualifier()) && this.op.equals(other.op)
328      && this.getComparator().areSerializedFieldsEqual(other.getComparator())
329      && this.getFilterIfMissing() == other.getFilterIfMissing()
330      && this.getLatestVersionOnly() == other.getLatestVersionOnly();
331  }
332
333  /**
334   * The only CF this filter needs is given column family. So, it's the only essential column in
335   * whole scan. If filterIfMissing == false, all families are essential, because of possibility of
336   * skipping the rows without any data in filtered CF.
337   */
338  @Override
339  public boolean isFamilyEssential(byte[] name) {
340    return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
341  }
342
343  @Override
344  public String toString() {
345    return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(),
346      Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier),
347      this.op.name(), Bytes.toStringBinary(this.comparator.getValue()));
348  }
349
350  @Override
351  public boolean equals(Object obj) {
352    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
353  }
354
355  @Override
356  public int hashCode() {
357    return Objects.hash(Bytes.hashCode(getFamily()), Bytes.hashCode(getQualifier()), this.op,
358      getComparator(), getFilterIfMissing(), getLatestVersionOnly());
359  }
360}