001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Objects;
023import org.apache.hadoop.hbase.Cell;
024import org.apache.hadoop.hbase.CellUtil;
025import org.apache.hadoop.hbase.CompareOperator;
026import org.apache.hadoop.hbase.PrivateCellUtil;
027import org.apache.hadoop.hbase.exceptions.DeserializationException;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.yetus.audience.InterfaceAudience;
030
031import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
032import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
033import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
034
035import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
036import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
037import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
038import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.CompareType;
039
040/**
041 * This filter is used to filter cells based on value. It takes a {@link CompareOperator} operator
042 * (equal, greater, not equal, etc), and either a byte [] value or a ByteArrayComparable.
043 * <p>
044 * If we have a byte [] value then we just do a lexicographic compare. For example, if passed value
045 * is 'b' and cell has 'a' and the compare operator is LESS, then we will filter out this cell
046 * (return true). If this is not sufficient (eg you want to deserialize a long and then compare it
047 * to a fixed long value), then you can pass in your own comparator instead.
048 * <p>
049 * You must also specify a family and qualifier. Only the value of this column will be tested. When
050 * using this filter on a {@link org.apache.hadoop.hbase.CellScanner} with specified inputs, the
051 * column to be tested should also be added as input (otherwise the filter will regard the column as
052 * missing).
053 * <p>
054 * To prevent the entire row from being emitted if the column is not found on a row, use
055 * {@link #setFilterIfMissing}. Otherwise, if the column is found, the entire row will be emitted
056 * only if the value passes. If the value fails, the row will be filtered out.
057 * <p>
058 * In order to test values of previous versions (timestamps), set {@link #setLatestVersionOnly} to
059 * false. The default is true, meaning that only the latest version's value is tested and all
060 * previous versions are ignored.
061 * <p>
062 * To filter based on the value of all scanned columns, use {@link ValueFilter}.
063 */
064@InterfaceAudience.Public
065public class SingleColumnValueFilter extends FilterBase {
066
067  protected byte[] columnFamily;
068  protected byte[] columnQualifier;
069  protected CompareOperator op;
070  protected org.apache.hadoop.hbase.filter.ByteArrayComparable comparator;
071  protected boolean foundColumn = false;
072  protected boolean matchedColumn = false;
073  protected boolean filterIfMissing = false;
074  protected boolean latestVersionOnly = true;
075
076  /**
077   * Constructor for binary compare of the value of a single column. If the column is found and the
078   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
079   * not be emitted.
080   * <p>
081   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
082   * emitted if the specified column to check is not found in the row.
083   * @param family    name of column family
084   * @param qualifier name of column qualifier
085   * @param op        operator
086   * @param value     value to compare column values against
087   */
088  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
089    final CompareOperator op, final byte[] value) {
090    this(family, qualifier, op, new org.apache.hadoop.hbase.filter.BinaryComparator(value));
091  }
092
093  /**
094   * Constructor for binary compare of the value of a single column. If the column is found and the
095   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
096   * not be emitted.
097   * <p>
098   * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be
099   * emitted if the specified column to check is not found in the row.
100   * @param family     name of column family
101   * @param qualifier  name of column qualifier
102   * @param op         operator
103   * @param comparator Comparator to use.
104   */
105  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
106    final CompareOperator op, final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) {
107    this.columnFamily = family;
108    this.columnQualifier = qualifier;
109    this.op = op;
110    this.comparator = comparator;
111  }
112
113  /**
114   * Constructor for protobuf deserialization only. nnnnnn
115   */
116  protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
117    final CompareOperator op, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator,
118    final boolean filterIfMissing, final boolean latestVersionOnly) {
119    this(family, qualifier, op, comparator);
120    this.filterIfMissing = filterIfMissing;
121    this.latestVersionOnly = latestVersionOnly;
122  }
123
124  public CompareOperator getCompareOperator() {
125    return op;
126  }
127
128  /**
129   * @return the comparator
130   */
131  public org.apache.hadoop.hbase.filter.ByteArrayComparable getComparator() {
132    return comparator;
133  }
134
135  /**
136   * @return the family
137   */
138  public byte[] getFamily() {
139    return columnFamily;
140  }
141
142  /**
143   * @return the qualifier
144   */
145  public byte[] getQualifier() {
146    return columnQualifier;
147  }
148
149  @Override
150  public boolean filterRowKey(Cell cell) throws IOException {
151    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
152    return false;
153  }
154
155  @Override
156  public ReturnCode filterCell(final Cell c) {
157    // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" +
158    // Bytes.toString(keyValue.getValue()));
159    if (this.matchedColumn) {
160      // We already found and matched the single column, all keys now pass
161      return ReturnCode.INCLUDE;
162    } else if (this.latestVersionOnly && this.foundColumn) {
163      // We found but did not match the single column, skip to next row
164      return ReturnCode.NEXT_ROW;
165    }
166    if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) {
167      return ReturnCode.INCLUDE;
168    }
169    foundColumn = true;
170    if (filterColumnValue(c)) {
171      return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE;
172    }
173    this.matchedColumn = true;
174    return ReturnCode.INCLUDE;
175  }
176
177  private boolean filterColumnValue(final Cell cell) {
178    int compareResult = PrivateCellUtil.compareValue(cell, this.comparator);
179    return CompareFilter.compare(this.op, compareResult);
180  }
181
182  @Override
183  public boolean filterRow() {
184    // If column was found, return false if it was matched, true if it was not
185    // If column not found, return true if we filter if missing, false if not
186    return this.foundColumn ? !this.matchedColumn : this.filterIfMissing;
187  }
188
189  @Override
190  public boolean hasFilterRow() {
191    return true;
192  }
193
194  @Override
195  public void reset() {
196    foundColumn = false;
197    matchedColumn = false;
198  }
199
200  /**
201   * Get whether entire row should be filtered if column is not found.
202   * @return true if row should be skipped if column not found, false if row should be let through
203   *         anyways
204   */
205  public boolean getFilterIfMissing() {
206    return filterIfMissing;
207  }
208
209  /**
210   * Set whether entire row should be filtered if column is not found.
211   * <p>
212   * If true, the entire row will be skipped if the column is not found.
213   * <p>
214   * If false, the row will pass if the column is not found. This is default.
215   * @param filterIfMissing flag
216   */
217  public void setFilterIfMissing(boolean filterIfMissing) {
218    this.filterIfMissing = filterIfMissing;
219  }
220
221  /**
222   * Get whether only the latest version of the column value should be compared. If true, the row
223   * will be returned if only the latest version of the column value matches. If false, the row will
224   * be returned if any version of the column value matches. The default is true.
225   * @return return value
226   */
227  public boolean getLatestVersionOnly() {
228    return latestVersionOnly;
229  }
230
231  /**
232   * Set whether only the latest version of the column value should be compared. If true, the row
233   * will be returned if only the latest version of the column value matches. If false, the row will
234   * be returned if any version of the column value matches. The default is true.
235   * @param latestVersionOnly flag
236   */
237  public void setLatestVersionOnly(boolean latestVersionOnly) {
238    this.latestVersionOnly = latestVersionOnly;
239  }
240
241  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
242    Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
243      "Expected 4 or 6 but got: %s", filterArguments.size());
244    byte[] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
245    byte[] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
246    CompareOperator op = ParseFilter.createCompareOperator(filterArguments.get(2));
247    org.apache.hadoop.hbase.filter.ByteArrayComparable comparator =
248      ParseFilter.createComparator(ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
249
250    if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) {
251      if (op != CompareOperator.EQUAL && op != CompareOperator.NOT_EQUAL) {
252        throw new IllegalArgumentException("A regexstring comparator and substring comparator "
253          + "can only be used with EQUAL and NOT_EQUAL");
254      }
255    }
256
257    SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, comparator);
258
259    if (filterArguments.size() == 6) {
260      boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
261      boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
262      filter.setFilterIfMissing(filterIfMissing);
263      filter.setLatestVersionOnly(latestVersionOnly);
264    }
265    return filter;
266  }
267
268  FilterProtos.SingleColumnValueFilter convert() {
269    FilterProtos.SingleColumnValueFilter.Builder builder =
270      FilterProtos.SingleColumnValueFilter.newBuilder();
271    if (this.columnFamily != null) {
272      builder.setColumnFamily(UnsafeByteOperations.unsafeWrap(this.columnFamily));
273    }
274    if (this.columnQualifier != null) {
275      builder.setColumnQualifier(UnsafeByteOperations.unsafeWrap(this.columnQualifier));
276    }
277    HBaseProtos.CompareType compareOp = CompareType.valueOf(this.op.name());
278    builder.setCompareOp(compareOp);
279    builder.setComparator(ProtobufUtil.toComparator(this.comparator));
280    builder.setFilterIfMissing(this.filterIfMissing);
281    builder.setLatestVersionOnly(this.latestVersionOnly);
282
283    return builder.build();
284  }
285
286  /**
287   * @return The filter serialized using pb
288   */
289  @Override
290  public byte[] toByteArray() {
291    return convert().toByteArray();
292  }
293
294  /**
295   * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
296   * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
297   * @see #toByteArray
298   */
299  public static SingleColumnValueFilter parseFrom(final byte[] pbBytes)
300    throws DeserializationException {
301    FilterProtos.SingleColumnValueFilter proto;
302    try {
303      proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
304    } catch (InvalidProtocolBufferException e) {
305      throw new DeserializationException(e);
306    }
307
308    final CompareOperator compareOp = CompareOperator.valueOf(proto.getCompareOp().name());
309    final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator;
310    try {
311      comparator = ProtobufUtil.toComparator(proto.getComparator());
312    } catch (IOException ioe) {
313      throw new DeserializationException(ioe);
314    }
315
316    return new SingleColumnValueFilter(
317      proto.hasColumnFamily() ? proto.getColumnFamily().toByteArray() : null,
318      proto.hasColumnQualifier() ? proto.getColumnQualifier().toByteArray() : null, compareOp,
319      comparator, proto.getFilterIfMissing(), proto.getLatestVersionOnly());
320  }
321
322  /**
323   * @return true if and only if the fields of the filter that are serialized are equal to the
324   *         corresponding fields in other. Used for testing.
325   */
326  @Override
327  boolean areSerializedFieldsEqual(Filter o) {
328    if (o == this) return true;
329    if (!(o instanceof SingleColumnValueFilter)) return false;
330
331    SingleColumnValueFilter other = (SingleColumnValueFilter) o;
332    return Bytes.equals(this.getFamily(), other.getFamily())
333      && Bytes.equals(this.getQualifier(), other.getQualifier()) && this.op.equals(other.op)
334      && this.getComparator().areSerializedFieldsEqual(other.getComparator())
335      && this.getFilterIfMissing() == other.getFilterIfMissing()
336      && this.getLatestVersionOnly() == other.getLatestVersionOnly();
337  }
338
339  /**
340   * The only CF this filter needs is given column family. So, it's the only essential column in
341   * whole scan. If filterIfMissing == false, all families are essential, because of possibility of
342   * skipping the rows without any data in filtered CF.
343   */
344  @Override
345  public boolean isFamilyEssential(byte[] name) {
346    return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
347  }
348
349  @Override
350  public String toString() {
351    return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(),
352      Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier),
353      this.op.name(), Bytes.toStringBinary(this.comparator.getValue()));
354  }
355
356  @Override
357  public boolean equals(Object obj) {
358    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
359  }
360
361  @Override
362  public int hashCode() {
363    return Objects.hash(Bytes.hashCode(getFamily()), Bytes.hashCode(getQualifier()), this.op,
364      getComparator(), getFilterIfMissing(), getLatestVersionOnly());
365  }
366}