001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Objects; 023import org.apache.hadoop.hbase.Cell; 024import org.apache.hadoop.hbase.CellUtil; 025import org.apache.hadoop.hbase.CompareOperator; 026import org.apache.hadoop.hbase.PrivateCellUtil; 027import org.apache.hadoop.hbase.exceptions.DeserializationException; 028import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; 029import org.apache.hadoop.hbase.util.Bytes; 030import org.apache.yetus.audience.InterfaceAudience; 031 032import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 033import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 034import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; 035 036import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 037import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; 038import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; 039import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.CompareType; 040 041/** 042 * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp} 043 * operator (equal, greater, not equal, etc), and either a byte [] value or a ByteArrayComparable. 044 * <p> 045 * If we have a byte [] value then we just do a lexicographic compare. For example, if passed value 046 * is 'b' and cell has 'a' and the compare operator is LESS, then we will filter out this cell 047 * (return true). If this is not sufficient (eg you want to deserialize a long and then compare it 048 * to a fixed long value), then you can pass in your own comparator instead. 049 * <p> 050 * You must also specify a family and qualifier. Only the value of this column will be tested. When 051 * using this filter on a {@link org.apache.hadoop.hbase.CellScanner} with specified inputs, the 052 * column to be tested should also be added as input (otherwise the filter will regard the column as 053 * missing). 054 * <p> 055 * To prevent the entire row from being emitted if the column is not found on a row, use 056 * {@link #setFilterIfMissing}. Otherwise, if the column is found, the entire row will be emitted 057 * only if the value passes. If the value fails, the row will be filtered out. 058 * <p> 059 * In order to test values of previous versions (timestamps), set {@link #setLatestVersionOnly} to 060 * false. The default is true, meaning that only the latest version's value is tested and all 061 * previous versions are ignored. 062 * <p> 063 * To filter based on the value of all scanned columns, use {@link ValueFilter}. 064 */ 065@InterfaceAudience.Public 066public class SingleColumnValueFilter extends FilterBase { 067 068 protected byte[] columnFamily; 069 protected byte[] columnQualifier; 070 protected CompareOperator op; 071 protected org.apache.hadoop.hbase.filter.ByteArrayComparable comparator; 072 protected boolean foundColumn = false; 073 protected boolean matchedColumn = false; 074 protected boolean filterIfMissing = false; 075 protected boolean latestVersionOnly = true; 076 077 /** 078 * Constructor for binary compare of the value of a single column. If the column is found and the 079 * condition passes, all columns of the row will be emitted. If the condition fails, the row will 080 * not be emitted. 081 * <p> 082 * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be 083 * emitted if the specified column to check is not found in the row. 084 * @param family name of column family 085 * @param qualifier name of column qualifier 086 * @param compareOp operator 087 * @param value value to compare column values against 088 * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use 089 * {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, byte[])} instead. 090 */ 091 @Deprecated 092 public SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 093 final CompareOp compareOp, final byte[] value) { 094 this(family, qualifier, CompareOperator.valueOf(compareOp.name()), 095 new org.apache.hadoop.hbase.filter.BinaryComparator(value)); 096 } 097 098 /** 099 * Constructor for binary compare of the value of a single column. If the column is found and the 100 * condition passes, all columns of the row will be emitted. If the condition fails, the row will 101 * not be emitted. 102 * <p> 103 * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be 104 * emitted if the specified column to check is not found in the row. 105 * @param family name of column family 106 * @param qualifier name of column qualifier 107 * @param op operator 108 * @param value value to compare column values against 109 */ 110 public SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 111 final CompareOperator op, final byte[] value) { 112 this(family, qualifier, op, new org.apache.hadoop.hbase.filter.BinaryComparator(value)); 113 } 114 115 /** 116 * Constructor for binary compare of the value of a single column. If the column is found and the 117 * condition passes, all columns of the row will be emitted. If the condition fails, the row will 118 * not be emitted. 119 * <p> 120 * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be 121 * emitted if the specified column to check is not found in the row. 122 * @param family name of column family 123 * @param qualifier name of column qualifier 124 * @param compareOp operator 125 * @param comparator Comparator to use. 126 * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use 127 * {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, ByteArrayComparable)} 128 * instead. 129 */ 130 @Deprecated 131 public SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 132 final CompareOp compareOp, 133 final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) { 134 this(family, qualifier, CompareOperator.valueOf(compareOp.name()), comparator); 135 } 136 137 /** 138 * Constructor for binary compare of the value of a single column. If the column is found and the 139 * condition passes, all columns of the row will be emitted. If the condition fails, the row will 140 * not be emitted. 141 * <p> 142 * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be 143 * emitted if the specified column to check is not found in the row. 144 * @param family name of column family 145 * @param qualifier name of column qualifier 146 * @param op operator 147 * @param comparator Comparator to use. 148 */ 149 public SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 150 final CompareOperator op, final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) { 151 this.columnFamily = family; 152 this.columnQualifier = qualifier; 153 this.op = op; 154 this.comparator = comparator; 155 } 156 157 /** 158 * Constructor for protobuf deserialization only. nnnnnn * @deprecated Since 2.0.0. Will be 159 * removed in 3.0.0. Use 160 * {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, ByteArrayComparable, boolean, boolean)} 161 * instead. 162 */ 163 @Deprecated 164 protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 165 final CompareOp compareOp, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator, 166 final boolean filterIfMissing, final boolean latestVersionOnly) { 167 this(family, qualifier, CompareOperator.valueOf(compareOp.name()), comparator, filterIfMissing, 168 latestVersionOnly); 169 } 170 171 /** 172 * Constructor for protobuf deserialization only. nnnnnn 173 */ 174 protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 175 final CompareOperator op, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator, 176 final boolean filterIfMissing, final boolean latestVersionOnly) { 177 this(family, qualifier, op, comparator); 178 this.filterIfMissing = filterIfMissing; 179 this.latestVersionOnly = latestVersionOnly; 180 } 181 182 /** 183 * n * @deprecated since 2.0.0. Will be removed in 3.0.0. Use {@link #getCompareOperator()} 184 * instead. 185 */ 186 @Deprecated 187 public CompareOp getOperator() { 188 return CompareOp.valueOf(op.name()); 189 } 190 191 public CompareOperator getCompareOperator() { 192 return op; 193 } 194 195 /** Returns the comparator */ 196 public org.apache.hadoop.hbase.filter.ByteArrayComparable getComparator() { 197 return comparator; 198 } 199 200 /** Returns the family */ 201 public byte[] getFamily() { 202 return columnFamily; 203 } 204 205 /** Returns the qualifier */ 206 public byte[] getQualifier() { 207 return columnQualifier; 208 } 209 210 @Override 211 public boolean filterRowKey(Cell cell) throws IOException { 212 // Impl in FilterBase might do unnecessary copy for Off heap backed Cells. 213 return false; 214 } 215 216 @Deprecated 217 @Override 218 public ReturnCode filterKeyValue(final Cell c) { 219 return filterCell(c); 220 } 221 222 @Override 223 public ReturnCode filterCell(final Cell c) { 224 // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + 225 // Bytes.toString(keyValue.getValue())); 226 if (this.matchedColumn) { 227 // We already found and matched the single column, all keys now pass 228 return ReturnCode.INCLUDE; 229 } else if (this.latestVersionOnly && this.foundColumn) { 230 // We found but did not match the single column, skip to next row 231 return ReturnCode.NEXT_ROW; 232 } 233 if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) { 234 return ReturnCode.INCLUDE; 235 } 236 foundColumn = true; 237 if (filterColumnValue(c)) { 238 return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE; 239 } 240 this.matchedColumn = true; 241 return ReturnCode.INCLUDE; 242 } 243 244 private boolean filterColumnValue(final Cell cell) { 245 int compareResult = PrivateCellUtil.compareValue(cell, this.comparator); 246 return CompareFilter.compare(this.op, compareResult); 247 } 248 249 @Override 250 public boolean filterRow() { 251 // If column was found, return false if it was matched, true if it was not 252 // If column not found, return true if we filter if missing, false if not 253 return this.foundColumn ? !this.matchedColumn : this.filterIfMissing; 254 } 255 256 @Override 257 public boolean hasFilterRow() { 258 return true; 259 } 260 261 @Override 262 public void reset() { 263 foundColumn = false; 264 matchedColumn = false; 265 } 266 267 /** 268 * Get whether entire row should be filtered if column is not found. 269 * @return true if row should be skipped if column not found, false if row should be let through 270 * anyways 271 */ 272 public boolean getFilterIfMissing() { 273 return filterIfMissing; 274 } 275 276 /** 277 * Set whether entire row should be filtered if column is not found. 278 * <p> 279 * If true, the entire row will be skipped if the column is not found. 280 * <p> 281 * If false, the row will pass if the column is not found. This is default. 282 * @param filterIfMissing flag 283 */ 284 public void setFilterIfMissing(boolean filterIfMissing) { 285 this.filterIfMissing = filterIfMissing; 286 } 287 288 /** 289 * Get whether only the latest version of the column value should be compared. If true, the row 290 * will be returned if only the latest version of the column value matches. If false, the row will 291 * be returned if any version of the column value matches. The default is true. 292 * @return return value 293 */ 294 public boolean getLatestVersionOnly() { 295 return latestVersionOnly; 296 } 297 298 /** 299 * Set whether only the latest version of the column value should be compared. If true, the row 300 * will be returned if only the latest version of the column value matches. If false, the row will 301 * be returned if any version of the column value matches. The default is true. 302 * @param latestVersionOnly flag 303 */ 304 public void setLatestVersionOnly(boolean latestVersionOnly) { 305 this.latestVersionOnly = latestVersionOnly; 306 } 307 308 public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) { 309 Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6, 310 "Expected 4 or 6 but got: %s", filterArguments.size()); 311 byte[] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0)); 312 byte[] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1)); 313 CompareOperator op = ParseFilter.createCompareOperator(filterArguments.get(2)); 314 org.apache.hadoop.hbase.filter.ByteArrayComparable comparator = 315 ParseFilter.createComparator(ParseFilter.removeQuotesFromByteArray(filterArguments.get(3))); 316 317 if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) { 318 if (op != CompareOperator.EQUAL && op != CompareOperator.NOT_EQUAL) { 319 throw new IllegalArgumentException("A regexstring comparator and substring comparator " 320 + "can only be used with EQUAL and NOT_EQUAL"); 321 } 322 } 323 324 SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, comparator); 325 326 if (filterArguments.size() == 6) { 327 boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4)); 328 boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5)); 329 filter.setFilterIfMissing(filterIfMissing); 330 filter.setLatestVersionOnly(latestVersionOnly); 331 } 332 return filter; 333 } 334 335 FilterProtos.SingleColumnValueFilter convert() { 336 FilterProtos.SingleColumnValueFilter.Builder builder = 337 FilterProtos.SingleColumnValueFilter.newBuilder(); 338 if (this.columnFamily != null) { 339 builder.setColumnFamily(UnsafeByteOperations.unsafeWrap(this.columnFamily)); 340 } 341 if (this.columnQualifier != null) { 342 builder.setColumnQualifier(UnsafeByteOperations.unsafeWrap(this.columnQualifier)); 343 } 344 HBaseProtos.CompareType compareOp = CompareType.valueOf(this.op.name()); 345 builder.setCompareOp(compareOp); 346 builder.setComparator(ProtobufUtil.toComparator(this.comparator)); 347 builder.setFilterIfMissing(this.filterIfMissing); 348 builder.setLatestVersionOnly(this.latestVersionOnly); 349 350 return builder.build(); 351 } 352 353 /** Returns The filter serialized using pb */ 354 @Override 355 public byte[] toByteArray() { 356 return convert().toByteArray(); 357 } 358 359 /** 360 * Parse a serialized representation of {@link SingleColumnValueFilter} 361 * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance 362 * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code> 363 * @throws DeserializationException if an error occurred 364 * @see #toByteArray 365 */ 366 public static SingleColumnValueFilter parseFrom(final byte[] pbBytes) 367 throws DeserializationException { 368 FilterProtos.SingleColumnValueFilter proto; 369 try { 370 proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes); 371 } catch (InvalidProtocolBufferException e) { 372 throw new DeserializationException(e); 373 } 374 375 final CompareOperator compareOp = CompareOperator.valueOf(proto.getCompareOp().name()); 376 final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator; 377 try { 378 comparator = ProtobufUtil.toComparator(proto.getComparator()); 379 } catch (IOException ioe) { 380 throw new DeserializationException(ioe); 381 } 382 383 return new SingleColumnValueFilter( 384 proto.hasColumnFamily() ? proto.getColumnFamily().toByteArray() : null, 385 proto.hasColumnQualifier() ? proto.getColumnQualifier().toByteArray() : null, compareOp, 386 comparator, proto.getFilterIfMissing(), proto.getLatestVersionOnly()); 387 } 388 389 /** 390 * Returns true if and only if the fields of the filter that are serialized are equal to the 391 * corresponding fields in other. Used for testing. 392 */ 393 @Override 394 boolean areSerializedFieldsEqual(Filter o) { 395 if (o == this) return true; 396 if (!(o instanceof SingleColumnValueFilter)) return false; 397 398 SingleColumnValueFilter other = (SingleColumnValueFilter) o; 399 return Bytes.equals(this.getFamily(), other.getFamily()) 400 && Bytes.equals(this.getQualifier(), other.getQualifier()) && this.op.equals(other.op) 401 && this.getComparator().areSerializedFieldsEqual(other.getComparator()) 402 && this.getFilterIfMissing() == other.getFilterIfMissing() 403 && this.getLatestVersionOnly() == other.getLatestVersionOnly(); 404 } 405 406 /** 407 * The only CF this filter needs is given column family. So, it's the only essential column in 408 * whole scan. If filterIfMissing == false, all families are essential, because of possibility of 409 * skipping the rows without any data in filtered CF. 410 */ 411 @Override 412 public boolean isFamilyEssential(byte[] name) { 413 return !this.filterIfMissing || Bytes.equals(name, this.columnFamily); 414 } 415 416 @Override 417 public String toString() { 418 return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(), 419 Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier), 420 this.op.name(), Bytes.toStringBinary(this.comparator.getValue())); 421 } 422 423 @Override 424 public boolean equals(Object obj) { 425 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); 426 } 427 428 @Override 429 public int hashCode() { 430 return Objects.hash(Bytes.hashCode(getFamily()), Bytes.hashCode(getQualifier()), this.op, 431 getComparator(), getFilterIfMissing(), getLatestVersionOnly()); 432 } 433}