/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Objects;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.CompareType;

/**
 * This filter is used to filter cells based on value. It takes a {@link CompareOperator} (equal,
 * greater, not equal, etc), and either a byte [] value or a {@link ByteArrayComparable}.
 * <p>
 * If we have a byte [] value then we just do a lexicographic compare. For example, if passed value
 * is 'b' and cell has 'a' and the compare operator is LESS, then we will filter out this cell
 * (return true). If this is not sufficient (eg you want to deserialize a long and then compare it
 * to a fixed long value), then you can pass in your own comparator instead.
 * <p>
 * You must also specify a family and qualifier. Only the value of this column will be tested. When
 * using this filter on a {@link org.apache.hadoop.hbase.CellScanner} with specified inputs, the
 * column to be tested should also be added as input (otherwise the filter will regard the column as
 * missing).
 * <p>
 * To prevent the entire row from being emitted if the column is not found on a row, use
 * {@link #setFilterIfMissing}. Otherwise, if the column is found, the entire row will be emitted
 * only if the value passes. If the value fails, the row will be filtered out.
 * <p>
 * In order to test values of previous versions (timestamps), set {@link #setLatestVersionOnly} to
 * false. The default is true, meaning that only the latest version's value is tested and all
 * previous versions are ignored.
 * <p>
 * To filter based on the value of all scanned columns, use {@link ValueFilter}.
 */
@InterfaceAudience.Public
public class SingleColumnValueFilter extends FilterBase {

  /** Family of the column whose value is tested. */
  protected byte[] columnFamily;
  /** Qualifier of the column whose value is tested. */
  protected byte[] columnQualifier;
  /** Operator applied to the result of comparing the cell value with {@link #comparator}. */
  protected CompareOperator op;
  /** Comparator the tested column's value is compared against. */
  protected ByteArrayComparable comparator;
  /** True once a cell of the tested column has been seen since the last {@link #reset()}. */
  protected boolean foundColumn = false;
  /** True once a cell of the tested column has passed the check since the last reset. */
  protected boolean matchedColumn = false;
  /** Whether {@link #filterRow()} filters out rows in which the tested column was not seen. */
  protected boolean filterIfMissing = false;
  /** Whether only the first cell seen for the tested column decides the outcome. */
  protected boolean latestVersionOnly = true;

  /**
   * Constructor for binary compare of the value of a single column. If the column is found and the
   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
   * not be emitted.
   * <p>
   * Use {@link #setFilterIfMissing(boolean)} to set whether the rest of the columns in a row will
   * be emitted if the specified column to check is not found in the row.
   * @param family    name of column family
   * @param qualifier name of column qualifier
   * @param compareOp operator
   * @param value     value to compare column values against
   * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use
   *             {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, byte[])} instead.
   */
  @Deprecated
  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
    final CompareOp compareOp, final byte[] value) {
    this(family, qualifier, CompareOperator.valueOf(compareOp.name()),
      new BinaryComparator(value));
  }

  /**
   * Constructor for binary compare of the value of a single column. If the column is found and the
   * condition passes, all columns of the row will be emitted. If the condition fails, the row will
   * not be emitted.
   * <p>
   * Use {@link #setFilterIfMissing(boolean)} to set whether the rest of the columns in a row will
   * be emitted if the specified column to check is not found in the row.
   * @param family    name of column family
   * @param qualifier name of column qualifier
   * @param op        operator
   * @param value     value to compare column values against
   */
  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
    final CompareOperator op, final byte[] value) {
    this(family, qualifier, op, new BinaryComparator(value));
  }

  /**
   * Constructor for comparing the value of a single column using the given comparator. If the
   * column is found and the condition passes, all columns of the row will be emitted. If the
   * condition fails, the row will not be emitted.
   * <p>
   * Use {@link #setFilterIfMissing(boolean)} to set whether the rest of the columns in a row will
   * be emitted if the specified column to check is not found in the row.
   * @param family     name of column family
   * @param qualifier  name of column qualifier
   * @param compareOp  operator
   * @param comparator Comparator to use.
   * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use
   *             {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, ByteArrayComparable)}
   *             instead.
   */
  @Deprecated
  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
    final CompareOp compareOp, final ByteArrayComparable comparator) {
    this(family, qualifier, CompareOperator.valueOf(compareOp.name()), comparator);
  }

  /**
   * Constructor for comparing the value of a single column using the given comparator. If the
   * column is found and the condition passes, all columns of the row will be emitted. If the
   * condition fails, the row will not be emitted.
   * <p>
   * Use {@link #setFilterIfMissing(boolean)} to set whether the rest of the columns in a row will
   * be emitted if the specified column to check is not found in the row.
   * @param family     name of column family
   * @param qualifier  name of column qualifier
   * @param op         operator
   * @param comparator Comparator to use.
   */
  public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
    final CompareOperator op, final ByteArrayComparable comparator) {
    this.columnFamily = family;
    this.columnQualifier = qualifier;
    this.op = op;
    this.comparator = comparator;
  }

  /**
   * Constructor for protobuf deserialization only.
   * @param family            name of column family
   * @param qualifier         name of column qualifier
   * @param compareOp         operator
   * @param comparator        Comparator to use.
   * @param filterIfMissing   initial value of the filter-if-missing flag
   * @param latestVersionOnly initial value of the latest-version-only flag
   * @deprecated Since 2.0.0. Will be removed in 3.0.0. Use
   *             {@link #SingleColumnValueFilter(byte[], byte[], CompareOperator, ByteArrayComparable, boolean, boolean)}
   *             instead.
   */
  @Deprecated
  protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
    final CompareOp compareOp, ByteArrayComparable comparator, final boolean filterIfMissing,
    final boolean latestVersionOnly) {
    this(family, qualifier, CompareOperator.valueOf(compareOp.name()), comparator, filterIfMissing,
      latestVersionOnly);
  }

  /**
   * Constructor for protobuf deserialization only.
   * @param family            name of column family
   * @param qualifier         name of column qualifier
   * @param op                operator
   * @param comparator        Comparator to use.
   * @param filterIfMissing   initial value of the filter-if-missing flag
   * @param latestVersionOnly initial value of the latest-version-only flag
   */
  protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
    final CompareOperator op, ByteArrayComparable comparator, final boolean filterIfMissing,
    final boolean latestVersionOnly) {
    this(family, qualifier, op, comparator);
    this.filterIfMissing = filterIfMissing;
    this.latestVersionOnly = latestVersionOnly;
  }

  /**
   * Returns the operator converted to the legacy {@link CompareOp} enum by name.
   * @deprecated since 2.0.0. Will be removed in 3.0.0. Use {@link #getCompareOperator()} instead.
   */
  @Deprecated
  public CompareOp getOperator() {
    return CompareOp.valueOf(op.name());
  }

  /** Returns the compare operator */
  public CompareOperator getCompareOperator() {
    return op;
  }

  /** Returns the comparator */
  public ByteArrayComparable getComparator() {
    return comparator;
  }

  /** Returns the family */
  public byte[] getFamily() {
    return columnFamily;
  }

  /** Returns the qualifier */
  public byte[] getQualifier() {
    return columnQualifier;
  }

  /**
   * Never filters on the row key; this filter decides purely on the tested column's value.
   * @param cell ignored
   * @return always false
   */
  @Override
  public boolean filterRowKey(Cell cell) throws IOException {
    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
    return false;
  }

  /**
   * Delegates to {@link #filterCell(Cell)}.
   * @deprecated Use {@link #filterCell(Cell)} instead.
   */
  @Deprecated
  @Override
  public ReturnCode filterKeyValue(final Cell c) {
    return filterCell(c);
  }

  /**
   * Decides what to do with a single cell, updating the found/matched state as a side effect.
   * <p>
   * Cells of columns other than the tested one are always included here; whether the row as a
   * whole is kept is decided afterwards by {@link #filterRow()}.
   * @param c the cell to examine
   * @return {@link ReturnCode#INCLUDE} or {@link ReturnCode#NEXT_ROW}
   */
  @Override
  public ReturnCode filterCell(final Cell c) {
    if (this.matchedColumn) {
      // We already found and matched the single column, all keys now pass
      return ReturnCode.INCLUDE;
    } else if (this.latestVersionOnly && this.foundColumn) {
      // We found but did not match the single column, skip to next row
      return ReturnCode.NEXT_ROW;
    }
    if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) {
      return ReturnCode.INCLUDE;
    }
    foundColumn = true;
    if (filterColumnValue(c)) {
      // Value failed the check. When older versions may still be tested, keep scanning the row.
      return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE;
    }
    this.matchedColumn = true;
    return ReturnCode.INCLUDE;
  }

  /**
   * Compares the cell's value with the comparator and applies the operator to the result.
   * @param cell a cell of the tested column
   * @return the result of {@link CompareFilter#compare}; true is treated by the caller as "value
   *         did not pass"
   */
  private boolean filterColumnValue(final Cell cell) {
    int compareResult = PrivateCellUtil.compareValue(cell, this.comparator);
    return CompareFilter.compare(this.op, compareResult);
  }

  /**
   * Decides whether the whole row is filtered out, based on the state collected by
   * {@link #filterCell(Cell)}.
   * @return true if the row should be filtered out
   */
  @Override
  public boolean filterRow() {
    // If column was found, return false if it was matched, true if it was not
    // If column not found, return true if we filter if missing, false if not
    return this.foundColumn ? !this.matchedColumn : this.filterIfMissing;
  }

  /** Returns true, since this filter implements {@link #filterRow()}. */
  @Override
  public boolean hasFilterRow() {
    return true;
  }

  /** Clears the found/matched state collected by {@link #filterCell(Cell)}. */
  @Override
  public void reset() {
    foundColumn = false;
    matchedColumn = false;
  }

  /**
   * Get whether entire row should be filtered if column is not found.
   * @return true if row should be skipped if column not found, false if row should be let through
   *         anyways
   */
  public boolean getFilterIfMissing() {
    return filterIfMissing;
  }

  /**
   * Set whether entire row should be filtered if column is not found.
   * <p>
   * If true, the entire row will be skipped if the column is not found.
   * <p>
   * If false, the row will pass if the column is not found. This is default.
   * @param filterIfMissing flag
   */
  public void setFilterIfMissing(boolean filterIfMissing) {
    this.filterIfMissing = filterIfMissing;
  }

  /**
   * Get whether only the latest version of the column value should be compared. If true, only the
   * latest version of the column value is tested and the row is returned only if that version
   * matches. If false, the row will be returned if any version of the column value matches. The
   * default is true.
   * @return true if only the latest version is compared
   */
  public boolean getLatestVersionOnly() {
    return latestVersionOnly;
  }

  /**
   * Set whether only the latest version of the column value should be compared. If true, only the
   * latest version of the column value is tested and the row is returned only if that version
   * matches. If false, the row will be returned if any version of the column value matches. The
   * default is true.
   * @param latestVersionOnly flag
   */
  public void setLatestVersionOnly(boolean latestVersionOnly) {
    this.latestVersionOnly = latestVersionOnly;
  }

  /**
   * Builds a filter from parsed filter-language arguments.
   * <p>
   * Expects either 4 arguments (family, qualifier, compare operator, comparator) or 6 (the same
   * plus filterIfMissing and latestVersionOnly).
   * @param filterArguments the raw arguments, in the order described above
   * @return the configured filter
   * @throws IllegalArgumentException if the argument count is not 4 or 6, or if a regexstring or
   *                                  substring comparator is combined with an operator other than
   *                                  EQUAL / NOT_EQUAL
   */
  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
    Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
      "Expected 4 or 6 but got: %s", filterArguments.size());
    byte[] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
    byte[] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
    CompareOperator op = ParseFilter.createCompareOperator(filterArguments.get(2));
    ByteArrayComparable comparator =
      ParseFilter.createComparator(ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));

    if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) {
      if (op != CompareOperator.EQUAL && op != CompareOperator.NOT_EQUAL) {
        throw new IllegalArgumentException("A regexstring comparator and substring comparator "
          + "can only be used with EQUAL and NOT_EQUAL");
      }
    }

    SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, comparator);

    if (filterArguments.size() == 6) {
      boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
      boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
      filter.setFilterIfMissing(filterIfMissing);
      filter.setLatestVersionOnly(latestVersionOnly);
    }
    return filter;
  }

  /**
   * Converts this filter to its protobuf message. Family and qualifier are only set when non-null.
   * @return the protobuf representation of this filter
   */
  FilterProtos.SingleColumnValueFilter convert() {
    FilterProtos.SingleColumnValueFilter.Builder builder =
      FilterProtos.SingleColumnValueFilter.newBuilder();
    if (this.columnFamily != null) {
      builder.setColumnFamily(UnsafeByteOperations.unsafeWrap(this.columnFamily));
    }
    if (this.columnQualifier != null) {
      builder.setColumnQualifier(UnsafeByteOperations.unsafeWrap(this.columnQualifier));
    }
    HBaseProtos.CompareType compareOp = CompareType.valueOf(this.op.name());
    builder.setCompareOp(compareOp);
    builder.setComparator(ProtobufUtil.toComparator(this.comparator));
    builder.setFilterIfMissing(this.filterIfMissing);
    builder.setLatestVersionOnly(this.latestVersionOnly);

    return builder.build();
  }

  /** Returns The filter serialized using pb */
  @Override
  public byte[] toByteArray() {
    return convert().toByteArray();
  }

  /**
   * Parse a serialized representation of {@link SingleColumnValueFilter}
   * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance
   * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code>
   * @throws DeserializationException if an error occurred
   * @see #toByteArray
   */
  public static SingleColumnValueFilter parseFrom(final byte[] pbBytes)
    throws DeserializationException {
    FilterProtos.SingleColumnValueFilter proto;
    try {
      proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes);
    } catch (InvalidProtocolBufferException e) {
      throw new DeserializationException(e);
    }

    final CompareOperator compareOp = CompareOperator.valueOf(proto.getCompareOp().name());
    final ByteArrayComparable comparator;
    try {
      comparator = ProtobufUtil.toComparator(proto.getComparator());
    } catch (IOException ioe) {
      throw new DeserializationException(ioe);
    }

    return new SingleColumnValueFilter(
      proto.hasColumnFamily() ? proto.getColumnFamily().toByteArray() : null,
      proto.hasColumnQualifier() ? proto.getColumnQualifier().toByteArray() : null, compareOp,
      comparator, proto.getFilterIfMissing(), proto.getLatestVersionOnly());
  }

  /**
   * Returns true if and only if the fields of the filter that are serialized are equal to the
   * corresponding fields in other. Used for testing.
   */
  @Override
  boolean areSerializedFieldsEqual(Filter o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof SingleColumnValueFilter)) {
      return false;
    }

    SingleColumnValueFilter other = (SingleColumnValueFilter) o;
    return Bytes.equals(this.getFamily(), other.getFamily())
      && Bytes.equals(this.getQualifier(), other.getQualifier()) && this.op.equals(other.op)
      && this.getComparator().areSerializedFieldsEqual(other.getComparator())
      && this.getFilterIfMissing() == other.getFilterIfMissing()
      && this.getLatestVersionOnly() == other.getLatestVersionOnly();
  }

  /**
   * The only CF this filter needs is given column family. So, it's the only essential column in
   * whole scan. If filterIfMissing == false, all families are essential, because of possibility of
   * skipping the rows without any data in filtered CF.
   */
  @Override
  public boolean isFamilyEssential(byte[] name) {
    return !this.filterIfMissing || Bytes.equals(name, this.columnFamily);
  }

  /** Returns the simple class name followed by family, qualifier, operator and comparator value */
  @Override
  public String toString() {
    return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(),
      Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier),
      this.op.name(), Bytes.toStringBinary(this.comparator.getValue()));
  }

  /** Two filters are equal when their serialized fields are equal. */
  @Override
  public boolean equals(Object obj) {
    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
  }

  @Override
  public int hashCode() {
    return Objects.hash(Bytes.hashCode(getFamily()), Bytes.hashCode(getQualifier()), this.op,
      getComparator(), getFilterIfMissing(), getLatestVersionOnly());
  }
}