001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Objects; 023import org.apache.hadoop.hbase.Cell; 024import org.apache.hadoop.hbase.CellUtil; 025import org.apache.hadoop.hbase.CompareOperator; 026import org.apache.hadoop.hbase.PrivateCellUtil; 027import org.apache.hadoop.hbase.exceptions.DeserializationException; 028import org.apache.hadoop.hbase.util.Bytes; 029import org.apache.yetus.audience.InterfaceAudience; 030 031import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 032import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 033import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; 034 035import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 036import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; 037import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; 038import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.CompareType; 039 040/** 041 * This filter is used to filter cells based on value. It takes a {@link CompareOperator} operator 042 * (equal, greater, not equal, etc), and either a byte [] value or a ByteArrayComparable. 043 * <p> 044 * If we have a byte [] value then we just do a lexicographic compare. For example, if passed value 045 * is 'b' and cell has 'a' and the compare operator is LESS, then we will filter out this cell 046 * (return true). If this is not sufficient (eg you want to deserialize a long and then compare it 047 * to a fixed long value), then you can pass in your own comparator instead. 048 * <p> 049 * You must also specify a family and qualifier. Only the value of this column will be tested. When 050 * using this filter on a {@link org.apache.hadoop.hbase.CellScanner} with specified inputs, the 051 * column to be tested should also be added as input (otherwise the filter will regard the column as 052 * missing). 053 * <p> 054 * To prevent the entire row from being emitted if the column is not found on a row, use 055 * {@link #setFilterIfMissing}. Otherwise, if the column is found, the entire row will be emitted 056 * only if the value passes. If the value fails, the row will be filtered out. 057 * <p> 058 * In order to test values of previous versions (timestamps), set {@link #setLatestVersionOnly} to 059 * false. The default is true, meaning that only the latest version's value is tested and all 060 * previous versions are ignored. 061 * <p> 062 * To filter based on the value of all scanned columns, use {@link ValueFilter}. 063 */ 064@InterfaceAudience.Public 065public class SingleColumnValueFilter extends FilterBase { 066 067 protected byte[] columnFamily; 068 protected byte[] columnQualifier; 069 protected CompareOperator op; 070 protected org.apache.hadoop.hbase.filter.ByteArrayComparable comparator; 071 protected boolean foundColumn = false; 072 protected boolean matchedColumn = false; 073 protected boolean filterIfMissing = false; 074 protected boolean latestVersionOnly = true; 075 076 /** 077 * Constructor for binary compare of the value of a single column. If the column is found and the 078 * condition passes, all columns of the row will be emitted. If the condition fails, the row will 079 * not be emitted. 080 * <p> 081 * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be 082 * emitted if the specified column to check is not found in the row. 083 * @param family name of column family 084 * @param qualifier name of column qualifier 085 * @param op operator 086 * @param value value to compare column values against 087 */ 088 public SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 089 final CompareOperator op, final byte[] value) { 090 this(family, qualifier, op, new org.apache.hadoop.hbase.filter.BinaryComparator(value)); 091 } 092 093 /** 094 * Constructor for binary compare of the value of a single column. If the column is found and the 095 * condition passes, all columns of the row will be emitted. If the condition fails, the row will 096 * not be emitted. 097 * <p> 098 * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be 099 * emitted if the specified column to check is not found in the row. 100 * @param family name of column family 101 * @param qualifier name of column qualifier 102 * @param op operator 103 * @param comparator Comparator to use. 104 */ 105 public SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 106 final CompareOperator op, final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) { 107 this.columnFamily = family; 108 this.columnQualifier = qualifier; 109 this.op = op; 110 this.comparator = comparator; 111 } 112 113 /** 114 * Constructor for protobuf deserialization only. nnnnnn 115 */ 116 protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 117 final CompareOperator op, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator, 118 final boolean filterIfMissing, final boolean latestVersionOnly) { 119 this(family, qualifier, op, comparator); 120 this.filterIfMissing = filterIfMissing; 121 this.latestVersionOnly = latestVersionOnly; 122 } 123 124 public CompareOperator getCompareOperator() { 125 return op; 126 } 127 128 /** 129 * @return the comparator 130 */ 131 public org.apache.hadoop.hbase.filter.ByteArrayComparable getComparator() { 132 return comparator; 133 } 134 135 /** 136 * @return the family 137 */ 138 public byte[] getFamily() { 139 return columnFamily; 140 } 141 142 /** 143 * @return the qualifier 144 */ 145 public byte[] getQualifier() { 146 return columnQualifier; 147 } 148 149 @Override 150 public boolean filterRowKey(Cell cell) throws IOException { 151 // Impl in FilterBase might do unnecessary copy for Off heap backed Cells. 152 return false; 153 } 154 155 @Override 156 public ReturnCode filterCell(final Cell c) { 157 // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + 158 // Bytes.toString(keyValue.getValue())); 159 if (this.matchedColumn) { 160 // We already found and matched the single column, all keys now pass 161 return ReturnCode.INCLUDE; 162 } else if (this.latestVersionOnly && this.foundColumn) { 163 // We found but did not match the single column, skip to next row 164 return ReturnCode.NEXT_ROW; 165 } 166 if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) { 167 return ReturnCode.INCLUDE; 168 } 169 foundColumn = true; 170 if (filterColumnValue(c)) { 171 return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE; 172 } 173 this.matchedColumn = true; 174 return ReturnCode.INCLUDE; 175 } 176 177 private boolean filterColumnValue(final Cell cell) { 178 int compareResult = PrivateCellUtil.compareValue(cell, this.comparator); 179 return CompareFilter.compare(this.op, compareResult); 180 } 181 182 @Override 183 public boolean filterRow() { 184 // If column was found, return false if it was matched, true if it was not 185 // If column not found, return true if we filter if missing, false if not 186 return this.foundColumn ? !this.matchedColumn : this.filterIfMissing; 187 } 188 189 @Override 190 public boolean hasFilterRow() { 191 return true; 192 } 193 194 @Override 195 public void reset() { 196 foundColumn = false; 197 matchedColumn = false; 198 } 199 200 /** 201 * Get whether entire row should be filtered if column is not found. 202 * @return true if row should be skipped if column not found, false if row should be let through 203 * anyways 204 */ 205 public boolean getFilterIfMissing() { 206 return filterIfMissing; 207 } 208 209 /** 210 * Set whether entire row should be filtered if column is not found. 211 * <p> 212 * If true, the entire row will be skipped if the column is not found. 213 * <p> 214 * If false, the row will pass if the column is not found. This is default. 215 * @param filterIfMissing flag 216 */ 217 public void setFilterIfMissing(boolean filterIfMissing) { 218 this.filterIfMissing = filterIfMissing; 219 } 220 221 /** 222 * Get whether only the latest version of the column value should be compared. If true, the row 223 * will be returned if only the latest version of the column value matches. If false, the row will 224 * be returned if any version of the column value matches. The default is true. 225 * @return return value 226 */ 227 public boolean getLatestVersionOnly() { 228 return latestVersionOnly; 229 } 230 231 /** 232 * Set whether only the latest version of the column value should be compared. If true, the row 233 * will be returned if only the latest version of the column value matches. If false, the row will 234 * be returned if any version of the column value matches. The default is true. 235 * @param latestVersionOnly flag 236 */ 237 public void setLatestVersionOnly(boolean latestVersionOnly) { 238 this.latestVersionOnly = latestVersionOnly; 239 } 240 241 public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) { 242 Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6, 243 "Expected 4 or 6 but got: %s", filterArguments.size()); 244 byte[] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0)); 245 byte[] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1)); 246 CompareOperator op = ParseFilter.createCompareOperator(filterArguments.get(2)); 247 org.apache.hadoop.hbase.filter.ByteArrayComparable comparator = 248 ParseFilter.createComparator(ParseFilter.removeQuotesFromByteArray(filterArguments.get(3))); 249 250 if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) { 251 if (op != CompareOperator.EQUAL && op != CompareOperator.NOT_EQUAL) { 252 throw new IllegalArgumentException("A regexstring comparator and substring comparator " 253 + "can only be used with EQUAL and NOT_EQUAL"); 254 } 255 } 256 257 SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, comparator); 258 259 if (filterArguments.size() == 6) { 260 boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4)); 261 boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5)); 262 filter.setFilterIfMissing(filterIfMissing); 263 filter.setLatestVersionOnly(latestVersionOnly); 264 } 265 return filter; 266 } 267 268 FilterProtos.SingleColumnValueFilter convert() { 269 FilterProtos.SingleColumnValueFilter.Builder builder = 270 FilterProtos.SingleColumnValueFilter.newBuilder(); 271 if (this.columnFamily != null) { 272 builder.setColumnFamily(UnsafeByteOperations.unsafeWrap(this.columnFamily)); 273 } 274 if (this.columnQualifier != null) { 275 builder.setColumnQualifier(UnsafeByteOperations.unsafeWrap(this.columnQualifier)); 276 } 277 HBaseProtos.CompareType compareOp = CompareType.valueOf(this.op.name()); 278 builder.setCompareOp(compareOp); 279 builder.setComparator(ProtobufUtil.toComparator(this.comparator)); 280 builder.setFilterIfMissing(this.filterIfMissing); 281 builder.setLatestVersionOnly(this.latestVersionOnly); 282 283 return builder.build(); 284 } 285 286 /** 287 * @return The filter serialized using pb 288 */ 289 @Override 290 public byte[] toByteArray() { 291 return convert().toByteArray(); 292 } 293 294 /** 295 * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance 296 * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code> 297 * @see #toByteArray 298 */ 299 public static SingleColumnValueFilter parseFrom(final byte[] pbBytes) 300 throws DeserializationException { 301 FilterProtos.SingleColumnValueFilter proto; 302 try { 303 proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes); 304 } catch (InvalidProtocolBufferException e) { 305 throw new DeserializationException(e); 306 } 307 308 final CompareOperator compareOp = CompareOperator.valueOf(proto.getCompareOp().name()); 309 final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator; 310 try { 311 comparator = ProtobufUtil.toComparator(proto.getComparator()); 312 } catch (IOException ioe) { 313 throw new DeserializationException(ioe); 314 } 315 316 return new SingleColumnValueFilter( 317 proto.hasColumnFamily() ? proto.getColumnFamily().toByteArray() : null, 318 proto.hasColumnQualifier() ? proto.getColumnQualifier().toByteArray() : null, compareOp, 319 comparator, proto.getFilterIfMissing(), proto.getLatestVersionOnly()); 320 } 321 322 /** 323 * @return true if and only if the fields of the filter that are serialized are equal to the 324 * corresponding fields in other. Used for testing. 325 */ 326 @Override 327 boolean areSerializedFieldsEqual(Filter o) { 328 if (o == this) return true; 329 if (!(o instanceof SingleColumnValueFilter)) return false; 330 331 SingleColumnValueFilter other = (SingleColumnValueFilter) o; 332 return Bytes.equals(this.getFamily(), other.getFamily()) 333 && Bytes.equals(this.getQualifier(), other.getQualifier()) && this.op.equals(other.op) 334 && this.getComparator().areSerializedFieldsEqual(other.getComparator()) 335 && this.getFilterIfMissing() == other.getFilterIfMissing() 336 && this.getLatestVersionOnly() == other.getLatestVersionOnly(); 337 } 338 339 /** 340 * The only CF this filter needs is given column family. So, it's the only essential column in 341 * whole scan. If filterIfMissing == false, all families are essential, because of possibility of 342 * skipping the rows without any data in filtered CF. 343 */ 344 @Override 345 public boolean isFamilyEssential(byte[] name) { 346 return !this.filterIfMissing || Bytes.equals(name, this.columnFamily); 347 } 348 349 @Override 350 public String toString() { 351 return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(), 352 Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier), 353 this.op.name(), Bytes.toStringBinary(this.comparator.getValue())); 354 } 355 356 @Override 357 public boolean equals(Object obj) { 358 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); 359 } 360 361 @Override 362 public int hashCode() { 363 return Objects.hash(Bytes.hashCode(getFamily()), Bytes.hashCode(getQualifier()), this.op, 364 getComparator(), getFilterIfMissing(), getLatestVersionOnly()); 365 } 366}