001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Objects; 023import org.apache.hadoop.hbase.Cell; 024import org.apache.hadoop.hbase.CellUtil; 025import org.apache.hadoop.hbase.CompareOperator; 026import org.apache.hadoop.hbase.PrivateCellUtil; 027import org.apache.hadoop.hbase.exceptions.DeserializationException; 028import org.apache.hadoop.hbase.util.Bytes; 029import org.apache.yetus.audience.InterfaceAudience; 030 031import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 032import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 033import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; 034 035import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 036import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; 037import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; 038import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.CompareType; 039 040/** 041 * This filter is used to filter cells based on value. It takes a {@link CompareOperator} operator 042 * (equal, greater, not equal, etc), and either a byte [] value or a ByteArrayComparable. 043 * <p> 044 * If we have a byte [] value then we just do a lexicographic compare. For example, if passed value 045 * is 'b' and cell has 'a' and the compare operator is LESS, then we will filter out this cell 046 * (return true). If this is not sufficient (eg you want to deserialize a long and then compare it 047 * to a fixed long value), then you can pass in your own comparator instead. 048 * <p> 049 * You must also specify a family and qualifier. Only the value of this column will be tested. When 050 * using this filter on a {@link org.apache.hadoop.hbase.CellScanner} with specified inputs, the 051 * column to be tested should also be added as input (otherwise the filter will regard the column as 052 * missing). 053 * <p> 054 * To prevent the entire row from being emitted if the column is not found on a row, use 055 * {@link #setFilterIfMissing}. Otherwise, if the column is found, the entire row will be emitted 056 * only if the value passes. If the value fails, the row will be filtered out. 057 * <p> 058 * In order to test values of previous versions (timestamps), set {@link #setLatestVersionOnly} to 059 * false. The default is true, meaning that only the latest version's value is tested and all 060 * previous versions are ignored. 061 * <p> 062 * To filter based on the value of all scanned columns, use {@link ValueFilter}. 063 */ 064@InterfaceAudience.Public 065public class SingleColumnValueFilter extends FilterBase { 066 067 protected byte[] columnFamily; 068 protected byte[] columnQualifier; 069 protected CompareOperator op; 070 protected org.apache.hadoop.hbase.filter.ByteArrayComparable comparator; 071 protected boolean foundColumn = false; 072 protected boolean matchedColumn = false; 073 protected boolean filterIfMissing = false; 074 protected boolean latestVersionOnly = true; 075 076 /** 077 * Constructor for binary compare of the value of a single column. If the column is found and the 078 * condition passes, all columns of the row will be emitted. If the condition fails, the row will 079 * not be emitted. 080 * <p> 081 * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be 082 * emitted if the specified column to check is not found in the row. 083 * @param family name of column family 084 * @param qualifier name of column qualifier 085 * @param op operator 086 * @param value value to compare column values against 087 */ 088 public SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 089 final CompareOperator op, final byte[] value) { 090 this(family, qualifier, op, new org.apache.hadoop.hbase.filter.BinaryComparator(value)); 091 } 092 093 /** 094 * Constructor for binary compare of the value of a single column. If the column is found and the 095 * condition passes, all columns of the row will be emitted. If the condition fails, the row will 096 * not be emitted. 097 * <p> 098 * Use the filterIfColumnMissing flag to set whether the rest of the columns in a row will be 099 * emitted if the specified column to check is not found in the row. 100 * @param family name of column family 101 * @param qualifier name of column qualifier 102 * @param op operator 103 * @param comparator Comparator to use. 104 */ 105 public SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 106 final CompareOperator op, final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator) { 107 this.columnFamily = family; 108 this.columnQualifier = qualifier; 109 this.op = op; 110 this.comparator = comparator; 111 } 112 113 /** 114 * Constructor for protobuf deserialization only. 115 */ 116 protected SingleColumnValueFilter(final byte[] family, final byte[] qualifier, 117 final CompareOperator op, org.apache.hadoop.hbase.filter.ByteArrayComparable comparator, 118 final boolean filterIfMissing, final boolean latestVersionOnly) { 119 this(family, qualifier, op, comparator); 120 this.filterIfMissing = filterIfMissing; 121 this.latestVersionOnly = latestVersionOnly; 122 } 123 124 public CompareOperator getCompareOperator() { 125 return op; 126 } 127 128 /** Returns the comparator */ 129 public org.apache.hadoop.hbase.filter.ByteArrayComparable getComparator() { 130 return comparator; 131 } 132 133 /** Returns the family */ 134 public byte[] getFamily() { 135 return columnFamily; 136 } 137 138 /** Returns the qualifier */ 139 public byte[] getQualifier() { 140 return columnQualifier; 141 } 142 143 @Override 144 public boolean filterRowKey(Cell cell) throws IOException { 145 // Impl in FilterBase might do unnecessary copy for Off heap backed Cells. 146 return false; 147 } 148 149 @Override 150 public ReturnCode filterCell(final Cell c) { 151 // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + 152 // Bytes.toString(keyValue.getValue())); 153 if (this.matchedColumn) { 154 // We already found and matched the single column, all keys now pass 155 return ReturnCode.INCLUDE; 156 } else if (this.latestVersionOnly && this.foundColumn) { 157 // We found but did not match the single column, skip to next row 158 return ReturnCode.NEXT_ROW; 159 } 160 if (!CellUtil.matchingColumn(c, this.columnFamily, this.columnQualifier)) { 161 return ReturnCode.INCLUDE; 162 } 163 foundColumn = true; 164 if (filterColumnValue(c)) { 165 return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE; 166 } 167 this.matchedColumn = true; 168 return ReturnCode.INCLUDE; 169 } 170 171 private boolean filterColumnValue(final Cell cell) { 172 int compareResult = PrivateCellUtil.compareValue(cell, this.comparator); 173 return CompareFilter.compare(this.op, compareResult); 174 } 175 176 @Override 177 public boolean filterRow() { 178 // If column was found, return false if it was matched, true if it was not 179 // If column not found, return true if we filter if missing, false if not 180 return this.foundColumn ? !this.matchedColumn : this.filterIfMissing; 181 } 182 183 @Override 184 public boolean hasFilterRow() { 185 return true; 186 } 187 188 @Override 189 public void reset() { 190 foundColumn = false; 191 matchedColumn = false; 192 } 193 194 /** 195 * Get whether entire row should be filtered if column is not found. 196 * @return true if row should be skipped if column not found, false if row should be let through 197 * anyways 198 */ 199 public boolean getFilterIfMissing() { 200 return filterIfMissing; 201 } 202 203 /** 204 * Set whether entire row should be filtered if column is not found. 205 * <p> 206 * If true, the entire row will be skipped if the column is not found. 207 * <p> 208 * If false, the row will pass if the column is not found. This is default. 209 * @param filterIfMissing flag 210 */ 211 public void setFilterIfMissing(boolean filterIfMissing) { 212 this.filterIfMissing = filterIfMissing; 213 } 214 215 /** 216 * Get whether only the latest version of the column value should be compared. If true, the row 217 * will be returned if only the latest version of the column value matches. If false, the row will 218 * be returned if any version of the column value matches. The default is true. 219 * @return return value 220 */ 221 public boolean getLatestVersionOnly() { 222 return latestVersionOnly; 223 } 224 225 /** 226 * Set whether only the latest version of the column value should be compared. If true, the row 227 * will be returned if only the latest version of the column value matches. If false, the row will 228 * be returned if any version of the column value matches. The default is true. 229 * @param latestVersionOnly flag 230 */ 231 public void setLatestVersionOnly(boolean latestVersionOnly) { 232 this.latestVersionOnly = latestVersionOnly; 233 } 234 235 public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) { 236 Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6, 237 "Expected 4 or 6 but got: %s", filterArguments.size()); 238 byte[] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0)); 239 byte[] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1)); 240 CompareOperator op = ParseFilter.createCompareOperator(filterArguments.get(2)); 241 org.apache.hadoop.hbase.filter.ByteArrayComparable comparator = 242 ParseFilter.createComparator(ParseFilter.removeQuotesFromByteArray(filterArguments.get(3))); 243 244 if (comparator instanceof RegexStringComparator || comparator instanceof SubstringComparator) { 245 if (op != CompareOperator.EQUAL && op != CompareOperator.NOT_EQUAL) { 246 throw new IllegalArgumentException("A regexstring comparator and substring comparator " 247 + "can only be used with EQUAL and NOT_EQUAL"); 248 } 249 } 250 251 SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier, op, comparator); 252 253 if (filterArguments.size() == 6) { 254 boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4)); 255 boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5)); 256 filter.setFilterIfMissing(filterIfMissing); 257 filter.setLatestVersionOnly(latestVersionOnly); 258 } 259 return filter; 260 } 261 262 FilterProtos.SingleColumnValueFilter convert() { 263 FilterProtos.SingleColumnValueFilter.Builder builder = 264 FilterProtos.SingleColumnValueFilter.newBuilder(); 265 if (this.columnFamily != null) { 266 builder.setColumnFamily(UnsafeByteOperations.unsafeWrap(this.columnFamily)); 267 } 268 if (this.columnQualifier != null) { 269 builder.setColumnQualifier(UnsafeByteOperations.unsafeWrap(this.columnQualifier)); 270 } 271 HBaseProtos.CompareType compareOp = CompareType.valueOf(this.op.name()); 272 builder.setCompareOp(compareOp); 273 builder.setComparator(ProtobufUtil.toComparator(this.comparator)); 274 builder.setFilterIfMissing(this.filterIfMissing); 275 builder.setLatestVersionOnly(this.latestVersionOnly); 276 277 return builder.build(); 278 } 279 280 /** Returns The filter serialized using pb */ 281 @Override 282 public byte[] toByteArray() { 283 return convert().toByteArray(); 284 } 285 286 /** 287 * Parse a serialized representation of {@link SingleColumnValueFilter} 288 * @param pbBytes A pb serialized {@link SingleColumnValueFilter} instance 289 * @return An instance of {@link SingleColumnValueFilter} made from <code>bytes</code> 290 * @throws DeserializationException if an error occurred 291 * @see #toByteArray 292 */ 293 public static SingleColumnValueFilter parseFrom(final byte[] pbBytes) 294 throws DeserializationException { 295 FilterProtos.SingleColumnValueFilter proto; 296 try { 297 proto = FilterProtos.SingleColumnValueFilter.parseFrom(pbBytes); 298 } catch (InvalidProtocolBufferException e) { 299 throw new DeserializationException(e); 300 } 301 302 final CompareOperator compareOp = CompareOperator.valueOf(proto.getCompareOp().name()); 303 final org.apache.hadoop.hbase.filter.ByteArrayComparable comparator; 304 try { 305 comparator = ProtobufUtil.toComparator(proto.getComparator()); 306 } catch (IOException ioe) { 307 throw new DeserializationException(ioe); 308 } 309 310 return new SingleColumnValueFilter( 311 proto.hasColumnFamily() ? proto.getColumnFamily().toByteArray() : null, 312 proto.hasColumnQualifier() ? proto.getColumnQualifier().toByteArray() : null, compareOp, 313 comparator, proto.getFilterIfMissing(), proto.getLatestVersionOnly()); 314 } 315 316 /** 317 * Returns true if and only if the fields of the filter that are serialized are equal to the 318 * corresponding fields in other. Used for testing. 319 */ 320 @Override 321 boolean areSerializedFieldsEqual(Filter o) { 322 if (o == this) return true; 323 if (!(o instanceof SingleColumnValueFilter)) return false; 324 325 SingleColumnValueFilter other = (SingleColumnValueFilter) o; 326 return Bytes.equals(this.getFamily(), other.getFamily()) 327 && Bytes.equals(this.getQualifier(), other.getQualifier()) && this.op.equals(other.op) 328 && this.getComparator().areSerializedFieldsEqual(other.getComparator()) 329 && this.getFilterIfMissing() == other.getFilterIfMissing() 330 && this.getLatestVersionOnly() == other.getLatestVersionOnly(); 331 } 332 333 /** 334 * The only CF this filter needs is given column family. So, it's the only essential column in 335 * whole scan. If filterIfMissing == false, all families are essential, because of possibility of 336 * skipping the rows without any data in filtered CF. 337 */ 338 @Override 339 public boolean isFamilyEssential(byte[] name) { 340 return !this.filterIfMissing || Bytes.equals(name, this.columnFamily); 341 } 342 343 @Override 344 public String toString() { 345 return String.format("%s (%s, %s, %s, %s)", this.getClass().getSimpleName(), 346 Bytes.toStringBinary(this.columnFamily), Bytes.toStringBinary(this.columnQualifier), 347 this.op.name(), Bytes.toStringBinary(this.comparator.getValue())); 348 } 349 350 @Override 351 public boolean equals(Object obj) { 352 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); 353 } 354 355 @Override 356 public int hashCode() { 357 return Objects.hash(Bytes.hashCode(getFamily()), Bytes.hashCode(getQualifier()), this.op, 358 getComparator(), getFilterIfMissing(), getLatestVersionOnly()); 359 } 360}