001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Comparator; 024import java.util.List; 025import java.util.Objects; 026import java.util.PriorityQueue; 027import org.apache.hadoop.hbase.Cell; 028import org.apache.hadoop.hbase.CellComparator; 029import org.apache.hadoop.hbase.PrivateCellUtil; 030import org.apache.hadoop.hbase.exceptions.DeserializationException; 031import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent; 032import org.apache.hadoop.hbase.util.Bytes; 033import org.apache.hadoop.hbase.util.Pair; 034import org.apache.yetus.audience.InterfaceAudience; 035 036import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 037import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; 038 039import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; 040import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair; 041 042/** 043 * This is optimized version of a standard FuzzyRowFilter Filters data based on fuzzy row key. 044 * Performs fast-forwards during scanning. It takes pairs (row key, fuzzy info) to match row keys. 045 * Where fuzzy info is a byte array with 0 or 1 as its values: 046 * <ul> 047 * <li>0 - means that this byte in provided row key is fixed, i.e. row key's byte at same position 048 * must match</li> 049 * <li>1 - means that this byte in provided row key is NOT fixed, i.e. row key's byte at this 050 * position can be different from the one in provided row key</li> 051 * </ul> 052 * Example: 053 * <p/> 054 * Let's assume row key format is userId_actionId_year_month. Length of userId is fixed and is 4, 055 * length of actionId is 2 and year and month are 4 and 2 bytes long respectively. 056 * <p/> 057 * Let's assume that we need to fetch all users that performed certain action (encoded as "99") in 058 * Jan of any year. Then the pair (row key, fuzzy info) would be the following: 059 * 060 * <pre> 061 * row key = "????_99_????_01" (one can use any value instead of "?") 062 * fuzzy info = "\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00" 063 * </pre> 064 * 065 * I.e. fuzzy info tells the matching mask is "????_99_????_01", where at ? can be any value. 066 */ 067@InterfaceAudience.Public 068public class FuzzyRowFilter extends FilterBase implements HintingFilter { 069 private static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned(); 070 private List<Pair<byte[], byte[]>> fuzzyKeysData; 071 // Used to record whether we want to skip the current row. 072 // Usually we should use filterRowKey here but in the current scan implementation, if filterRowKey 073 // returns true, we will just skip to next row, instead of calling getNextCellHint to determine 074 // the actual next row, so we need to implement filterCell and return SEEK_NEXT_USING_HINT to let 075 // upper layer call getNextCellHint. 076 // And if we do not implement filterRow, sometimes we will get incorrect result when using 077 // FuzzyRowFilter together with other filters, please see the description for HBASE-26967 for more 078 // details. 079 private boolean filterRow; 080 private boolean done = false; 081 082 /** 083 * The index of a last successfully found matching fuzzy string (in fuzzyKeysData). We will start 084 * matching next KV with this one. If they do not match then we will return back to the one-by-one 085 * iteration over fuzzyKeysData. 086 */ 087 private int lastFoundIndex = -1; 088 089 /** 090 * Row tracker (keeps all next rows after SEEK_NEXT_USING_HINT was returned) 091 */ 092 private RowTracker tracker; 093 094 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) { 095 List<Pair<byte[], byte[]>> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size()); 096 097 for (Pair<byte[], byte[]> aFuzzyKeysData : fuzzyKeysData) { 098 if (aFuzzyKeysData.getFirst().length != aFuzzyKeysData.getSecond().length) { 099 Pair<String, String> readable = new Pair<>(Bytes.toStringBinary(aFuzzyKeysData.getFirst()), 100 Bytes.toStringBinary(aFuzzyKeysData.getSecond())); 101 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable); 102 } 103 104 Pair<byte[], byte[]> p = new Pair<>(); 105 // create a copy of pair bytes so that they are not modified by the filter. 106 p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length)); 107 p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length)); 108 109 // update mask ( 0 -> -1 (0xff), 1 -> 2) 110 p.setSecond(preprocessMask(p.getSecond())); 111 preprocessSearchKey(p); 112 113 fuzzyKeyDataCopy.add(p); 114 } 115 this.fuzzyKeysData = fuzzyKeyDataCopy; 116 this.tracker = new RowTracker(); 117 } 118 119 private void preprocessSearchKey(Pair<byte[], byte[]> p) { 120 if (!UNSAFE_UNALIGNED) { 121 // do nothing 122 return; 123 } 124 byte[] key = p.getFirst(); 125 byte[] mask = p.getSecond(); 126 for (int i = 0; i < mask.length; i++) { 127 // set non-fixed part of a search key to 0. 128 if (mask[i] == 2) { 129 key[i] = 0; 130 } 131 } 132 } 133 134 /** 135 * We need to preprocess mask array, as since we treat 2's as unfixed positions and -1 (0xff) as 136 * fixed positions 137 * @return mask array 138 */ 139 private byte[] preprocessMask(byte[] mask) { 140 if (!UNSAFE_UNALIGNED) { 141 // do nothing 142 return mask; 143 } 144 if (isPreprocessedMask(mask)) return mask; 145 for (int i = 0; i < mask.length; i++) { 146 if (mask[i] == 0) { 147 mask[i] = -1; // 0 -> -1 148 } else if (mask[i] == 1) { 149 mask[i] = 2;// 1 -> 2 150 } 151 } 152 return mask; 153 } 154 155 private boolean isPreprocessedMask(byte[] mask) { 156 for (int i = 0; i < mask.length; i++) { 157 if (mask[i] != -1 && mask[i] != 2) { 158 return false; 159 } 160 } 161 return true; 162 } 163 164 /** 165 * Returns the Fuzzy keys in the format expected by the constructor. 166 * @return the Fuzzy keys in the format expected by the constructor 167 */ 168 public List<Pair<byte[], byte[]>> getFuzzyKeys() { 169 List<Pair<byte[], byte[]>> returnList = new ArrayList<>(fuzzyKeysData.size()); 170 for (Pair<byte[], byte[]> fuzzyKey : fuzzyKeysData) { 171 Pair<byte[], byte[]> returnKey = new Pair<>(); 172 // This won't revert the original key's don't care values, but we don't care. 173 returnKey.setFirst(Arrays.copyOf(fuzzyKey.getFirst(), fuzzyKey.getFirst().length)); 174 byte[] returnMask = Arrays.copyOf(fuzzyKey.getSecond(), fuzzyKey.getSecond().length); 175 if (UNSAFE_UNALIGNED && isPreprocessedMask(returnMask)) { 176 // Revert the preprocessing. 177 for (int i = 0; i < returnMask.length; i++) { 178 if (returnMask[i] == -1) { 179 returnMask[i] = 0; // -1 >> 0 180 } else if (returnMask[i] == 2) { 181 returnMask[i] = 1;// 2 >> 1 182 } 183 } 184 } 185 returnKey.setSecond(returnMask); 186 returnList.add(returnKey); 187 } 188 return returnList; 189 } 190 191 @Override 192 public void reset() throws IOException { 193 filterRow = false; 194 } 195 196 @Override 197 public boolean filterRow() throws IOException { 198 return filterRow; 199 } 200 201 @Override 202 public ReturnCode filterCell(final Cell c) { 203 final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0; 204 final int size = fuzzyKeysData.size(); 205 for (int i = startIndex; i < size + startIndex; i++) { 206 final int index = i % size; 207 Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index); 208 // This shift is idempotent - always end up with 0 and -1 as mask values. 209 for (int j = 0; j < fuzzyData.getSecond().length; j++) { 210 fuzzyData.getSecond()[j] >>= 2; 211 } 212 SatisfiesCode satisfiesCode = satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), 213 c.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond()); 214 if (satisfiesCode == SatisfiesCode.YES) { 215 lastFoundIndex = index; 216 return ReturnCode.INCLUDE; 217 } 218 } 219 // NOT FOUND -> seek next using hint 220 lastFoundIndex = -1; 221 filterRow = true; 222 return ReturnCode.SEEK_NEXT_USING_HINT; 223 224 } 225 226 @Override 227 public Cell getNextCellHint(Cell currentCell) { 228 boolean result = tracker.updateTracker(currentCell); 229 if (result == false) { 230 done = true; 231 return null; 232 } 233 byte[] nextRowKey = tracker.nextRow(); 234 return PrivateCellUtil.createFirstOnRow(nextRowKey, 0, (short) nextRowKey.length); 235 } 236 237 /** 238 * If we have multiple fuzzy keys, row tracker should improve overall performance. It calculates 239 * all next rows (one per every fuzzy key) and put them (the fuzzy key is bundled) into a priority 240 * queue so that the smallest row key always appears at queue head, which helps to decide the 241 * "Next Cell Hint". As scanning going on, the number of candidate rows in the RowTracker will 242 * remain the size of fuzzy keys until some of the fuzzy keys won't possibly have matches any 243 * more. 244 */ 245 private class RowTracker { 246 private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows; 247 private boolean initialized = false; 248 249 RowTracker() { 250 nextRows = new PriorityQueue<>(fuzzyKeysData.size(), 251 new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() { 252 @Override 253 public int compare(Pair<byte[], Pair<byte[], byte[]>> o1, 254 Pair<byte[], Pair<byte[], byte[]>> o2) { 255 return isReversed() 256 ? Bytes.compareTo(o2.getFirst(), o1.getFirst()) 257 : Bytes.compareTo(o1.getFirst(), o2.getFirst()); 258 } 259 }); 260 } 261 262 byte[] nextRow() { 263 if (nextRows.isEmpty()) { 264 throw new IllegalStateException("NextRows should not be empty, " 265 + "make sure to call nextRow() after updateTracker() return true"); 266 } else { 267 return nextRows.peek().getFirst(); 268 } 269 } 270 271 boolean updateTracker(Cell currentCell) { 272 if (!initialized) { 273 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) { 274 updateWith(currentCell, fuzzyData); 275 } 276 initialized = true; 277 } else { 278 while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) { 279 Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll(); 280 Pair<byte[], byte[]> fuzzyData = head.getSecond(); 281 updateWith(currentCell, fuzzyData); 282 } 283 } 284 return !nextRows.isEmpty(); 285 } 286 287 boolean lessThan(Cell currentCell, byte[] nextRowKey) { 288 int compareResult = 289 CellComparator.getInstance().compareRows(currentCell, nextRowKey, 0, nextRowKey.length); 290 return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0); 291 } 292 293 void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) { 294 byte[] nextRowKeyCandidate = 295 getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(), 296 currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond()); 297 if (nextRowKeyCandidate != null) { 298 nextRows.add(new Pair<>(nextRowKeyCandidate, fuzzyData)); 299 } 300 } 301 302 } 303 304 @Override 305 public boolean filterAllRemaining() { 306 return done; 307 } 308 309 /** Returns The filter serialized using pb */ 310 @Override 311 public byte[] toByteArray() { 312 FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder(); 313 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) { 314 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder(); 315 bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(fuzzyData.getFirst())); 316 bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(fuzzyData.getSecond())); 317 builder.addFuzzyKeysData(bbpBuilder); 318 } 319 return builder.build().toByteArray(); 320 } 321 322 /** 323 * Parse a serialized representation of {@link FuzzyRowFilter} 324 * @param pbBytes A pb serialized {@link FuzzyRowFilter} instance 325 * @return An instance of {@link FuzzyRowFilter} made from <code>bytes</code> 326 * @throws DeserializationException if an error occurred 327 * @see #toByteArray 328 */ 329 public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException { 330 FilterProtos.FuzzyRowFilter proto; 331 try { 332 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes); 333 } catch (InvalidProtocolBufferException e) { 334 throw new DeserializationException(e); 335 } 336 int count = proto.getFuzzyKeysDataCount(); 337 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<>(count); 338 for (int i = 0; i < count; ++i) { 339 BytesBytesPair current = proto.getFuzzyKeysData(i); 340 byte[] keyBytes = current.getFirst().toByteArray(); 341 byte[] keyMeta = current.getSecond().toByteArray(); 342 fuzzyKeysData.add(new Pair<>(keyBytes, keyMeta)); 343 } 344 return new FuzzyRowFilter(fuzzyKeysData); 345 } 346 347 @Override 348 public String toString() { 349 final StringBuilder sb = new StringBuilder(); 350 sb.append("FuzzyRowFilter"); 351 sb.append("{fuzzyKeysData="); 352 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) { 353 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":"); 354 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}'); 355 } 356 sb.append("}, "); 357 return sb.toString(); 358 } 359 360 // Utility methods 361 362 static enum SatisfiesCode { 363 /** row satisfies fuzzy rule */ 364 YES, 365 /** row doesn't satisfy fuzzy rule, but there's possible greater row that does */ 366 NEXT_EXISTS, 367 /** row doesn't satisfy fuzzy rule and there's no greater row that does */ 368 NO_NEXT 369 } 370 371 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) { 372 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta); 373 } 374 375 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes, 376 byte[] fuzzyKeyMeta) { 377 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta); 378 } 379 380 static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length, 381 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) { 382 383 if (!UNSAFE_UNALIGNED) { 384 return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta); 385 } 386 387 if (row == null) { 388 // do nothing, let scan to proceed 389 return SatisfiesCode.YES; 390 } 391 length = Math.min(length, fuzzyKeyBytes.length); 392 int numWords = length / Bytes.SIZEOF_LONG; 393 394 int j = numWords << 3; // numWords * SIZEOF_LONG; 395 396 for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) { 397 long fuzzyBytes = Bytes.toLong(fuzzyKeyBytes, i); 398 long fuzzyMeta = Bytes.toLong(fuzzyKeyMeta, i); 399 long rowValue = Bytes.toLong(row, offset + i); 400 if ((rowValue & fuzzyMeta) != fuzzyBytes) { 401 // We always return NEXT_EXISTS 402 return SatisfiesCode.NEXT_EXISTS; 403 } 404 } 405 406 int off = j; 407 408 if (length - off >= Bytes.SIZEOF_INT) { 409 int fuzzyBytes = Bytes.toInt(fuzzyKeyBytes, off); 410 int fuzzyMeta = Bytes.toInt(fuzzyKeyMeta, off); 411 int rowValue = Bytes.toInt(row, offset + off); 412 if ((rowValue & fuzzyMeta) != fuzzyBytes) { 413 // We always return NEXT_EXISTS 414 return SatisfiesCode.NEXT_EXISTS; 415 } 416 off += Bytes.SIZEOF_INT; 417 } 418 419 if (length - off >= Bytes.SIZEOF_SHORT) { 420 short fuzzyBytes = Bytes.toShort(fuzzyKeyBytes, off); 421 short fuzzyMeta = Bytes.toShort(fuzzyKeyMeta, off); 422 short rowValue = Bytes.toShort(row, offset + off); 423 if ((rowValue & fuzzyMeta) != fuzzyBytes) { 424 // We always return NEXT_EXISTS 425 // even if it does not (in this case getNextForFuzzyRule 426 // will return null) 427 return SatisfiesCode.NEXT_EXISTS; 428 } 429 off += Bytes.SIZEOF_SHORT; 430 } 431 432 if (length - off >= Bytes.SIZEOF_BYTE) { 433 int fuzzyBytes = fuzzyKeyBytes[off] & 0xff; 434 int fuzzyMeta = fuzzyKeyMeta[off] & 0xff; 435 int rowValue = row[offset + off] & 0xff; 436 if ((rowValue & fuzzyMeta) != fuzzyBytes) { 437 // We always return NEXT_EXISTS 438 return SatisfiesCode.NEXT_EXISTS; 439 } 440 } 441 return SatisfiesCode.YES; 442 } 443 444 static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length, 445 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) { 446 if (row == null) { 447 // do nothing, let scan to proceed 448 return SatisfiesCode.YES; 449 } 450 451 Order order = Order.orderFor(reverse); 452 boolean nextRowKeyCandidateExists = false; 453 454 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) { 455 // First, checking if this position is fixed and not equals the given one 456 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0; 457 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset]; 458 if (fixedByteIncorrect) { 459 // in this case there's another row that satisfies fuzzy rule and bigger than this row 460 if (nextRowKeyCandidateExists) { 461 return SatisfiesCode.NEXT_EXISTS; 462 } 463 464 // If this row byte is less than fixed then there's a byte array bigger than 465 // this row and which satisfies the fuzzy rule. Otherwise there's no such byte array: 466 // this row is simply bigger than any byte array that satisfies the fuzzy rule 467 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF); 468 if (rowByteLessThanFixed && !reverse) { 469 return SatisfiesCode.NEXT_EXISTS; 470 } else if (!rowByteLessThanFixed && reverse) { 471 return SatisfiesCode.NEXT_EXISTS; 472 } else { 473 return SatisfiesCode.NO_NEXT; 474 } 475 } 476 477 // Second, checking if this position is not fixed and byte value is not the biggest. In this 478 // case there's a byte array bigger than this row and which satisfies the fuzzy rule. To get 479 // bigger byte array that satisfies the rule we need to just increase this byte 480 // (see the code of getNextForFuzzyRule below) by one. 481 // Note: if non-fixed byte is already at biggest value, this doesn't allow us to say there's 482 // bigger one that satisfies the rule as it can't be increased. 483 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) { 484 nextRowKeyCandidateExists = true; 485 } 486 } 487 return SatisfiesCode.YES; 488 } 489 490 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) { 491 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta); 492 } 493 494 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes, 495 byte[] fuzzyKeyMeta) { 496 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta); 497 } 498 499 /** Abstracts directional comparisons based on scan direction. */ 500 private enum Order { 501 ASC { 502 @Override 503 public boolean lt(int lhs, int rhs) { 504 return lhs < rhs; 505 } 506 507 @Override 508 public boolean gt(int lhs, int rhs) { 509 return lhs > rhs; 510 } 511 512 @Override 513 public byte inc(byte val) { 514 // TODO: what about over/underflow? 515 return (byte) (val + 1); 516 } 517 518 @Override 519 public boolean isMax(byte val) { 520 return val == (byte) 0xff; 521 } 522 523 @Override 524 public byte min() { 525 return 0; 526 } 527 }, 528 DESC { 529 @Override 530 public boolean lt(int lhs, int rhs) { 531 return lhs > rhs; 532 } 533 534 @Override 535 public boolean gt(int lhs, int rhs) { 536 return lhs < rhs; 537 } 538 539 @Override 540 public byte inc(byte val) { 541 // TODO: what about over/underflow? 542 return (byte) (val - 1); 543 } 544 545 @Override 546 public boolean isMax(byte val) { 547 return val == 0; 548 } 549 550 @Override 551 public byte min() { 552 return (byte) 0xFF; 553 } 554 }; 555 556 public static Order orderFor(boolean reverse) { 557 return reverse ? DESC : ASC; 558 } 559 560 /** Returns true when {@code lhs < rhs}. */ 561 public abstract boolean lt(int lhs, int rhs); 562 563 /** Returns true when {@code lhs > rhs}. */ 564 public abstract boolean gt(int lhs, int rhs); 565 566 /** Returns {@code val} incremented by 1. */ 567 public abstract byte inc(byte val); 568 569 /** Return true when {@code val} is the maximum value */ 570 public abstract boolean isMax(byte val); 571 572 /** Return the minimum value according to this ordering scheme. */ 573 public abstract byte min(); 574 } 575 576 /** 577 * @return greater byte array than given (row) which satisfies the fuzzy rule if it exists, null 578 * otherwise 579 */ 580 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length, 581 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) { 582 // To find out the next "smallest" byte array that satisfies fuzzy rule and "greater" than 583 // the given one we do the following: 584 // 1. setting values on all "fixed" positions to the values from fuzzyKeyBytes 585 // 2. if during the first step given row did not increase, then we increase the value at 586 // the first "non-fixed" position (where it is not maximum already) 587 588 // It is easier to perform this by using fuzzyKeyBytes copy and setting "non-fixed" position 589 // values than otherwise. 590 byte[] result = 591 Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length); 592 if (reverse && length > fuzzyKeyBytes.length) { 593 // we need trailing 0xff's instead of trailing 0x00's 594 for (int i = fuzzyKeyBytes.length; i < result.length; i++) { 595 result[i] = (byte) 0xFF; 596 } 597 } 598 int toInc = -1; 599 final Order order = Order.orderFor(reverse); 600 601 boolean increased = false; 602 for (int i = 0; i < result.length; i++) { 603 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 /* non-fixed */) { 604 result[i] = row[offset + i]; 605 if (!order.isMax(row[offset + i])) { 606 // this is "non-fixed" position and is not at max value, hence we can increase it 607 toInc = i; 608 } 609 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1 /* fixed */) { 610 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) { 611 // if setting value for any fixed position increased the original array, 612 // we are OK 613 increased = true; 614 break; 615 } 616 617 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) { 618 // if setting value for any fixed position makes array "smaller", then just stop: 619 // in case we found some non-fixed position to increase we will do it, otherwise 620 // there's no "next" row key that satisfies fuzzy rule and "greater" than given row 621 break; 622 } 623 } 624 } 625 626 if (!increased) { 627 if (toInc < 0) { 628 return null; 629 } 630 result[toInc] = order.inc(result[toInc]); 631 632 // Setting all "non-fixed" positions to zeroes to the right of the one we increased so 633 // that found "next" row key is the smallest possible 634 for (int i = toInc + 1; i < result.length; i++) { 635 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 /* non-fixed */) { 636 result[i] = order.min(); 637 } 638 } 639 } 640 641 return reverse ? result : trimTrailingZeroes(result, fuzzyKeyMeta, toInc); 642 } 643 644 /** 645 * For forward scanner, next cell hint should not contain any trailing zeroes unless they are part 646 * of fuzzyKeyMeta hint = '\x01\x01\x01\x00\x00' will skip valid row '\x01\x01\x01' 647 * @param toInc - position of incremented byte 648 * @return trimmed version of result 649 */ 650 651 private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) { 652 int off = fuzzyKeyMeta.length >= result.length ? result.length - 1 : fuzzyKeyMeta.length - 1; 653 for (; off >= 0; off--) { 654 if (fuzzyKeyMeta[off] != 0) break; 655 } 656 if (off < toInc) off = toInc; 657 byte[] retValue = new byte[off + 1]; 658 System.arraycopy(result, 0, retValue, 0, retValue.length); 659 return retValue; 660 } 661 662 /** 663 * Returns true if and only if the fields of the filter that are serialized are equal to the 664 * corresponding fields in other. Used for testing. 665 */ 666 @Override 667 boolean areSerializedFieldsEqual(Filter o) { 668 if (o == this) { 669 return true; 670 } 671 if (!(o instanceof FuzzyRowFilter)) { 672 return false; 673 } 674 FuzzyRowFilter other = (FuzzyRowFilter) o; 675 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false; 676 for (int i = 0; i < fuzzyKeysData.size(); ++i) { 677 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i); 678 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i); 679 if ( 680 !(Bytes.equals(thisData.getFirst(), otherData.getFirst()) 681 && Bytes.equals(thisData.getSecond(), otherData.getSecond())) 682 ) { 683 return false; 684 } 685 } 686 return true; 687 } 688 689 @Override 690 public boolean equals(Object obj) { 691 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); 692 } 693 694 @Override 695 public int hashCode() { 696 return Objects.hash(this.fuzzyKeysData); 697 } 698}