001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Comparator;
024import java.util.List;
025import java.util.Objects;
026import java.util.PriorityQueue;
027import org.apache.hadoop.hbase.Cell;
028import org.apache.hadoop.hbase.CellComparator;
029import org.apache.hadoop.hbase.PrivateCellUtil;
030import org.apache.hadoop.hbase.exceptions.DeserializationException;
031import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent;
032import org.apache.hadoop.hbase.util.Bytes;
033import org.apache.hadoop.hbase.util.Pair;
034import org.apache.yetus.audience.InterfaceAudience;
035
036import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
037import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
038
039import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
040import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair;
041
042/**
043 * This is optimized version of a standard FuzzyRowFilter Filters data based on fuzzy row key.
044 * Performs fast-forwards during scanning. It takes pairs (row key, fuzzy info) to match row keys.
045 * Where fuzzy info is a byte array with 0 or 1 as its values:
046 * <ul>
047 * <li>0 - means that this byte in provided row key is fixed, i.e. row key's byte at same position
048 * must match</li>
049 * <li>1 - means that this byte in provided row key is NOT fixed, i.e. row key's byte at this
050 * position can be different from the one in provided row key</li>
051 * </ul>
052 * Example:
053 * <p/>
054 * Let's assume row key format is userId_actionId_year_month. Length of userId is fixed and is 4,
055 * length of actionId is 2 and year and month are 4 and 2 bytes long respectively.
056 * <p/>
057 * Let's assume that we need to fetch all users that performed certain action (encoded as "99") in
058 * Jan of any year. Then the pair (row key, fuzzy info) would be the following:
059 *
060 * <pre>
061 * row key = "????_99_????_01" (one can use any value instead of "?")
062 * fuzzy info = "\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00"
063 * </pre>
064 *
065 * I.e. fuzzy info tells the matching mask is "????_99_????_01", where at ? can be any value.
066 */
067@InterfaceAudience.Public
068public class FuzzyRowFilter extends FilterBase implements HintingFilter {
069  private static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned();
070  private List<Pair<byte[], byte[]>> fuzzyKeysData;
071  // Used to record whether we want to skip the current row.
072  // Usually we should use filterRowKey here but in the current scan implementation, if filterRowKey
073  // returns true, we will just skip to next row, instead of calling getNextCellHint to determine
074  // the actual next row, so we need to implement filterCell and return SEEK_NEXT_USING_HINT to let
075  // upper layer call getNextCellHint.
076  // And if we do not implement filterRow, sometimes we will get incorrect result when using
077  // FuzzyRowFilter together with other filters, please see the description for HBASE-26967 for more
078  // details.
079  private boolean filterRow;
080  private boolean done = false;
081
082  /**
083   * The index of a last successfully found matching fuzzy string (in fuzzyKeysData). We will start
084   * matching next KV with this one. If they do not match then we will return back to the one-by-one
085   * iteration over fuzzyKeysData.
086   */
087  private int lastFoundIndex = -1;
088
089  /**
090   * Row tracker (keeps all next rows after SEEK_NEXT_USING_HINT was returned)
091   */
092  private RowTracker tracker;
093
094  public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
095    List<Pair<byte[], byte[]>> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size());
096
097    for (Pair<byte[], byte[]> aFuzzyKeysData : fuzzyKeysData) {
098      if (aFuzzyKeysData.getFirst().length != aFuzzyKeysData.getSecond().length) {
099        Pair<String, String> readable = new Pair<>(Bytes.toStringBinary(aFuzzyKeysData.getFirst()),
100          Bytes.toStringBinary(aFuzzyKeysData.getSecond()));
101        throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
102      }
103
104      Pair<byte[], byte[]> p = new Pair<>();
105      // create a copy of pair bytes so that they are not modified by the filter.
106      p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length));
107      p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length));
108
109      // update mask ( 0 -> -1 (0xff), 1 -> 2)
110      p.setSecond(preprocessMask(p.getSecond()));
111      preprocessSearchKey(p);
112
113      fuzzyKeyDataCopy.add(p);
114    }
115    this.fuzzyKeysData = fuzzyKeyDataCopy;
116    this.tracker = new RowTracker();
117  }
118
119  private void preprocessSearchKey(Pair<byte[], byte[]> p) {
120    if (!UNSAFE_UNALIGNED) {
121      // do nothing
122      return;
123    }
124    byte[] key = p.getFirst();
125    byte[] mask = p.getSecond();
126    for (int i = 0; i < mask.length; i++) {
127      // set non-fixed part of a search key to 0.
128      if (mask[i] == 2) {
129        key[i] = 0;
130      }
131    }
132  }
133
134  /**
135   * We need to preprocess mask array, as since we treat 2's as unfixed positions and -1 (0xff) as
136   * fixed positions
137   * @return mask array
138   */
139  private byte[] preprocessMask(byte[] mask) {
140    if (!UNSAFE_UNALIGNED) {
141      // do nothing
142      return mask;
143    }
144    if (isPreprocessedMask(mask)) return mask;
145    for (int i = 0; i < mask.length; i++) {
146      if (mask[i] == 0) {
147        mask[i] = -1; // 0 -> -1
148      } else if (mask[i] == 1) {
149        mask[i] = 2;// 1 -> 2
150      }
151    }
152    return mask;
153  }
154
155  private boolean isPreprocessedMask(byte[] mask) {
156    for (int i = 0; i < mask.length; i++) {
157      if (mask[i] != -1 && mask[i] != 2) {
158        return false;
159      }
160    }
161    return true;
162  }
163
164  /**
165   * Returns the Fuzzy keys in the format expected by the constructor.
166   * @return the Fuzzy keys in the format expected by the constructor
167   */
168  public List<Pair<byte[], byte[]>> getFuzzyKeys() {
169    List<Pair<byte[], byte[]>> returnList = new ArrayList<>(fuzzyKeysData.size());
170    for (Pair<byte[], byte[]> fuzzyKey : fuzzyKeysData) {
171      Pair<byte[], byte[]> returnKey = new Pair<>();
172      // This won't revert the original key's don't care values, but we don't care.
173      returnKey.setFirst(Arrays.copyOf(fuzzyKey.getFirst(), fuzzyKey.getFirst().length));
174      byte[] returnMask = Arrays.copyOf(fuzzyKey.getSecond(), fuzzyKey.getSecond().length);
175      if (UNSAFE_UNALIGNED && isPreprocessedMask(returnMask)) {
176        // Revert the preprocessing.
177        for (int i = 0; i < returnMask.length; i++) {
178          if (returnMask[i] == -1) {
179            returnMask[i] = 0; // -1 >> 0
180          } else if (returnMask[i] == 2) {
181            returnMask[i] = 1;// 2 >> 1
182          }
183        }
184      }
185      returnKey.setSecond(returnMask);
186      returnList.add(returnKey);
187    }
188    return returnList;
189  }
190
191  @Override
192  public void reset() throws IOException {
193    filterRow = false;
194  }
195
196  @Override
197  public boolean filterRow() throws IOException {
198    return filterRow;
199  }
200
201  @Override
202  public ReturnCode filterCell(final Cell c) {
203    final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
204    final int size = fuzzyKeysData.size();
205    for (int i = startIndex; i < size + startIndex; i++) {
206      final int index = i % size;
207      Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
208      // This shift is idempotent - always end up with 0 and -1 as mask values.
209      for (int j = 0; j < fuzzyData.getSecond().length; j++) {
210        fuzzyData.getSecond()[j] >>= 2;
211      }
212      SatisfiesCode satisfiesCode = satisfies(isReversed(), c.getRowArray(), c.getRowOffset(),
213        c.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
214      if (satisfiesCode == SatisfiesCode.YES) {
215        lastFoundIndex = index;
216        return ReturnCode.INCLUDE;
217      }
218    }
219    // NOT FOUND -> seek next using hint
220    lastFoundIndex = -1;
221    filterRow = true;
222    return ReturnCode.SEEK_NEXT_USING_HINT;
223
224  }
225
226  @Override
227  public Cell getNextCellHint(Cell currentCell) {
228    boolean result = tracker.updateTracker(currentCell);
229    if (result == false) {
230      done = true;
231      return null;
232    }
233    byte[] nextRowKey = tracker.nextRow();
234    return PrivateCellUtil.createFirstOnRow(nextRowKey, 0, (short) nextRowKey.length);
235  }
236
237  /**
238   * If we have multiple fuzzy keys, row tracker should improve overall performance. It calculates
239   * all next rows (one per every fuzzy key) and put them (the fuzzy key is bundled) into a priority
240   * queue so that the smallest row key always appears at queue head, which helps to decide the
241   * "Next Cell Hint". As scanning going on, the number of candidate rows in the RowTracker will
242   * remain the size of fuzzy keys until some of the fuzzy keys won't possibly have matches any
243   * more.
244   */
245  private class RowTracker {
246    private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
247    private boolean initialized = false;
248
249    RowTracker() {
250      nextRows = new PriorityQueue<>(fuzzyKeysData.size(),
251        new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
252          @Override
253          public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
254            Pair<byte[], Pair<byte[], byte[]>> o2) {
255            return isReversed()
256              ? Bytes.compareTo(o2.getFirst(), o1.getFirst())
257              : Bytes.compareTo(o1.getFirst(), o2.getFirst());
258          }
259        });
260    }
261
262    byte[] nextRow() {
263      if (nextRows.isEmpty()) {
264        throw new IllegalStateException("NextRows should not be empty, "
265          + "make sure to call nextRow() after updateTracker() return true");
266      } else {
267        return nextRows.peek().getFirst();
268      }
269    }
270
271    boolean updateTracker(Cell currentCell) {
272      if (!initialized) {
273        for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
274          updateWith(currentCell, fuzzyData);
275        }
276        initialized = true;
277      } else {
278        while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
279          Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
280          Pair<byte[], byte[]> fuzzyData = head.getSecond();
281          updateWith(currentCell, fuzzyData);
282        }
283      }
284      return !nextRows.isEmpty();
285    }
286
287    boolean lessThan(Cell currentCell, byte[] nextRowKey) {
288      int compareResult =
289        CellComparator.getInstance().compareRows(currentCell, nextRowKey, 0, nextRowKey.length);
290      return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
291    }
292
293    void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
294      byte[] nextRowKeyCandidate =
295        getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
296          currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
297      if (nextRowKeyCandidate != null) {
298        nextRows.add(new Pair<>(nextRowKeyCandidate, fuzzyData));
299      }
300    }
301
302  }
303
304  @Override
305  public boolean filterAllRemaining() {
306    return done;
307  }
308
309  /** Returns The filter serialized using pb */
310  @Override
311  public byte[] toByteArray() {
312    FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder();
313    for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
314      BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
315      bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(fuzzyData.getFirst()));
316      bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(fuzzyData.getSecond()));
317      builder.addFuzzyKeysData(bbpBuilder);
318    }
319    return builder.build().toByteArray();
320  }
321
322  /**
323   * Parse a serialized representation of {@link FuzzyRowFilter}
324   * @param pbBytes A pb serialized {@link FuzzyRowFilter} instance
325   * @return An instance of {@link FuzzyRowFilter} made from <code>bytes</code>
326   * @throws DeserializationException if an error occurred
327   * @see #toByteArray
328   */
329  public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
330    FilterProtos.FuzzyRowFilter proto;
331    try {
332      proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
333    } catch (InvalidProtocolBufferException e) {
334      throw new DeserializationException(e);
335    }
336    int count = proto.getFuzzyKeysDataCount();
337    ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<>(count);
338    for (int i = 0; i < count; ++i) {
339      BytesBytesPair current = proto.getFuzzyKeysData(i);
340      byte[] keyBytes = current.getFirst().toByteArray();
341      byte[] keyMeta = current.getSecond().toByteArray();
342      fuzzyKeysData.add(new Pair<>(keyBytes, keyMeta));
343    }
344    return new FuzzyRowFilter(fuzzyKeysData);
345  }
346
347  @Override
348  public String toString() {
349    final StringBuilder sb = new StringBuilder();
350    sb.append("FuzzyRowFilter");
351    sb.append("{fuzzyKeysData=");
352    for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
353      sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
354      sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
355    }
356    sb.append("}, ");
357    return sb.toString();
358  }
359
360  // Utility methods
361
362  static enum SatisfiesCode {
363    /** row satisfies fuzzy rule */
364    YES,
365    /** row doesn't satisfy fuzzy rule, but there's possible greater row that does */
366    NEXT_EXISTS,
367    /** row doesn't satisfy fuzzy rule and there's no greater row that does */
368    NO_NEXT
369  }
370
371  static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
372    return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
373  }
374
375  static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
376    byte[] fuzzyKeyMeta) {
377    return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
378  }
379
380  static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
381    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
382
383    if (!UNSAFE_UNALIGNED) {
384      return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
385    }
386
387    if (row == null) {
388      // do nothing, let scan to proceed
389      return SatisfiesCode.YES;
390    }
391    length = Math.min(length, fuzzyKeyBytes.length);
392    int numWords = length / Bytes.SIZEOF_LONG;
393
394    int j = numWords << 3; // numWords * SIZEOF_LONG;
395
396    for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
397      long fuzzyBytes = Bytes.toLong(fuzzyKeyBytes, i);
398      long fuzzyMeta = Bytes.toLong(fuzzyKeyMeta, i);
399      long rowValue = Bytes.toLong(row, offset + i);
400      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
401        // We always return NEXT_EXISTS
402        return SatisfiesCode.NEXT_EXISTS;
403      }
404    }
405
406    int off = j;
407
408    if (length - off >= Bytes.SIZEOF_INT) {
409      int fuzzyBytes = Bytes.toInt(fuzzyKeyBytes, off);
410      int fuzzyMeta = Bytes.toInt(fuzzyKeyMeta, off);
411      int rowValue = Bytes.toInt(row, offset + off);
412      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
413        // We always return NEXT_EXISTS
414        return SatisfiesCode.NEXT_EXISTS;
415      }
416      off += Bytes.SIZEOF_INT;
417    }
418
419    if (length - off >= Bytes.SIZEOF_SHORT) {
420      short fuzzyBytes = Bytes.toShort(fuzzyKeyBytes, off);
421      short fuzzyMeta = Bytes.toShort(fuzzyKeyMeta, off);
422      short rowValue = Bytes.toShort(row, offset + off);
423      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
424        // We always return NEXT_EXISTS
425        // even if it does not (in this case getNextForFuzzyRule
426        // will return null)
427        return SatisfiesCode.NEXT_EXISTS;
428      }
429      off += Bytes.SIZEOF_SHORT;
430    }
431
432    if (length - off >= Bytes.SIZEOF_BYTE) {
433      int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
434      int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
435      int rowValue = row[offset + off] & 0xff;
436      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
437        // We always return NEXT_EXISTS
438        return SatisfiesCode.NEXT_EXISTS;
439      }
440    }
441    return SatisfiesCode.YES;
442  }
443
444  static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
445    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
446    if (row == null) {
447      // do nothing, let scan to proceed
448      return SatisfiesCode.YES;
449    }
450
451    Order order = Order.orderFor(reverse);
452    boolean nextRowKeyCandidateExists = false;
453
454    for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
455      // First, checking if this position is fixed and not equals the given one
456      boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
457      boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
458      if (fixedByteIncorrect) {
459        // in this case there's another row that satisfies fuzzy rule and bigger than this row
460        if (nextRowKeyCandidateExists) {
461          return SatisfiesCode.NEXT_EXISTS;
462        }
463
464        // If this row byte is less than fixed then there's a byte array bigger than
465        // this row and which satisfies the fuzzy rule. Otherwise there's no such byte array:
466        // this row is simply bigger than any byte array that satisfies the fuzzy rule
467        boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
468        if (rowByteLessThanFixed && !reverse) {
469          return SatisfiesCode.NEXT_EXISTS;
470        } else if (!rowByteLessThanFixed && reverse) {
471          return SatisfiesCode.NEXT_EXISTS;
472        } else {
473          return SatisfiesCode.NO_NEXT;
474        }
475      }
476
477      // Second, checking if this position is not fixed and byte value is not the biggest. In this
478      // case there's a byte array bigger than this row and which satisfies the fuzzy rule. To get
479      // bigger byte array that satisfies the rule we need to just increase this byte
480      // (see the code of getNextForFuzzyRule below) by one.
481      // Note: if non-fixed byte is already at biggest value, this doesn't allow us to say there's
482      // bigger one that satisfies the rule as it can't be increased.
483      if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
484        nextRowKeyCandidateExists = true;
485      }
486    }
487    return SatisfiesCode.YES;
488  }
489
490  static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
491    return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
492  }
493
494  static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
495    byte[] fuzzyKeyMeta) {
496    return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
497  }
498
499  /** Abstracts directional comparisons based on scan direction. */
500  private enum Order {
501    ASC {
502      @Override
503      public boolean lt(int lhs, int rhs) {
504        return lhs < rhs;
505      }
506
507      @Override
508      public boolean gt(int lhs, int rhs) {
509        return lhs > rhs;
510      }
511
512      @Override
513      public byte inc(byte val) {
514        // TODO: what about over/underflow?
515        return (byte) (val + 1);
516      }
517
518      @Override
519      public boolean isMax(byte val) {
520        return val == (byte) 0xff;
521      }
522
523      @Override
524      public byte min() {
525        return 0;
526      }
527    },
528    DESC {
529      @Override
530      public boolean lt(int lhs, int rhs) {
531        return lhs > rhs;
532      }
533
534      @Override
535      public boolean gt(int lhs, int rhs) {
536        return lhs < rhs;
537      }
538
539      @Override
540      public byte inc(byte val) {
541        // TODO: what about over/underflow?
542        return (byte) (val - 1);
543      }
544
545      @Override
546      public boolean isMax(byte val) {
547        return val == 0;
548      }
549
550      @Override
551      public byte min() {
552        return (byte) 0xFF;
553      }
554    };
555
556    public static Order orderFor(boolean reverse) {
557      return reverse ? DESC : ASC;
558    }
559
560    /** Returns true when {@code lhs < rhs}. */
561    public abstract boolean lt(int lhs, int rhs);
562
563    /** Returns true when {@code lhs > rhs}. */
564    public abstract boolean gt(int lhs, int rhs);
565
566    /** Returns {@code val} incremented by 1. */
567    public abstract byte inc(byte val);
568
569    /** Return true when {@code val} is the maximum value */
570    public abstract boolean isMax(byte val);
571
572    /** Return the minimum value according to this ordering scheme. */
573    public abstract byte min();
574  }
575
576  /**
577   * @return greater byte array than given (row) which satisfies the fuzzy rule if it exists, null
578   *         otherwise
579   */
580  static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
581    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
582    // To find out the next "smallest" byte array that satisfies fuzzy rule and "greater" than
583    // the given one we do the following:
584    // 1. setting values on all "fixed" positions to the values from fuzzyKeyBytes
585    // 2. if during the first step given row did not increase, then we increase the value at
586    // the first "non-fixed" position (where it is not maximum already)
587
588    // It is easier to perform this by using fuzzyKeyBytes copy and setting "non-fixed" position
589    // values than otherwise.
590    byte[] result =
591      Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
592    if (reverse && length > fuzzyKeyBytes.length) {
593      // we need trailing 0xff's instead of trailing 0x00's
594      for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
595        result[i] = (byte) 0xFF;
596      }
597    }
598    int toInc = -1;
599    final Order order = Order.orderFor(reverse);
600
601    boolean increased = false;
602    for (int i = 0; i < result.length; i++) {
603      if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 /* non-fixed */) {
604        result[i] = row[offset + i];
605        if (!order.isMax(row[offset + i])) {
606          // this is "non-fixed" position and is not at max value, hence we can increase it
607          toInc = i;
608        }
609      } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1 /* fixed */) {
610        if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
611          // if setting value for any fixed position increased the original array,
612          // we are OK
613          increased = true;
614          break;
615        }
616
617        if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
618          // if setting value for any fixed position makes array "smaller", then just stop:
619          // in case we found some non-fixed position to increase we will do it, otherwise
620          // there's no "next" row key that satisfies fuzzy rule and "greater" than given row
621          break;
622        }
623      }
624    }
625
626    if (!increased) {
627      if (toInc < 0) {
628        return null;
629      }
630      result[toInc] = order.inc(result[toInc]);
631
632      // Setting all "non-fixed" positions to zeroes to the right of the one we increased so
633      // that found "next" row key is the smallest possible
634      for (int i = toInc + 1; i < result.length; i++) {
635        if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 /* non-fixed */) {
636          result[i] = order.min();
637        }
638      }
639    }
640
641    return reverse ? result : trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
642  }
643
644  /**
645   * For forward scanner, next cell hint should not contain any trailing zeroes unless they are part
646   * of fuzzyKeyMeta hint = '\x01\x01\x01\x00\x00' will skip valid row '\x01\x01\x01'
647   * @param toInc - position of incremented byte
648   * @return trimmed version of result
649   */
650
651  private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
652    int off = fuzzyKeyMeta.length >= result.length ? result.length - 1 : fuzzyKeyMeta.length - 1;
653    for (; off >= 0; off--) {
654      if (fuzzyKeyMeta[off] != 0) break;
655    }
656    if (off < toInc) off = toInc;
657    byte[] retValue = new byte[off + 1];
658    System.arraycopy(result, 0, retValue, 0, retValue.length);
659    return retValue;
660  }
661
662  /**
663   * Returns true if and only if the fields of the filter that are serialized are equal to the
664   * corresponding fields in other. Used for testing.
665   */
666  @Override
667  boolean areSerializedFieldsEqual(Filter o) {
668    if (o == this) {
669      return true;
670    }
671    if (!(o instanceof FuzzyRowFilter)) {
672      return false;
673    }
674    FuzzyRowFilter other = (FuzzyRowFilter) o;
675    if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
676    for (int i = 0; i < fuzzyKeysData.size(); ++i) {
677      Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
678      Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
679      if (
680        !(Bytes.equals(thisData.getFirst(), otherData.getFirst())
681          && Bytes.equals(thisData.getSecond(), otherData.getSecond()))
682      ) {
683        return false;
684      }
685    }
686    return true;
687  }
688
689  @Override
690  public boolean equals(Object obj) {
691    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
692  }
693
694  @Override
695  public int hashCode() {
696    return Objects.hash(this.fuzzyKeysData);
697  }
698}