1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Comparator;
23 import java.util.List;
24 import java.util.PriorityQueue;
25
26 import org.apache.hadoop.hbase.Cell;
27 import org.apache.hadoop.hbase.KeyValueUtil;
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.classification.InterfaceStability;
30 import org.apache.hadoop.hbase.exceptions.DeserializationException;
31 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
32 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
33 import org.apache.hadoop.hbase.util.ByteStringer;
34 import org.apache.hadoop.hbase.util.Bytes;
35 import org.apache.hadoop.hbase.util.Pair;
36 import org.apache.hadoop.hbase.util.UnsafeAccess;
37 import org.apache.hadoop.hbase.util.UnsafeAvailChecker;
38
39 import com.google.common.annotations.VisibleForTesting;
40 import com.google.protobuf.InvalidProtocolBufferException;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60 @InterfaceAudience.Public
61 @InterfaceStability.Evolving
62 public class FuzzyRowFilter extends FilterBase {
63 private static final boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned();
64 private List<Pair<byte[], byte[]>> fuzzyKeysData;
65 private boolean done = false;
66
67
68
69
70
71
72 private int lastFoundIndex = -1;
73
74
75
76
77 private RowTracker tracker;
78
79 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
80 Pair<byte[], byte[]> p;
81 for (int i = 0; i < fuzzyKeysData.size(); i++) {
82 p = fuzzyKeysData.get(i);
83 if (p.getFirst().length != p.getSecond().length) {
84 Pair<String, String> readable =
85 new Pair<String, String>(Bytes.toStringBinary(p.getFirst()), Bytes.toStringBinary(p
86 .getSecond()));
87 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
88 }
89
90 p.setSecond(preprocessMask(p.getSecond()));
91 preprocessSearchKey(p);
92 }
93 this.fuzzyKeysData = fuzzyKeysData;
94 this.tracker = new RowTracker();
95 }
96
97 private void preprocessSearchKey(Pair<byte[], byte[]> p) {
98 if (!UNSAFE_UNALIGNED) {
99 return;
100 }
101 byte[] key = p.getFirst();
102 byte[] mask = p.getSecond();
103 for (int i = 0; i < mask.length; i++) {
104
105 if (mask[i] == 2) {
106 key[i] = 0;
107 }
108 }
109 }
110
111
112
113
114
115
116
117 private byte[] preprocessMask(byte[] mask) {
118 if (!UNSAFE_UNALIGNED) {
119 return mask;
120 }
121 if (isPreprocessedMask(mask)) return mask;
122 for (int i = 0; i < mask.length; i++) {
123 if (mask[i] == 0) {
124 mask[i] = -1;
125 } else if (mask[i] == 1) {
126 mask[i] = 2;
127 }
128 }
129 return mask;
130 }
131
132 private boolean isPreprocessedMask(byte[] mask) {
133 for (int i = 0; i < mask.length; i++) {
134 if (mask[i] != -1 && mask[i] != 2) {
135 return false;
136 }
137 }
138 return true;
139 }
140
141 @Override
142 public ReturnCode filterKeyValue(Cell c) {
143 final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
144 final int size = fuzzyKeysData.size();
145 for (int i = startIndex; i < size + startIndex; i++) {
146 final int index = i % size;
147 Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
148
149 for (int j = 0; j < fuzzyData.getSecond().length; j++) {
150 fuzzyData.getSecond()[j] >>= 2;
151 }
152 SatisfiesCode satisfiesCode =
153 satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), c.getRowLength(),
154 fuzzyData.getFirst(), fuzzyData.getSecond());
155 if (satisfiesCode == SatisfiesCode.YES) {
156 lastFoundIndex = index;
157 return ReturnCode.INCLUDE;
158 }
159 }
160
161 lastFoundIndex = -1;
162
163 return ReturnCode.SEEK_NEXT_USING_HINT;
164
165 }
166
167 @Override
168 public Cell getNextCellHint(Cell currentCell) {
169 boolean result = tracker.updateTracker(currentCell);
170 if (result == false) {
171 done = true;
172 return null;
173 }
174 byte[] nextRowKey = tracker.nextRow();
175 return KeyValueUtil.createFirstOnRow(nextRowKey);
176 }
177
178
179
180
181
182
183
184
185
186 private class RowTracker {
187 private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
188 private boolean initialized = false;
189
190 RowTracker() {
191 nextRows =
192 new PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>>(fuzzyKeysData.size(),
193 new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
194 @Override
195 public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
196 Pair<byte[], Pair<byte[], byte[]>> o2) {
197 int compare = Bytes.compareTo(o1.getFirst(), o2.getFirst());
198 if (!isReversed()) {
199 return compare;
200 } else {
201 return -compare;
202 }
203 }
204 });
205 }
206
207 byte[] nextRow() {
208 if (nextRows.isEmpty()) {
209 throw new IllegalStateException(
210 "NextRows should not be empty, make sure to call nextRow() after updateTracker() return true");
211 } else {
212 return nextRows.peek().getFirst();
213 }
214 }
215
216 boolean updateTracker(Cell currentCell) {
217 if (!initialized) {
218 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
219 updateWith(currentCell, fuzzyData);
220 }
221 initialized = true;
222 } else {
223 while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
224 Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
225 Pair<byte[], byte[]> fuzzyData = head.getSecond();
226 updateWith(currentCell, fuzzyData);
227 }
228 }
229 return !nextRows.isEmpty();
230 }
231
232 boolean lessThan(Cell currentCell, byte[] nextRowKey) {
233 int compareResult =
234 Bytes.compareTo(currentCell.getRowArray(), currentCell.getRowOffset(),
235 currentCell.getRowLength(), nextRowKey, 0, nextRowKey.length);
236 return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
237 }
238
239 void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
240 byte[] nextRowKeyCandidate =
241 getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
242 currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
243 if (nextRowKeyCandidate != null) {
244 nextRows.add(new Pair<byte[], Pair<byte[], byte[]>>(nextRowKeyCandidate, fuzzyData));
245 }
246 }
247
248 }
249
250 @Override
251 public boolean filterAllRemaining() {
252 return done;
253 }
254
255
256
257
258 public byte[] toByteArray() {
259 FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder();
260 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
261 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
262 bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
263 bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
264 builder.addFuzzyKeysData(bbpBuilder);
265 }
266 return builder.build().toByteArray();
267 }
268
269
270
271
272
273
274
275 public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
276 FilterProtos.FuzzyRowFilter proto;
277 try {
278 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
279 } catch (InvalidProtocolBufferException e) {
280 throw new DeserializationException(e);
281 }
282 int count = proto.getFuzzyKeysDataCount();
283 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<Pair<byte[], byte[]>>(count);
284 for (int i = 0; i < count; ++i) {
285 BytesBytesPair current = proto.getFuzzyKeysData(i);
286 byte[] keyBytes = current.getFirst().toByteArray();
287 byte[] keyMeta = current.getSecond().toByteArray();
288 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
289 }
290 return new FuzzyRowFilter(fuzzyKeysData);
291 }
292
293 @Override
294 public String toString() {
295 final StringBuilder sb = new StringBuilder();
296 sb.append("FuzzyRowFilter");
297 sb.append("{fuzzyKeysData=");
298 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
299 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
300 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
301 }
302 sb.append("}, ");
303 return sb.toString();
304 }
305
306
307
308 static enum SatisfiesCode {
309
310 YES,
311
312 NEXT_EXISTS,
313
314 NO_NEXT
315 }
316
317 @VisibleForTesting
318 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
319 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
320 }
321
322 @VisibleForTesting
323 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
324 byte[] fuzzyKeyMeta) {
325 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
326 }
327
328 static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
329 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
330
331 if (!UNSAFE_UNALIGNED) {
332 return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
333 }
334
335 if (row == null) {
336
337 return SatisfiesCode.YES;
338 }
339 length = Math.min(length, fuzzyKeyBytes.length);
340 int numWords = length / Bytes.SIZEOF_LONG;
341 int offsetAdj = offset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
342
343 int j = numWords << 3;
344
345 for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
346
347 long fuzzyBytes =
348 UnsafeAccess.theUnsafe.getLong(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
349 + (long) i);
350 long fuzzyMeta =
351 UnsafeAccess.theUnsafe.getLong(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
352 + (long) i);
353 long rowValue = UnsafeAccess.theUnsafe.getLong(row, offsetAdj + (long) i);
354 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
355
356 return SatisfiesCode.NEXT_EXISTS;
357 }
358 }
359
360 int off = j;
361
362 if (length - off >= Bytes.SIZEOF_INT) {
363 int fuzzyBytes =
364 UnsafeAccess.theUnsafe.getInt(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
365 + (long) off);
366 int fuzzyMeta =
367 UnsafeAccess.theUnsafe.getInt(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
368 + (long) off);
369 int rowValue = UnsafeAccess.theUnsafe.getInt(row, offsetAdj + (long) off);
370 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
371
372 return SatisfiesCode.NEXT_EXISTS;
373 }
374 off += Bytes.SIZEOF_INT;
375 }
376
377 if (length - off >= Bytes.SIZEOF_SHORT) {
378 short fuzzyBytes =
379 UnsafeAccess.theUnsafe.getShort(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
380 + (long) off);
381 short fuzzyMeta =
382 UnsafeAccess.theUnsafe.getShort(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
383 + (long) off);
384 short rowValue = UnsafeAccess.theUnsafe.getShort(row, offsetAdj + (long) off);
385 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
386
387
388
389 return SatisfiesCode.NEXT_EXISTS;
390 }
391 off += Bytes.SIZEOF_SHORT;
392 }
393
394 if (length - off >= Bytes.SIZEOF_BYTE) {
395 int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
396 int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
397 int rowValue = row[offset + off] & 0xff;
398 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
399
400 return SatisfiesCode.NEXT_EXISTS;
401 }
402 }
403 return SatisfiesCode.YES;
404 }
405
406 static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
407 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
408 if (row == null) {
409
410 return SatisfiesCode.YES;
411 }
412
413 Order order = Order.orderFor(reverse);
414 boolean nextRowKeyCandidateExists = false;
415
416 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
417
418 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
419 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
420 if (fixedByteIncorrect) {
421
422 if (nextRowKeyCandidateExists) {
423 return SatisfiesCode.NEXT_EXISTS;
424 }
425
426
427
428
429 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
430 if (rowByteLessThanFixed && !reverse) {
431 return SatisfiesCode.NEXT_EXISTS;
432 } else if (!rowByteLessThanFixed && reverse) {
433 return SatisfiesCode.NEXT_EXISTS;
434 } else {
435 return SatisfiesCode.NO_NEXT;
436 }
437 }
438
439
440
441
442
443
444
445 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
446 nextRowKeyCandidateExists = true;
447 }
448 }
449 return SatisfiesCode.YES;
450 }
451
452 @VisibleForTesting
453 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
454 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
455 }
456
457 @VisibleForTesting
458 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
459 byte[] fuzzyKeyMeta) {
460 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
461 }
462
463
464 private enum Order {
465 ASC {
466 public boolean lt(int lhs, int rhs) {
467 return lhs < rhs;
468 }
469
470 public boolean gt(int lhs, int rhs) {
471 return lhs > rhs;
472 }
473
474 public byte inc(byte val) {
475
476 return (byte) (val + 1);
477 }
478
479 public boolean isMax(byte val) {
480 return val == (byte) 0xff;
481 }
482
483 public byte min() {
484 return 0;
485 }
486 },
487 DESC {
488 public boolean lt(int lhs, int rhs) {
489 return lhs > rhs;
490 }
491
492 public boolean gt(int lhs, int rhs) {
493 return lhs < rhs;
494 }
495
496 public byte inc(byte val) {
497
498 return (byte) (val - 1);
499 }
500
501 public boolean isMax(byte val) {
502 return val == 0;
503 }
504
505 public byte min() {
506 return (byte) 0xFF;
507 }
508 };
509
510 public static Order orderFor(boolean reverse) {
511 return reverse ? DESC : ASC;
512 }
513
514
515 public abstract boolean lt(int lhs, int rhs);
516
517
518 public abstract boolean gt(int lhs, int rhs);
519
520
521 public abstract byte inc(byte val);
522
523
524 public abstract boolean isMax(byte val);
525
526
527 public abstract byte min();
528 }
529
530
531
532
533
534 @VisibleForTesting
535 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
536 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
537
538
539
540
541
542
543
544
545 byte[] result =
546 Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
547 if (reverse && length > fuzzyKeyBytes.length) {
548
549 for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
550 result[i] = (byte) 0xFF;
551 }
552 }
553 int toInc = -1;
554 final Order order = Order.orderFor(reverse);
555
556 boolean increased = false;
557 for (int i = 0; i < result.length; i++) {
558 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
559 result[i] = row[offset + i];
560 if (!order.isMax(row[offset + i])) {
561
562 toInc = i;
563 }
564 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1
565 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
566
567
568 increased = true;
569 break;
570 }
571
572 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
573
574
575
576 break;
577 }
578 }
579 }
580
581 if (!increased) {
582 if (toInc < 0) {
583 return null;
584 }
585 result[toInc] = order.inc(result[toInc]);
586
587
588
589 for (int i = toInc + 1; i < result.length; i++) {
590 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
591 result[i] = order.min();
592 }
593 }
594 }
595
596 return reverse? result: trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
597 }
598
599
600
601
602
603
604
605
606
607
608
609
610
611 private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
612 int off = fuzzyKeyMeta.length >= result.length? result.length -1:
613 fuzzyKeyMeta.length -1;
614 for( ; off >= 0; off--){
615 if(fuzzyKeyMeta[off] != 0) break;
616 }
617 if (off < toInc) off = toInc;
618 byte[] retValue = new byte[off+1];
619 System.arraycopy(result, 0, retValue, 0, retValue.length);
620 return retValue;
621 }
622
623
624
625
626
627 boolean areSerializedFieldsEqual(Filter o) {
628 if (o == this) return true;
629 if (!(o instanceof FuzzyRowFilter)) return false;
630
631 FuzzyRowFilter other = (FuzzyRowFilter) o;
632 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
633 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
634 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
635 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
636 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst()) && Bytes.equals(
637 thisData.getSecond(), otherData.getSecond()))) {
638 return false;
639 }
640 }
641 return true;
642 }
643 }