1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
20
21 import java.util.ArrayList;
22 import java.util.List;
23
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.util.ByteRange;
26 import org.apache.hadoop.hbase.util.ByteRangeUtils;
27 import org.apache.hadoop.hbase.util.Bytes;
28 import org.apache.hadoop.hbase.util.CollectionUtils;
29 import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
30 import org.apache.hadoop.hbase.util.Strings;
31
32 import com.google.common.collect.Lists;
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@InterfaceAudience.Private
public class TokenizerNode{

  /*
   * A single node in the trie built by {@link Tokenizer}. Each node stores one token (a byte
   * sequence shared by all inputs that pass through it) plus the number of inputs that end exactly
   * at this node. Based on the predicates at the bottom of this class, a node is one of three
   * kinds:
   *   Leaf   - no children and numOccurrences > 0
   *   Branch - has children and numOccurrences == 0
   *   Nub    - has children and numOccurrences > 0
   * Nodes are allocated and recycled through the owning Tokenizer (see reconstruct/reset), so the
   * class is mutable by design.
   */

  /***************** fields ************************************/

  // The Tokenizer that owns this node; supplies node ids, the shared token byte store, and new
  // child nodes (builder.addNode).
  protected Tokenizer builder;

  // Parent node in the trie; null for the root (see isRoot()).
  protected TokenizerNode parent;

  // Depth of this node in the trie (root presumably 0 — set by the caller of the constructor).
  // Kept up to date through incrementNodeDepthRecursively() after splits.
  protected int nodeDepth;

  // Offset of this node's token within the full key: the number of bytes contributed by all
  // ancestors. Used to index into caller-supplied key arrays (see getNode, fillInBytes).
  protected int tokenStartOffset;

  // This node's token bytes, as a view into the builder's shared byte store
  // (see reconstruct: token.set(builder.tokens, ...)).
  protected ByteRange token;

  // How many inputs terminate exactly at this node. 0 for pure branch nodes.
  protected int numOccurrences;

  // Child nodes. Inserts arrive sorted (addSorted), so new children are always appended at the
  // end and the last child is the only candidate for extending a match.
  protected ArrayList<TokenizerNode> children;

  /*
   * Output-stage bookkeeping. These fields are written by external code via the setters below;
   * their exact semantics are defined by the encoder that consumes this trie.
   */

  // Unique id assigned by builder.nextNodeId() on (re)construction.
  protected long id;

  // Index of the first input (in overall insertion order) that ends at this node; assigned by
  // setInsertionIndexes(). -1 until assigned.
  protected int firstInsertionIndex = -1;

  // NOTE(review): set only via setNegativeIndex(); not used within this class — semantics are
  // defined by the encoder. Verify against callers.
  protected int negativeIndex = 0;

  // Offset of this node in the encoder's output array; -1 until assigned via
  // setOutputArrayOffset(). Printed by getPaddedTokenAndOccurrenceString() when >= 0.
  protected int outputArrayOffset = -1;


  /******************* construction and reset *******************/

  /**
   * Create a node and initialize it via {@link #reconstruct}. The token ByteRange wrapper is
   * allocated once here and re-pointed on each reconstruct, so recycled nodes do not reallocate.
   */
  public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
      int tokenStartOffset, int tokenOffset, int tokenLength) {
    this.token = new SimpleMutableByteRange();
    reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
    this.children = Lists.newArrayList();
  }

  /**
   * (Re)initialize this node for reuse from the builder's node pool.
   *
   * @param tokenStartOffset offset of this token within the full reconstructed key
   * @param tokenOffset      offset of the token bytes inside builder.tokens
   * @param tokenLength      length of the token in bytes
   */
  public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
      int tokenStartOffset, int tokenOffset, int tokenLength) {
    this.builder = builder;
    this.id = builder.nextNodeId();
    this.parent = parent;
    this.nodeDepth = nodeDepth;
    builder.submitMaxNodeDepthCandidate(nodeDepth);
    this.tokenStartOffset = tokenStartOffset;
    // Point the reusable ByteRange at this node's slice of the builder's shared token store.
    this.token.set(builder.tokens, tokenOffset, tokenLength);
    // A node is created for exactly one input, so it starts with one occurrence.
    this.numOccurrences = 1;
  }

  /**
   * Clear all state so the node can be returned to the builder's pool and later reconstructed.
   */
  public void reset() {
    builder = null;
    parent = null;
    nodeDepth = 0;
    tokenStartOffset = 0;
    token.unset();
    numOccurrences = 0;
    children.clear();

    // output-stage fields
    id = 0;
    firstInsertionIndex = -1;
    negativeIndex = 0;
    outputArrayOffset = -1;
  }


  /************************* building *********************************/

  /**
   * Insert a new key into the trie rooted at this node. Keys must arrive in sorted order
   * (hence "addSorted"): because of that, only the LAST child can ever share a prefix with the
   * incoming key, so descent never needs to scan earlier siblings.
   *
   * @param bytes the full key being inserted (this node consumes the slice starting at
   *              tokenStartOffset)
   */
  public void addSorted(final ByteRange bytes) {
    /*
     * Fast path: if the incoming key fully matches this node's token and the last child shares
     * at least one byte of the remainder, descend into that child.
     */
    if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
      TokenizerNode lastChild = CollectionUtils.getLast(children);
      if (lastChild.partiallyMatchesToken(bytes)) {
        lastChild.addSorted(bytes);
        return;
      }
    }

    /*
     * Otherwise the key diverges at (or ends within) this node. Three cases:
     *  - key ends exactly at this token: count another occurrence
     *  - key fully matches this token but has a tail: append a new child holding the tail
     *  - key diverges inside this token: split this node at the divergence point
     */
    int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length
    int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
    int tailLength = bytes.getLength() - tailOffset;

    if (numIdenticalTokenBytes == token.getLength()) {
      if (tailLength == 0) {// identical to this node - add an occurrence
        incrementNumOccurrences(1);
      } else {// identical to this node, but with a tail - append a new child for the tail
        int childNodeDepth = nodeDepth + 1;
        int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
        TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
          bytes, tailOffset);
        addChild(newChildNode);
      }
    } else {// diverges inside this token - split this node
      split(numIdenticalTokenBytes, bytes);
    }
  }

  /** Append a child and fix its parent pointer. Children stay in sorted insertion order. */
  protected void addChild(TokenizerNode node) {
    node.setParent(this);
    children.add(node);
  }


  /**
   * Split this node at a divergence point inside its token. This node keeps the first
   * numTokenBytesToRetain bytes; the remaining token bytes (plus this node's occurrence count
   * and all existing children) move into a new "first" child, and the divergent tail of the
   * incoming key becomes a new "second" child. After the move both subtrees are one level
   * deeper, which incrementNodeDepthRecursively() records.
   *
   * @param numTokenBytesToRetain number of leading token bytes this node keeps
   * @param bytes                 the full incoming key that triggered the split
   */
  protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
    int childNodeDepth = nodeDepth;
    int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;

    // create the first child, which inherits this node's token remainder and occurrences
    TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
      token, numTokenBytesToRetain);
    firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences
    token.setLength(numTokenBytesToRetain);// truncate this node's token to the retained prefix
    numOccurrences = 0;// this node is now a branch

    moveChildrenToDifferentParent(firstChild);
    addChild(firstChild);

    // create the second child holding the divergent tail of the incoming key
    TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
      bytes, tokenStartOffset + numTokenBytesToRetain);
    addChild(secondChild);

    // both children were created with this node's old depth; the whole moved subtree is now one
    // level deeper
    firstChild.incrementNodeDepthRecursively();
    secondChild.incrementNodeDepthRecursively();
  }

  /** Add one to the depth of this node and every descendant, reporting each to the builder. */
  protected void incrementNodeDepthRecursively() {
    ++nodeDepth;
    builder.submitMaxNodeDepthCandidate(nodeDepth);
    for (int i = 0; i < children.size(); ++i) {
      children.get(i).incrementNodeDepthRecursively();
    }
  }

  /** Transfer all of this node's children (in order) to newParent, leaving this node childless. */
  protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.setParent(newParent);
      newParent.children.add(child);
    }
    children.clear();
  }


  /************************ byte methods ***********************/

  /** True if the incoming key shares at least one byte with this node's token. */
  protected boolean partiallyMatchesToken(ByteRange bytes) {
    return numIdenticalBytes(bytes) > 0;
  }

  /** True if the incoming key matches this node's entire token. */
  protected boolean matchesToken(ByteRange bytes) {
    return numIdenticalBytes(bytes) == getTokenLength();
  }

  /**
   * Number of leading bytes of this node's token that equal the incoming key's bytes starting at
   * tokenStartOffset.
   */
  protected int numIdenticalBytes(ByteRange bytes) {
    return ByteRangeUtils.numEqualPrefixBytes(token, bytes, tokenStartOffset);
  }


  /***************** moving nodes around ************************/

  /**
   * Pre-order-append this node and/or its descendants to an external list, filtered by kind.
   * A node is appended when it is a leaf and includeLeaves is set, or a non-leaf and
   * includeNonLeaves is set.
   */
  public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
      boolean includeLeaves) {
    if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) {
      appendTo.add(this);
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
    }
  }

  /**
   * Assign firstInsertionIndex for this subtree in pre-order, advancing the counter by this
   * node's occurrence count. Returns the next unused index so the parent can continue.
   *
   * @param nextIndex first index available to this subtree
   * @return the next index available after this subtree
   */
  public int setInsertionIndexes(int nextIndex) {
    int newNextIndex = nextIndex;
    if (hasOccurrences()) {
      setFirstInsertionIndex(nextIndex);
      newNextIndex += numOccurrences;
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      newNextIndex = child.setInsertionIndexes(newNextIndex);
    }
    return newNextIndex;
  }

  /**
   * Pre-order-append the outputArrayOffset of every node with occurrences in this subtree.
   * Offsets are appended as previously assigned via setOutputArrayOffset().
   */
  public void appendOutputArrayOffsets(List<Integer> offsets) {
    if (hasOccurrences()) {
      offsets.add(outputArrayOffset);
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.appendOutputArrayOffsets(offsets);
    }
  }


  /**
   * Recursively search this subtree for a node whose accumulated token bytes exactly equal the
   * given key and which has occurrences, writing the outcome into resultHolder (MATCH with the
   * node, or NO_MATCH with null). NOTE(review): walks every token byte and recurses through
   * children, so it is relatively expensive — presumably intended for testing/debugging rather
   * than the encode hot path; confirm against callers.
   *
   * @param resultHolder receives the search outcome
   * @param key          the key to look up
   * @param keyOffset    offset of the key within the array
   * @param keyLength    length of the key
   */
  public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
      int keyLength) {
    int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();

    // quick check: a leaf whose accumulated bytes fall short of the key can never match
    if (CollectionUtils.isEmpty(children)) {
      if (thisNodeDepthPlusLength < keyLength) {// leaf too short to be this key
        resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
        return;
      }
    }

    // compare this node's token bytes against the corresponding slice of the key
    for (int i = 0; i < token.getLength(); ++i) {
      if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
        // TODO(review): could set BEFORE/AFTER here to allow pruning in the caller's loop below
        resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
        return;
      }
    }

    if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
      resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// found it
      return;
    }

    if (CollectionUtils.notEmpty(children)) {
      // TODO(review): linear scan of children; could binary search since children are sorted
      for (int i = 0; i < children.size(); ++i) {
        TokenizerNode child = children.get(i);
        child.getNode(resultHolder, key, keyOffset, keyLength);
        if (resultHolder.isMatch()) {
          return;
        } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
          // passed the sorted position where the key would have been - it cannot exist
          resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
          return;
        }
        // otherwise keep searching the remaining children
      }
    }

    // no child matched
    resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
    return;
  }


  /**
   * Reconstruct this node's full key (all ancestor tokens plus this token) into a new array of
   * exactly the right length.
   */
  public byte[] getNewByteArray() {
    byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
    fillInBytes(arrayToFill);
    return arrayToFill;
  }

  /**
   * Copy this node's token into arrayToFill at tokenStartOffset, then recurse up through the
   * ancestors so the whole key prefix is filled in.
   */
  public void fillInBytes(byte[] arrayToFill) {
    for (int i = 0; i < token.getLength(); ++i) {
      arrayToFill[tokenStartOffset + i] = token.get(i);
    }
    if (parent != null) {
      parent.fillInBytes(arrayToFill);
    }
  }


  /****************** printing *********************/

  /** Debug string: kind indicator, parent's full key (or "R" for root), token, occurrences. */
  @Override
  public String toString() {
    String s = "";
    if (parent == null) {
      s += "R ";// root
    } else {
      s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
    }
    s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
    if (numOccurrences > 0) {
      s += "x" + numOccurrences;
    }
    return s;
  }

  /**
   * One debug line for this node: kind indicator, occurrences, depth, (optional) output offset,
   * then the token indented by tokenStartOffset so tokens line up under their full-key position.
   * Spaces inside the token are shown as underscores so the indentation stays readable.
   */
  public String getPaddedTokenAndOccurrenceString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getBnlIndicator(true));
    sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
    sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
    if (outputArrayOffset >= 0) {
      sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
    }
    sb.append("  ");
    for (int i = 0; i < tokenStartOffset; ++i) {
      sb.append(" ");
    }
    sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
    return sb.toString();
  }

  /**
   * Kind indicator for debug output: B=branch, N=nub, L=leaf. When indent is true the letter is
   * padded to a fixed width with the three kinds offset differently, so trie printouts show kind
   * by column as well as by letter.
   */
  public String getBnlIndicator(boolean indent) {
    if (indent) {
      if (isNub()) {
        return " N ";
      }
      return isBranch() ? "B  " : "  L";
    }
    if (isNub()) {
      return "N";
    }
    return isBranch() ? "B" : "L";
  }


  /********************** count different node types ********************/

  /** Count branch nodes (children, no occurrences) in this subtree, including this node. */
  public int getNumBranchNodesIncludingThisNode() {
    if (isLeaf()) {
      return 0;
    }
    int totalFromThisPlusChildren = isBranch() ? 1 : 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
    }
    return totalFromThisPlusChildren;
  }

  /** Count nub nodes (children AND occurrences) in this subtree, including this node. */
  public int getNumNubNodesIncludingThisNode() {
    if (isLeaf()) {
      return 0;
    }
    int totalFromThisPlusChildren = isNub() ? 1 : 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
    }
    return totalFromThisPlusChildren;
  }

  /** Count leaf nodes (no children, occurrences > 0) in this subtree, including this node. */
  public int getNumLeafNodesIncludingThisNode() {
    if (isLeaf()) {
      return 1;
    }
    int totalFromChildren = 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromChildren += child.getNumLeafNodesIncludingThisNode();
    }
    return totalFromChildren;
  }


  /*********************** simple read-only methods *******************************/

  public int getNodeDepth() {
    return nodeDepth;
  }

  public int getTokenLength() {
    return token.getLength();
  }

  public boolean hasOccurrences() {
    return numOccurrences > 0;
  }

  public boolean isRoot() {
    return this.parent == null;
  }

  public int getNumChildren() {
    return CollectionUtils.nullSafeSize(children);
  }

  /** Last (greatest, since inserts are sorted) child, or null if this node has no children. */
  public TokenizerNode getLastChild() {
    if (CollectionUtils.isEmpty(children)) {
      return null;
    }
    return CollectionUtils.getLast(children);
  }

  /** Leaf: no children and at least one occurrence. */
  public boolean isLeaf() {
    return CollectionUtils.isEmpty(children) && hasOccurrences();
  }

  /** Branch: has children but no input ends here. */
  public boolean isBranch() {
    return CollectionUtils.notEmpty(children) && !hasOccurrences();
  }

  /** Nub: has children and at least one input ends here. */
  public boolean isNub() {
    return CollectionUtils.notEmpty(children) && hasOccurrences();
  }


  /********************** simple mutation methods *************************/

  /**
   * Add d to this node's occurrence count. Called with 1 when an inserted key ends exactly at
   * this node (see addSorted).
   *
   * @param d amount to increment by
   */
  public void incrementNumOccurrences(int d) {
    numOccurrences += d;
  }


  /************************* autogenerated get/set ******************/

  public int getTokenOffset() {
    return tokenStartOffset;
  }

  public TokenizerNode getParent() {
    return parent;
  }

  public ByteRange getToken() {
    return token;
  }

  public int getNumOccurrences() {
    return numOccurrences;
  }

  public void setParent(TokenizerNode parent) {
    this.parent = parent;
  }

  public void setNumOccurrences(int numOccurrences) {
    this.numOccurrences = numOccurrences;
  }

  public ArrayList<TokenizerNode> getChildren() {
    return children;
  }

  public long getId() {
    return id;
  }

  public int getFirstInsertionIndex() {
    return firstInsertionIndex;
  }

  public void setFirstInsertionIndex(int firstInsertionIndex) {
    this.firstInsertionIndex = firstInsertionIndex;
  }

  public int getNegativeIndex() {
    return negativeIndex;
  }

  public void setNegativeIndex(int negativeIndex) {
    this.negativeIndex = negativeIndex;
  }

  public int getOutputArrayOffset() {
    return outputArrayOffset;
  }

  public void setOutputArrayOffset(int outputArrayOffset) {
    this.outputArrayOffset = outputArrayOffset;
  }

  public void setId(long id) {
    this.id = id;
  }

  public void setBuilder(Tokenizer builder) {
    this.builder = builder;
  }

  public void setTokenOffset(int tokenOffset) {
    this.tokenStartOffset = tokenOffset;
  }

  public void setToken(ByteRange token) {
    this.token = token;
  }

}