1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
20
21 import java.util.ArrayList;
22 import java.util.List;
23
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.util.ByteRange;
26 import org.apache.hadoop.hbase.util.ByteRangeUtils;
27 import org.apache.hadoop.hbase.util.Bytes;
28 import org.apache.hadoop.hbase.util.CollectionUtils;
29 import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
30 import org.apache.hadoop.hbase.util.Strings;
31
32 import com.google.common.collect.Lists;
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/**
 * Individual node in the trie that a {@link Tokenizer} builds over a sorted stream of byte
 * ranges (for example row keys). Each node contributes a slice of bytes ({@code token}) to
 * the full keys that pass through it; the complete key for a node is reconstructed by
 * walking parent pointers (see {@link #fillInBytes(byte[])}).
 * <p>
 * A node is exactly one of three kinds, derived from its current state:
 * <ul>
 * <li><b>Leaf</b> — no children and at least one occurrence ({@link #isLeaf()})</li>
 * <li><b>Branch</b> — children but no occurrences ({@link #isBranch()})</li>
 * <li><b>Nub</b> — children AND occurrences, i.e. an input key ends exactly here while
 * longer keys continue below ({@link #isNub()})</li>
 * </ul>
 * Not thread-safe. Instances are reusable via {@link #reset()} and
 * {@link #reconstruct(Tokenizer, TokenizerNode, int, int, int, int)}.
 */
@InterfaceAudience.Private
public class TokenizerNode{

  /**
   * Owning tokenizer. Supplies the shared byte store backing {@code token}
   * (see {@link #reconstruct}) and allocates child nodes via {@code builder.addNode}.
   */
  protected Tokenizer builder;

  /** Parent node; {@code null} only for the root (see {@link #isRoot()}). */
  protected TokenizerNode parent;

  /**
   * Depth of this node in the tree. Kept current through splits via
   * {@link #incrementNodeDepthRecursively()}, and reported to the builder as a
   * max-depth candidate.
   */
  protected int nodeDepth;

  /**
   * Offset of this node's token within the full key: the number of key bytes
   * contributed by all ancestors (see {@link #fillInBytes(byte[])}).
   */
  protected int tokenStartOffset;

  /**
   * The byte slice this node contributes to every key passing through it. A view into
   * {@code builder.tokens} (set in {@link #reconstruct}), shortened in place during
   * {@link #split(int, ByteRange)}.
   */
  protected ByteRange token;

  /** Number of input keys that terminate exactly at this node (duplicates included). */
  protected int numOccurrences;

  /**
   * Child nodes. Input arrives in sorted order, so only the LAST child can ever share a
   * prefix with a newly added key (see {@link #addSorted(ByteRange)}).
   */
  protected ArrayList<TokenizerNode> children;

  /** Identifier assigned by {@code builder.nextNodeId()}; an id, not an array index. */
  protected long id;

  /**
   * Index of this node's first occurrence in overall insertion order, assigned during the
   * pre-order walk in {@link #setInsertionIndexes(int)}; -1 until assigned.
   */
  protected int firstInsertionIndex = -1;

  // NOTE(review): the two fields below are only touched through plain accessors in this
  // class; they are assigned by the encoder elsewhere. Exact semantics not visible here.
  protected int negativeIndex = 0;

  protected int outputArrayOffset = -1;


  /***************** construction and reuse ******************/

  public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
      int tokenStartOffset, int tokenOffset, int tokenLength) {
    this.token = new SimpleMutableByteRange();
    reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
    this.children = Lists.newArrayList();
  }

  /**
   * Re-initialize this (possibly recycled) node.
   * @param tokenOffset offset of this node's token within {@code builder.tokens}
   * @param tokenLength length of this node's token in bytes
   */
  public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
      int tokenStartOffset, int tokenOffset, int tokenLength) {
    this.builder = builder;
    this.id = builder.nextNodeId();
    this.parent = parent;
    this.nodeDepth = nodeDepth;
    builder.submitMaxNodeDepthCandidate(nodeDepth);
    this.tokenStartOffset = tokenStartOffset;
    // token is a view into the builder's shared byte store, not a copy
    this.token.set(builder.tokens, tokenOffset, tokenLength);
    this.numOccurrences = 1; // a node is only created because one key reached it
  }

  /** Clear all state so the node can be pooled and later {@link #reconstruct}ed. */
  public void reset() {
    builder = null;
    parent = null;
    nodeDepth = 0;
    tokenStartOffset = 0;
    token.unset();
    numOccurrences = 0;
    children.clear();

    // output fields assigned during encoding
    id = 0;
    firstInsertionIndex = -1;
    negativeIndex = 0;
    outputArrayOffset = -1;
  }


  /******************** building the trie **********************/

  /**
   * Add a key to the subtree rooted at this node. Keys MUST be added in sorted order;
   * that invariant is what allows the method to only ever descend into the last child.
   * <p>
   * Outcomes: the key fully matches this node's token and ends here (occurrence count is
   * bumped), continues past it (tail handed to a new child or recursed into the last
   * child), or diverges mid-token (this node is {@link #split(int, ByteRange)}).
   */
  public void addSorted(final ByteRange bytes) {
    // Because input is sorted, a new key can only extend the most recently added child;
    // earlier children are already finalized.
    if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
      TokenizerNode lastChild = CollectionUtils.getLast(children);
      if (lastChild.partiallyMatchesToken(bytes)) {
        lastChild.addSorted(bytes);
        return;
      }
    }

    // How much of this node's token does the key share, and how much key remains after it?
    int numIdenticalTokenBytes = numIdenticalBytes(bytes); // <= token.getLength()
    int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
    int tailLength = bytes.getLength() - tailOffset;

    if (numIdenticalTokenBytes == token.getLength()) {
      if (tailLength == 0) { // identical to a key already ending at this node (duplicate)
        incrementNumOccurrences(1);
      } else { // key continues past this node's token: append a new child with the tail
        int childNodeDepth = nodeDepth + 1;
        int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
        TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
            bytes, tailOffset);
        addChild(newChildNode);
      }
    } else { // key diverges inside this node's token: split at the divergence point
      split(numIdenticalTokenBytes, bytes);
    }
  }

  /** Attach {@code node} as this node's newest (last) child. */
  protected void addChild(TokenizerNode node) {
    node.setParent(this);
    children.add(node);
  }

  /**
   * Split this node at {@code numTokenBytesToRetain}. The retained prefix stays in this
   * node; the remainder of the old token moves into a new first child, which inherits this
   * node's occurrences and children. The divergent tail of {@code bytes} becomes a new
   * second child. This node itself becomes a branch (zero occurrences).
   */
  protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
    int childNodeDepth = nodeDepth; // corrected below by incrementNodeDepthRecursively
    int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;

    // first child inherits everything this node held below the split point
    TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
        token, numTokenBytesToRetain);
    firstChild.setNumOccurrences(numOccurrences); // must happen before clearing below
    token.setLength(numTokenBytesToRetain); // keep only the shared prefix in this node
    numOccurrences = 0; // this node is now a pure branch

    moveChildrenToDifferentParent(firstChild);
    addChild(firstChild);

    // second child holds the tail of the newly added key
    TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
        bytes, tokenStartOffset + numTokenBytesToRetain);
    addChild(secondChild);

    // the split inserted one level below this node, so both new subtrees sink one level
    firstChild.incrementNodeDepthRecursively();
    secondChild.incrementNodeDepthRecursively();
  }

  /** Increase the depth of this node and its whole subtree by one (used after a split). */
  protected void incrementNodeDepthRecursively() {
    ++nodeDepth;
    builder.submitMaxNodeDepthCandidate(nodeDepth);
    for (int i = 0; i < children.size(); ++i) {
      children.get(i).incrementNodeDepthRecursively();
    }
  }

  /** Reparent all of this node's children onto {@code newParent}, leaving this node childless. */
  protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.setParent(newParent);
      newParent.children.add(child);
    }
    children.clear();
  }


  /************************ byte comparison **********************/

  /** True if {@code bytes} shares at least one byte with this node's token slice. */
  protected boolean partiallyMatchesToken(ByteRange bytes) {
    return numIdenticalBytes(bytes) > 0;
  }

  /** True if {@code bytes} covers this node's entire token slice. */
  protected boolean matchesToken(ByteRange bytes) {
    return numIdenticalBytes(bytes) == getTokenLength();
  }

  /** Length of the common prefix between this node's token and the key slice starting
   * at {@code tokenStartOffset} within {@code bytes}. */
  protected int numIdenticalBytes(ByteRange bytes) {
    return ByteRangeUtils.numEqualPrefixBytes(token, bytes, tokenStartOffset);
  }


  /******************** tree traversal ************************/

  /**
   * Pre-order append of this node and its descendants into {@code appendTo}, filtered by
   * the leaf/non-leaf flags.
   */
  public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
      boolean includeLeaves) {
    if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) {
      appendTo.add(this);
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
    }
  }

  /**
   * Pre-order assignment of {@link #firstInsertionIndex} for every node with occurrences;
   * each occurrence consumes one index.
   * @param nextIndex first index available to this subtree
   * @return the next index available after this subtree
   */
  public int setInsertionIndexes(int nextIndex) {
    int newNextIndex = nextIndex;
    if (hasOccurrences()) {
      setFirstInsertionIndex(nextIndex);
      newNextIndex += numOccurrences;
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      newNextIndex = child.setInsertionIndexes(newNextIndex);
    }
    return newNextIndex;
  }

  /** Pre-order collection of {@link #outputArrayOffset} for every node with occurrences. */
  public void appendOutputArrayOffsets(List<Integer> offsets) {
    if (hasOccurrences()) {
      offsets.add(outputArrayOffset);
    }
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      child.appendOutputArrayOffsets(offsets);
    }
  }


  /************************* searching *************************/

  /**
   * Search this subtree for an exact match of {@code key} and record the outcome in
   * {@code resultHolder}: {@code MATCH} with the matching node, or {@code NO_MATCH} with
   * {@code null}. Only nodes with occurrences count as matches.
   */
  public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
      int keyLength) {
    int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();

    // a leaf whose full key is shorter than the search key can never match
    if (CollectionUtils.isEmpty(children)) {
      if (thisNodeDepthPlusLength < keyLength) {
        resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
        return;
      }
    }

    // compare this node's token against the corresponding slice of the key
    for (int i = 0; i < token.getLength(); ++i) {
      if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
        // diverged inside this node's token
        resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
        return;
      }
    }

    // the key ends exactly at this node and at least one input key did too
    if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
      resultHolder.set(TokenizerRowSearchPosition.MATCH, this);
      return;
    }

    if (CollectionUtils.notEmpty(children)) {
      // linear scan of the (sorted) children; stop on a match or once we have
      // passed the position where the key would sort (BEFORE)
      for (int i = 0; i < children.size(); ++i) {
        TokenizerNode child = children.get(i);
        child.getNode(resultHolder, key, keyOffset, keyLength);
        if (resultHolder.isMatch()) {
          return;
        } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
          // no later sibling can match either
          resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
          return;
        }
        // otherwise keep scanning the remaining children
      }
    }

    // token matched but the key neither ends here nor continues into any child
    resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
    return;
  }


  /**************** key reconstruction **********************/

  /** Allocate an array sized for this node's full key and fill it via an ancestor walk. */
  public byte[] getNewByteArray() {
    byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
    fillInBytes(arrayToFill);
    return arrayToFill;
  }

  /**
   * Copy this node's token into {@code arrayToFill} at its offset within the full key,
   * then recurse upward so ancestors fill in the earlier bytes. The array must be at
   * least {@code tokenStartOffset + token.getLength()} bytes long.
   */
  public void fillInBytes(byte[] arrayToFill) {
    for (int i = 0; i < token.getLength(); ++i) {
      arrayToFill[tokenStartOffset + i] = token.get(i);
    }
    if (parent != null) {
      parent.fillInBytes(arrayToFill);
    }
  }


  /**************** printing / debugging *********************/

  @Override
  public String toString() {
    String s = "";
    if (parent == null) {
      s += "R "; // root
    } else {
      s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
    }
    s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
    if (numOccurrences > 0) {
      s += "x" + numOccurrences;
    }
    return s;
  }

  /**
   * Fixed-width debug line: kind indicator, occurrence count, depth, optional output
   * array offset, then the token indented to its position within the full key (spaces
   * inside the token shown as underscores so alignment stays readable).
   */
  public String getPaddedTokenAndOccurrenceString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getBnlIndicator(true));
    sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
    sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
    if (outputArrayOffset >= 0) {
      sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
    }
    sb.append(" ");
    for (int i = 0; i < tokenStartOffset; ++i) {
      sb.append(" ");
    }
    sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
    return sb.toString();
  }

  /** Branch/Nub/Leaf indicator; {@code indent=true} pads the letter for column alignment. */
  public String getBnlIndicator(boolean indent) {
    if (indent) {
      if (isNub()) {
        return " N ";
      }
      return isBranch() ? "B " : " L";
    }
    if (isNub()) {
      return "N";
    }
    return isBranch() ? "B" : "L";
  }


  /************** subtree statistics ***********************/

  /** Count of branch nodes in this subtree, including this node if it is a branch. */
  public int getNumBranchNodesIncludingThisNode() {
    if (isLeaf()) {
      return 0;
    }
    int totalFromThisPlusChildren = isBranch() ? 1 : 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
    }
    return totalFromThisPlusChildren;
  }

  /** Count of nub nodes in this subtree, including this node if it is a nub. */
  public int getNumNubNodesIncludingThisNode() {
    if (isLeaf()) {
      return 0;
    }
    int totalFromThisPlusChildren = isNub() ? 1 : 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
    }
    return totalFromThisPlusChildren;
  }

  /** Count of leaf nodes in this subtree, including this node if it is a leaf. */
  public int getNumLeafNodesIncludingThisNode() {
    if (isLeaf()) {
      return 1;
    }
    int totalFromChildren = 0;
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      totalFromChildren += child.getNumLeafNodesIncludingThisNode();
    }
    return totalFromChildren;
  }


  /*********************** simple read methods *******************/

  public int getNodeDepth() {
    return nodeDepth;
  }

  public int getTokenLength() {
    return token.getLength();
  }

  /** True if at least one input key terminates at this node. */
  public boolean hasOccurrences() {
    return numOccurrences > 0;
  }

  public boolean isRoot() {
    return this.parent == null;
  }

  public int getNumChildren() {
    return CollectionUtils.nullSafeSize(children);
  }

  /** Last (most recently added, sort-greatest) child, or {@code null} if childless. */
  public TokenizerNode getLastChild() {
    if (CollectionUtils.isEmpty(children)) {
      return null;
    }
    return CollectionUtils.getLast(children);
  }

  /** No children and at least one occurrence. */
  public boolean isLeaf() {
    return CollectionUtils.isEmpty(children) && hasOccurrences();
  }

  /** Children but no occurrences. */
  public boolean isBranch() {
    return CollectionUtils.notEmpty(children) && !hasOccurrences();
  }

  /** Children and occurrences: a key ends here while longer keys continue below. */
  public boolean isNub() {
    return CollectionUtils.notEmpty(children) && hasOccurrences();
  }


  /********************** simple mutation methods *******************/

  /** Add {@code d} to this node's occurrence count (callers pass 1 per duplicate key). */
  public void incrementNumOccurrences(int d) {
    numOccurrences += d;
  }


  /************************* get/set ***************************/

  public int getTokenOffset() {
    return tokenStartOffset;
  }

  public TokenizerNode getParent() {
    return parent;
  }

  public ByteRange getToken() {
    return token;
  }

  public int getNumOccurrences() {
    return numOccurrences;
  }

  public void setParent(TokenizerNode parent) {
    this.parent = parent;
  }

  public void setNumOccurrences(int numOccurrences) {
    this.numOccurrences = numOccurrences;
  }

  public ArrayList<TokenizerNode> getChildren() {
    return children;
  }

  public long getId() {
    return id;
  }

  public int getFirstInsertionIndex() {
    return firstInsertionIndex;
  }

  public void setFirstInsertionIndex(int firstInsertionIndex) {
    this.firstInsertionIndex = firstInsertionIndex;
  }

  public int getNegativeIndex() {
    return negativeIndex;
  }

  public void setNegativeIndex(int negativeIndex) {
    this.negativeIndex = negativeIndex;
  }

  public int getOutputArrayOffset() {
    return outputArrayOffset;
  }

  public void setOutputArrayOffset(int outputArrayOffset) {
    this.outputArrayOffset = outputArrayOffset;
  }

  public void setId(long id) {
    this.id = id;
  }

  public void setBuilder(Tokenizer builder) {
    this.builder = builder;
  }

  public void setTokenOffset(int tokenOffset) {
    this.tokenStartOffset = tokenOffset;
  }

  public void setToken(ByteRange token) {
    this.token = token;
  }

}