@InterfaceAudience.Private public class TokenizerNode extends Object
Modifier and Type | Field and Description |
---|---|
protected Tokenizer |
builder |
protected ArrayList<TokenizerNode> |
children |
protected int |
firstInsertionIndex |
protected long |
id |
protected int |
negativeIndex |
protected int |
nodeDepth |
protected int |
numOccurrences |
protected int |
outputArrayOffset |
protected TokenizerNode |
parent
Tree content/structure used during tokenization
|
protected ByteRange |
token |
protected int |
tokenStartOffset |
Constructor and Description |
---|
TokenizerNode(Tokenizer builder,
TokenizerNode parent,
int nodeDepth,
int tokenStartOffset,
int tokenOffset,
int tokenLength)
construct
|
Modifier and Type | Method and Description |
---|---|
protected void |
addChild(TokenizerNode node) |
void |
addSorted(ByteRange bytes)
building
|
void |
appendNodesToExternalList(List<TokenizerNode> appendTo,
boolean includeNonLeaves,
boolean includeLeaves)
moving nodes around
|
void |
appendOutputArrayOffsets(List<Integer> offsets) |
void |
fillInBytes(byte[] arrayToFill) |
String |
getBnlIndicator(boolean indent) |
ArrayList<TokenizerNode> |
getChildren() |
int |
getFirstInsertionIndex() |
long |
getId() |
TokenizerNode |
getLastChild() |
int |
getNegativeIndex() |
byte[] |
getNewByteArray()
writing back to byte[]'s
|
void |
getNode(TokenizerRowSearchResult resultHolder,
byte[] key,
int keyOffset,
int keyLength)
searching
|
int |
getNodeDepth()
simple read-only methods
|
int |
getNumBranchNodesIncludingThisNode()
count different node types
|
int |
getNumChildren() |
int |
getNumLeafNodesIncludingThisNode() |
int |
getNumNubNodesIncludingThisNode() |
int |
getNumOccurrences() |
int |
getOutputArrayOffset() |
String |
getPaddedTokenAndOccurrenceString() |
TokenizerNode |
getParent() |
ByteRange |
getToken() |
int |
getTokenLength() |
int |
getTokenOffset()
autogenerated get/set
|
boolean |
hasOccurrences() |
protected void |
incrementNodeDepthRecursively() |
void |
incrementNumOccurrences(int d)
Each occurrence > 1 indicates a repeat of the previous entry.
|
boolean |
isBranch() |
boolean |
isLeaf() |
boolean |
isNub() |
boolean |
isRoot() |
protected boolean |
matchesToken(ByteRange bytes) |
protected void |
moveChildrenToDifferentParent(TokenizerNode newParent) |
protected int |
numIdenticalBytes(ByteRange bytes) |
protected boolean |
partiallyMatchesToken(ByteRange bytes)
byte[] utils
|
void |
reconstruct(Tokenizer builder,
TokenizerNode parent,
int nodeDepth,
int tokenStartOffset,
int tokenOffset,
int tokenLength) |
void |
reset() |
void |
setBuilder(Tokenizer builder) |
void |
setFirstInsertionIndex(int firstInsertionIndex) |
void |
setId(long id) |
int |
setInsertionIndexes(int nextIndex) |
void |
setNegativeIndex(int negativeIndex) |
void |
setNumOccurrences(int numOccurrences) |
void |
setOutputArrayOffset(int outputArrayOffset) |
void |
setParent(TokenizerNode parent) |
void |
setToken(ByteRange token) |
void |
setTokenOffset(int tokenOffset) |
protected void |
split(int numTokenBytesToRetain,
ByteRange bytes)
Called when we need to convert a leaf node into a branch with 2 leaves.
|
String |
toString()
printing
|
protected Tokenizer builder
protected TokenizerNode parent
protected int nodeDepth
protected int tokenStartOffset
protected ByteRange token
protected int numOccurrences
protected ArrayList<TokenizerNode> children
protected long id
protected int firstInsertionIndex
protected int negativeIndex
protected int outputArrayOffset
public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth, int tokenStartOffset, int tokenOffset, int tokenLength)
public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth, int tokenStartOffset, int tokenOffset, int tokenLength)
public void reset()
public void addSorted(ByteRange bytes)
protected void addChild(TokenizerNode node)
protected void split(int numTokenBytesToRetain, ByteRange bytes)
numTokenBytesToRetain - => 1 (the B)
bytes - => BOO
protected void incrementNodeDepthRecursively()
protected void moveChildrenToDifferentParent(TokenizerNode newParent)
protected boolean partiallyMatchesToken(ByteRange bytes)
protected boolean matchesToken(ByteRange bytes)
protected int numIdenticalBytes(ByteRange bytes)
public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves, boolean includeLeaves)
public int setInsertionIndexes(int nextIndex)
public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset, int keyLength)
public byte[] getNewByteArray()
public void fillInBytes(byte[] arrayToFill)
public String getPaddedTokenAndOccurrenceString()
public String getBnlIndicator(boolean indent)
public int getNumBranchNodesIncludingThisNode()
public int getNumNubNodesIncludingThisNode()
public int getNumLeafNodesIncludingThisNode()
public int getNodeDepth()
public int getTokenLength()
public boolean hasOccurrences()
public boolean isRoot()
public int getNumChildren()
public TokenizerNode getLastChild()
public boolean isLeaf()
public boolean isBranch()
public boolean isNub()
public void incrementNumOccurrences(int d)
d - increment by this amount
public int getTokenOffset()
public TokenizerNode getParent()
public ByteRange getToken()
public int getNumOccurrences()
public void setParent(TokenizerNode parent)
public void setNumOccurrences(int numOccurrences)
public ArrayList<TokenizerNode> getChildren()
public long getId()
public int getFirstInsertionIndex()
public void setFirstInsertionIndex(int firstInsertionIndex)
public int getNegativeIndex()
public void setNegativeIndex(int negativeIndex)
public int getOutputArrayOffset()
public void setOutputArrayOffset(int outputArrayOffset)
public void setId(long id)
public void setBuilder(Tokenizer builder)
public void setTokenOffset(int tokenOffset)
public void setToken(ByteRange token)
Copyright © 2007–2019 The Apache Software Foundation. All rights reserved.