1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.codec.prefixtree.encode.column;
20
21 import java.io.IOException;
22 import java.io.OutputStream;
23 import java.util.ArrayList;
24 import java.util.List;
25
26 import org.apache.hadoop.hbase.classification.InterfaceAudience;
27 import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
28 import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
29 import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
30 import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
31 import org.apache.hadoop.hbase.util.CollectionUtils;
32 import org.apache.hadoop.hbase.util.vint.UFIntTool;
33
34 import com.google.common.collect.Lists;
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55 @InterfaceAudience.Private
56 public class ColumnSectionWriter {
57
58 public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;
59
60
61
62 private PrefixTreeBlockMeta blockMeta;
63
64 private ColumnNodeType nodeType;
65 private Tokenizer tokenizer;
66 private int numBytes = 0;
67 private ArrayList<TokenizerNode> nonLeaves;
68 private ArrayList<TokenizerNode> leaves;
69 private ArrayList<TokenizerNode> allNodes;
70 private ArrayList<ColumnNodeWriter> columnNodeWriters;
71 private List<Integer> outputArrayOffsets;
72
73
74
75
76 public ColumnSectionWriter() {
77 this.nonLeaves = Lists.newArrayList();
78 this.leaves = Lists.newArrayList();
79 this.outputArrayOffsets = Lists.newArrayList();
80 }
81
82 public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
83 ColumnNodeType nodeType) {
84 this();
85 reconstruct(blockMeta, builder, nodeType);
86 }
87
88 public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
89 ColumnNodeType nodeType) {
90 this.blockMeta = blockMeta;
91 this.tokenizer = builder;
92 this.nodeType = nodeType;
93 }
94
95 public void reset() {
96 numBytes = 0;
97 nonLeaves.clear();
98 leaves.clear();
99 outputArrayOffsets.clear();
100 }
101
102
103
104
105 public ColumnSectionWriter compile() {
106 if (this.nodeType == ColumnNodeType.FAMILY) {
107
108 } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
109 blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
110 } else {
111 blockMeta.setMaxTagsLength(tokenizer.getMaxElementLength());
112 }
113 compilerInternals();
114 return this;
115 }
116
117 protected void compilerInternals() {
118 tokenizer.setNodeFirstInsertionIndexes();
119 tokenizer.appendNodes(nonLeaves, true, false);
120
121 tokenizer.appendNodes(leaves, false, true);
122
123 allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
124 allNodes.addAll(nonLeaves);
125 allNodes.addAll(leaves);
126
127 columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
128 for (int i = 0; i < allNodes.size(); ++i) {
129 TokenizerNode node = allNodes.get(i);
130 columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
131 }
132
133
134 int totalBytesWithoutOffsets = 0;
135 for (int i = allNodes.size() - 1; i >= 0; --i) {
136 ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
137
138 totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
139 }
140
141
142 int parentOffsetWidth = 0;
143 while (true) {
144 ++parentOffsetWidth;
145 int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
146 if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
147 numBytes = numBytesFinder;
148 break;
149 }
150 }
151 if (this.nodeType == ColumnNodeType.FAMILY) {
152 blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
153 } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
154 blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
155 } else {
156 blockMeta.setTagsOffsetWidth(parentOffsetWidth);
157 }
158
159 int forwardIndex = 0;
160 for (int i = 0; i < allNodes.size(); ++i) {
161 TokenizerNode node = allNodes.get(i);
162 ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
163 int fullNodeWidth = columnNodeWriter
164 .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
165 node.setOutputArrayOffset(forwardIndex);
166 columnNodeWriter.setTokenBytes(node.getToken());
167 if (node.isRoot()) {
168 columnNodeWriter.setParentStartPosition(0);
169 } else {
170 columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
171 }
172 forwardIndex += fullNodeWidth;
173 }
174
175 tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
176 }
177
178 public void writeBytes(OutputStream os) throws IOException {
179 for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
180 columnNodeWriter.writeBytes(os);
181 }
182 }
183
184
185
186
187 public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
188 return columnNodeWriters;
189 }
190
191 public int getNumBytes() {
192 return numBytes;
193 }
194
195 public int getOutputArrayOffset(int sortedIndex) {
196 return outputArrayOffsets.get(sortedIndex);
197 }
198
199 public ArrayList<TokenizerNode> getNonLeaves() {
200 return nonLeaves;
201 }
202
203 public ArrayList<TokenizerNode> getLeaves() {
204 return leaves;
205 }
206
207 }