1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.codec.prefixtree.encode.column;
20
21 import java.io.IOException;
22 import java.io.OutputStream;
23 import java.util.ArrayList;
24 import java.util.List;
25
26 import org.apache.hadoop.hbase.classification.InterfaceAudience;
27 import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
28 import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
29 import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
30 import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
31 import org.apache.hadoop.hbase.util.CollectionUtils;
32 import org.apache.hadoop.hbase.util.vint.UFIntTool;
33
34 import com.google.common.collect.Lists;
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 @InterfaceAudience.Private
58 public class ColumnSectionWriter {
59
60 public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;
61
62
63
64 private PrefixTreeBlockMeta blockMeta;
65
66 private ColumnNodeType nodeType;
67 private Tokenizer tokenizer;
68 private int numBytes = 0;
69 private ArrayList<TokenizerNode> nonLeaves;
70 private ArrayList<TokenizerNode> leaves;
71 private ArrayList<TokenizerNode> allNodes;
72 private ArrayList<ColumnNodeWriter> columnNodeWriters;
73 private List<Integer> outputArrayOffsets;
74
75
76
77
78 public ColumnSectionWriter() {
79 this.nonLeaves = Lists.newArrayList();
80 this.leaves = Lists.newArrayList();
81 this.outputArrayOffsets = Lists.newArrayList();
82 }
83
84 public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
85 ColumnNodeType nodeType) {
86 this();
87 reconstruct(blockMeta, builder, nodeType);
88 }
89
90 public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
91 ColumnNodeType nodeType) {
92 this.blockMeta = blockMeta;
93 this.tokenizer = builder;
94 this.nodeType = nodeType;
95 }
96
97 public void reset() {
98 numBytes = 0;
99 nonLeaves.clear();
100 leaves.clear();
101 outputArrayOffsets.clear();
102 }
103
104
105
106
107 public ColumnSectionWriter compile() {
108 if (this.nodeType == ColumnNodeType.FAMILY) {
109
110 } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
111 blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
112 } else {
113 blockMeta.setMaxTagsLength(tokenizer.getMaxElementLength());
114 }
115 compilerInternals();
116 return this;
117 }
118
119 protected void compilerInternals() {
120 tokenizer.setNodeFirstInsertionIndexes();
121 tokenizer.appendNodes(nonLeaves, true, false);
122
123 tokenizer.appendNodes(leaves, false, true);
124
125 allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
126 allNodes.addAll(nonLeaves);
127 allNodes.addAll(leaves);
128
129 columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
130 for (int i = 0; i < allNodes.size(); ++i) {
131 TokenizerNode node = allNodes.get(i);
132 columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
133 }
134
135
136 int totalBytesWithoutOffsets = 0;
137 for (int i = allNodes.size() - 1; i >= 0; --i) {
138 ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
139
140 totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
141 }
142
143
144 int parentOffsetWidth = 0;
145 while (true) {
146 ++parentOffsetWidth;
147 int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
148 if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
149 numBytes = numBytesFinder;
150 break;
151 }
152 }
153 if (this.nodeType == ColumnNodeType.FAMILY) {
154 blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
155 } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
156 blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
157 } else {
158 blockMeta.setTagsOffsetWidth(parentOffsetWidth);
159 }
160
161 int forwardIndex = 0;
162 for (int i = 0; i < allNodes.size(); ++i) {
163 TokenizerNode node = allNodes.get(i);
164 ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
165 int fullNodeWidth = columnNodeWriter
166 .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
167 node.setOutputArrayOffset(forwardIndex);
168 columnNodeWriter.setTokenBytes(node.getToken());
169 if (node.isRoot()) {
170 columnNodeWriter.setParentStartPosition(0);
171 } else {
172 columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
173 }
174 forwardIndex += fullNodeWidth;
175 }
176
177 tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
178 }
179
180 public void writeBytes(OutputStream os) throws IOException {
181 for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
182 columnNodeWriter.writeBytes(os);
183 }
184 }
185
186
187
188
189 public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
190 return columnNodeWriters;
191 }
192
193 public int getNumBytes() {
194 return numBytes;
195 }
196
197 public int getOutputArrayOffset(int sortedIndex) {
198 return outputArrayOffsets.get(sortedIndex);
199 }
200
201 public ArrayList<TokenizerNode> getNonLeaves() {
202 return nonLeaves;
203 }
204
205 public ArrayList<TokenizerNode> getLeaves() {
206 return leaves;
207 }
208
209 }