View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  import java.util.ArrayList;
24  import java.util.List;
25  
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
28  import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
29  import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
30  import org.apache.hadoop.hbase.util.vint.UFIntTool;
31  
32  import com.google.common.collect.Lists;
33  
34  /**
35   * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
36   * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
37   * also contains references to offsets in the other sections of the data block that enable the
38   * decoder to match a row key with its qualifier, timestamp, type, value, etc.
39   * <p>
40   * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
41   * internals of each row node.
42   */
43  @InterfaceAudience.Private
44  public class RowSectionWriter {
45  
46    /***************** fields **************************/
47  
48    protected PrefixTreeEncoder prefixTreeEncoder;
49  
50    protected PrefixTreeBlockMeta blockMeta;
51  
52    protected int numBytes;
53  
54    protected ArrayList<TokenizerNode> nonLeaves;
55    protected ArrayList<TokenizerNode> leaves;
56  
57    protected ArrayList<RowNodeWriter> leafWriters;
58    protected ArrayList<RowNodeWriter> nonLeafWriters;
59  
60    protected int numLeafWriters;
61    protected int numNonLeafWriters;
62  
63  
64    /********************* construct **********************/
65  
66    public RowSectionWriter() {
67      this.nonLeaves = Lists.newArrayList();
68      this.leaves = Lists.newArrayList();
69      this.leafWriters = Lists.newArrayList();
70      this.nonLeafWriters = Lists.newArrayList();
71    }
72  
73    public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
74      reconstruct(prefixTreeEncoder);
75    }
76  
77    public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
78      this.prefixTreeEncoder = prefixTreeEncoder;
79      this.blockMeta = prefixTreeEncoder.getBlockMeta();
80      reset();
81    }
82  
83    public void reset() {
84      numBytes = 0;
85      nonLeaves.clear();
86      leaves.clear();
87      numLeafWriters = 0;
88      numNonLeafWriters = 0;
89    }
90  
91  
92    /****************** methods *******************************/
93  
94    public RowSectionWriter compile() {
95      blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
96      prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
97  
98      prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
99      prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
100 
101     // track the starting position of each node in final output
102     int negativeIndex = 0;
103 
104     // create leaf writer nodes
105     // leaf widths are known at this point, so add them up
106     int totalLeafBytes = 0;
107     for (int i = leaves.size() - 1; i >= 0; --i) {
108       TokenizerNode leaf = leaves.get(i);
109       RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
110       ++numLeafWriters;
111       // leaves store all but their first token byte
112       int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
113       totalLeafBytes += leafNodeWidth;
114       negativeIndex += leafNodeWidth;
115       leaf.setNegativeIndex(negativeIndex);
116     }
117 
118     int totalNonLeafBytesWithoutOffsets = 0;
119     int totalChildPointers = 0;
120     for (int i = nonLeaves.size() - 1; i >= 0; --i) {
121       TokenizerNode nonLeaf = nonLeaves.get(i);
122       RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
123       ++numNonLeafWriters;
124       totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
125       totalChildPointers += nonLeaf.getNumChildren();
126     }
127 
128     // figure out how wide our offset FInts are
129     int offsetWidth = 0;
130     while (true) {
131       ++offsetWidth;
132       int offsetBytes = totalChildPointers * offsetWidth;
133       int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
134       if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
135         // it fits
136         numBytes = totalRowBytes;
137         break;
138       }
139     }
140     blockMeta.setNextNodeOffsetWidth(offsetWidth);
141 
142     // populate negativeIndexes
143     for (int i = nonLeaves.size() - 1; i >= 0; --i) {
144       TokenizerNode nonLeaf = nonLeaves.get(i);
145       int writerIndex = nonLeaves.size() - i - 1;
146       RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
147       int nodeWidth = nonLeafWriter.calculateWidth();
148       negativeIndex += nodeWidth;
149       nonLeaf.setNegativeIndex(negativeIndex);
150     }
151 
152     return this;
153   }
154 
155   protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
156       TokenizerNode builderNode) {
157     RowNodeWriter rowNodeWriter = null;
158     //check if there is an existing node we can recycle
159     if (index >= list.size()) {
160       //there are not enough existing nodes, so add a new one which will be retrieved below
161       list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
162     }
163     rowNodeWriter = list.get(index);
164     rowNodeWriter.reset(builderNode);
165     return rowNodeWriter;
166   }
167 
168 
169   public void writeBytes(OutputStream os) throws IOException {
170     for (int i = numNonLeafWriters - 1; i >= 0; --i) {
171       RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
172       nonLeafWriter.write(os);
173     }
174     // duplicates above... written more for clarity right now
175     for (int i = numLeafWriters - 1; i >= 0; --i) {
176       RowNodeWriter leafWriter = leafWriters.get(i);
177       leafWriter.write(os);
178     }
179   }
180 
181 
182   /***************** static ******************************/
183 
184   protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
185       ArrayList<TokenizerNode> ins, boolean leaves) {
186     ArrayList<TokenizerNode> outs = Lists.newArrayList();
187     for (int i = ins.size() - 1; i >= 0; --i) {
188       TokenizerNode n = ins.get(i);
189       if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) {
190         outs.add(ins.get(i));
191       }
192     }
193     return outs;
194   }
195 
196 
197   /************* get/set **************************/
198 
199   public int getNumBytes() {
200     return numBytes;
201   }
202 
203   public ArrayList<TokenizerNode> getNonLeaves() {
204     return nonLeaves;
205   }
206 
207   public ArrayList<TokenizerNode> getLeaves() {
208     return leaves;
209   }
210 
211   public ArrayList<RowNodeWriter> getNonLeafWriters() {
212     return nonLeafWriters;
213   }
214 
215   public ArrayList<RowNodeWriter> getLeafWriters() {
216     return leafWriters;
217   }
218 
219 }