View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.codec.prefixtree.decode;
20  
21  import org.apache.hadoop.classification.InterfaceAudience;
22  import org.apache.hadoop.hbase.Cell;
23  import org.apache.hadoop.hbase.CellUtil;
24  import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
25  import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
26  import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
27  
28  import com.google.common.primitives.UnsignedBytes;
29  
30  /**
31   * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
32   * position itself on a requested Cell without scanning through cells before it. The PrefixTree is
33   * set up to be a Trie of rows, so finding a particular row is extremely cheap.
34   * <p/>
35   * Once it finds the row, it does a binary search through the cells inside the row, which is not as
36   * fast as the trie search, but faster than iterating through every cell like existing block
37   * formats
38   * do. For this reason, this implementation is targeted towards schemas where rows are narrow
39   * enough
40   * to have several or many per block, and where you are generally looking for the entire row or
41   * the
42   * first cell. It will still be fast for wide rows or point queries, but could be improved upon.
43   */
44  @InterfaceAudience.Private
45  public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
46      CellSearcher {
47  
48    /*************** construct ******************************/
49  
50    public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
51        int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
52      super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
53    }
54  
55  
56    /********************* CellSearcher methods *******************/
57  
58    @Override
59    public boolean positionAt(Cell key) {
60      return CellScannerPosition.AT == positionAtOrAfter(key);
61    }
62  
63    @Override
64    public CellScannerPosition positionAtOrBefore(Cell key) {
65      reInitFirstNode();
66      int fanIndex = -1;
67  
68      while(true){
69        //detect row mismatch.  break loop if mismatch
70        int currentNodeDepth = rowLength;
71        int rowTokenComparison = compareToCurrentToken(key);
72        if(rowTokenComparison != 0){
73          return fixRowTokenMissReverse(rowTokenComparison);
74        }
75  
76        //exact row found, move on to qualifier & ts
77        if(rowMatchesAfterCurrentPosition(key)){
78          return positionAtQualifierTimestamp(key, true);
79        }
80  
81        //detect dead end (no fan to descend into)
82        if(!currentRowNode.hasFan()){
83          if(hasOccurrences()){//must be leaf or nub
84            populateLastNonRowFields();
85            return CellScannerPosition.BEFORE;
86          }else{
87            //TODO i don't think this case is exercised by any tests
88            return fixRowFanMissReverse(0);
89          }
90        }
91  
92        //keep hunting for the rest of the row
93        byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
94        fanIndex = currentRowNode.whichFanNode(searchForByte);
95        if(fanIndex < 0){//no matching row.  return early
96          int insertionPoint = -fanIndex;
97          return fixRowFanMissReverse(insertionPoint);
98        }
99        //found a match, so dig deeper into the tree
100       followFan(fanIndex);
101     }
102   }
103 
104   /**
105    * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra
106    * if-statements. Priority on readability and debugability.
107    */
108   @Override
109   public CellScannerPosition positionAtOrAfter(Cell key) {
110     reInitFirstNode();
111     int fanIndex = -1;
112 
113     while(true){
114       //detect row mismatch.  break loop if mismatch
115       int currentNodeDepth = rowLength;
116       int rowTokenComparison = compareToCurrentToken(key);
117       if(rowTokenComparison != 0){
118         return fixRowTokenMissForward(rowTokenComparison);
119       }
120 
121       //exact row found, move on to qualifier & ts
122       if(rowMatchesAfterCurrentPosition(key)){
123         return positionAtQualifierTimestamp(key, false);
124       }
125 
126       //detect dead end (no fan to descend into)
127       if(!currentRowNode.hasFan()){
128         if(hasOccurrences()){
129           if (rowLength < key.getRowLength()) {
130             nextRow();
131           } else {
132             populateFirstNonRowFields();
133           }
134           return CellScannerPosition.AFTER;
135         }else{
136           //TODO i don't think this case is exercised by any tests
137           return fixRowFanMissForward(0);
138         }
139       }
140 
141       //keep hunting for the rest of the row
142       byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
143       fanIndex = currentRowNode.whichFanNode(searchForByte);
144       if(fanIndex < 0){//no matching row.  return early
145         int insertionPoint = -fanIndex;
146         return fixRowFanMissForward(insertionPoint);
147       }
148       //found a match, so dig deeper into the tree
149       followFan(fanIndex);
150     }
151   }
152 
153   @Override
154   public boolean seekForwardTo(Cell key) {
155     if(currentPositionIsAfter(key)){
156       //our position is after the requested key, so can't do anything
157       return false;
158     }
159     return positionAt(key);
160   }
161 
162   @Override
163   public CellScannerPosition seekForwardToOrBefore(Cell key) {
164     //Do we even need this check or should upper layers avoid this situation.  It's relatively
165     //expensive compared to the rest of the seek operation.
166     if(currentPositionIsAfter(key)){
167       //our position is after the requested key, so can't do anything
168       return CellScannerPosition.AFTER;
169     }
170 
171     return positionAtOrBefore(key);
172   }
173 
174   @Override
175   public CellScannerPosition seekForwardToOrAfter(Cell key) {
176     //Do we even need this check or should upper layers avoid this situation.  It's relatively
177     //expensive compared to the rest of the seek operation.
178     if(currentPositionIsAfter(key)){
179       //our position is after the requested key, so can't do anything
180       return CellScannerPosition.AFTER;
181     }
182 
183     return positionAtOrAfter(key);
184   }
185 
186   /**
187    * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
188    */
189   @Override
190   public void positionAfterLastCell() {
191     resetToBeforeFirstEntry();
192     beforeFirst = false;
193     afterLast = true;
194   }
195 
196 
197   /***************** Object methods ***************************/
198 
199   @Override
200   public boolean equals(Object obj) {
201     //trivial override to confirm intent (findbugs)
202     return super.equals(obj);
203   }
204 
205 
206   /****************** internal methods ************************/
207 
208   protected boolean currentPositionIsAfter(Cell cell){
209     return compareTo(cell) > 0;
210   }
211 
212   protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
213     int minIndex = 0;
214     int maxIndex = currentRowNode.getLastCellIndex();
215     int diff;
216     while (true) {
217       int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
218       diff = populateNonRowFieldsAndCompareTo(midIndex, key);
219 
220       if (diff == 0) {// found exact match
221         return CellScannerPosition.AT;
222       } else if (minIndex == maxIndex) {// even termination case
223         break;
224       } else if ((minIndex + 1) == maxIndex) {// odd termination case
225         diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
226         if(diff > 0){
227           diff = populateNonRowFieldsAndCompareTo(minIndex, key);
228         }
229         break;
230       } else if (diff < 0) {// keep going forward
231         minIndex = currentCellIndex;
232       } else {// went past it, back up
233         maxIndex = currentCellIndex;
234       }
235     }
236 
237     if (diff == 0) {
238       return CellScannerPosition.AT;
239 
240     } else if (diff < 0) {// we are before key
241       if (beforeOnMiss) {
242         return CellScannerPosition.BEFORE;
243       }
244       if (advance()) {
245         return CellScannerPosition.AFTER;
246       }
247       return CellScannerPosition.AFTER_LAST;
248 
249     } else {// we are after key
250       if (!beforeOnMiss) {
251         return CellScannerPosition.AFTER;
252       }
253       if (previous()) {
254         return CellScannerPosition.BEFORE;
255       }
256       return CellScannerPosition.BEFORE_FIRST;
257     }
258   }
259 
260   /**
261    * compare this.row to key.row but starting at the current rowLength
262    * @param key Cell being searched for
263    * @return true if row buffer contents match key.row
264    */
265   protected boolean rowMatchesAfterCurrentPosition(Cell key) {
266     if (!currentRowNode.hasOccurrences()) {
267       return false;
268     }
269     int thatRowLength = key.getRowLength();
270     if (rowLength != thatRowLength) {
271       return false;
272     }
273     return true;
274   }
275 
276   // TODO move part of this to Cell comparator?
277   /**
278    * Compare only the bytes within the window of the current token
279    * @param key
280    * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after
281    */
282   protected int compareToCurrentToken(Cell key) {
283     int startIndex = rowLength - currentRowNode.getTokenLength();
284     int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
285     for (int i = startIndex; i < endIndexExclusive; ++i) {
286       if (i >= key.getRowLength()) {// key was shorter, so it's first
287         return -1;
288       }
289       byte keyByte = CellUtil.getRowByte(key, i);
290       byte thisByte = rowBuffer[i];
291       if (keyByte == thisByte) {
292         continue;
293       }
294       return UnsignedBytes.compare(keyByte, thisByte);
295     }
296     return 0;
297   }
298 
299   protected void followLastFansUntilExhausted(){
300     while(currentRowNode.hasFan()){
301       followLastFan();
302     }
303   }
304 
305 
306 	/****************** complete seek when token mismatch ******************/
307 
308   /**
309    * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
310    *          >0: input key is after the searcher's position
311    */
312   protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
313     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
314       boolean foundPreviousRow = previousRow(true);
315       if(foundPreviousRow){
316         populateLastNonRowFields();
317         return CellScannerPosition.BEFORE;
318       }else{
319         return CellScannerPosition.BEFORE_FIRST;
320       }
321 
322     }else{//searcher position is before the input key
323       if(currentRowNode.hasOccurrences()){
324         populateFirstNonRowFields();
325         return CellScannerPosition.BEFORE;
326       }
327       boolean foundNextRow = nextRow();
328       if(foundNextRow){
329         return CellScannerPosition.AFTER;
330       }else{
331         return CellScannerPosition.AFTER_LAST;
332       }
333     }
334   }
335 
336   /**
337    * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
338    *                   >0: input key is after the searcher's position
339    */
340   protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
341     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
342       if(currentRowNode.hasOccurrences()){
343         populateFirstNonRowFields();
344         return CellScannerPosition.AFTER;
345       }
346       boolean foundNextRow = nextRow();
347       if(foundNextRow){
348         return CellScannerPosition.AFTER;
349       }else{
350         return CellScannerPosition.AFTER_LAST;
351       }
352 
353     }else{//searcher position is before the input key, so go forward
354       discardCurrentRowNode(true);
355       boolean foundNextRow = nextRow();
356       if(foundNextRow){
357         return CellScannerPosition.AFTER;
358       }else{
359         return CellScannerPosition.AFTER_LAST;
360       }
361     }
362   }
363 
364 
365   /****************** complete seek when fan mismatch ******************/
366 
367   protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
368     if(fanInsertionPoint == 0){//we need to back up a row
369       boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
370       if(foundPreviousRow){
371         populateLastNonRowFields();
372         return CellScannerPosition.BEFORE;
373       }
374       return CellScannerPosition.BEFORE_FIRST;
375     }
376 
377     //follow the previous fan, but then descend recursively forward
378     followFan(fanInsertionPoint - 1);
379     followLastFansUntilExhausted();
380     populateLastNonRowFields();
381     return CellScannerPosition.BEFORE;
382   }
383 
384   protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
385     if(fanInsertionPoint >= currentRowNode.getFanOut()){
386       discardCurrentRowNode(true);
387       if (!nextRow()) {
388         return CellScannerPosition.AFTER_LAST;
389       } else {
390         return CellScannerPosition.AFTER;
391       }
392     }
393 
394     followFan(fanInsertionPoint);
395     if(hasOccurrences()){
396       populateFirstNonRowFields();
397       return CellScannerPosition.AFTER;
398     }
399 
400     if(nextRowInternal()){
401       populateFirstNonRowFields();
402       return CellScannerPosition.AFTER;
403 
404     }else{
405       return CellScannerPosition.AFTER_LAST;
406     }
407   }
408 
409 }