View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.codec.prefixtree.decode;
20  
21  import org.apache.hadoop.hbase.classification.InterfaceAudience;
22  import org.apache.hadoop.hbase.Cell;
23  import org.apache.hadoop.hbase.CellUtil;
24  import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
25  import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
26  import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
27  
28  import com.google.common.primitives.UnsignedBytes;
29  
30  /**
31   * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
32   * position itself on a requested Cell without scanning through cells before it. The PrefixTree is
33   * set up to be a Trie of rows, so finding a particular row is extremely cheap.
34   * <p/>
35   * Once it finds the row, it does a binary search through the cells inside the row, which is not as
36   * fast as the trie search, but faster than iterating through every cell like existing block
37   * formats
38   * do. For this reason, this implementation is targeted towards schemas where rows are narrow
39   * enough
40   * to have several or many per block, and where you are generally looking for the entire row or
41   * the
42   * first cell. It will still be fast for wide rows or point queries, but could be improved upon.
43   */
44  @InterfaceAudience.Private
45  public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
46      CellSearcher {
47  
48    /*************** construct ******************************/
49  
50    public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
51        int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
52      super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
53    }
54  
55  
56    /********************* CellSearcher methods *******************/
57  
58    @Override
59    public boolean positionAt(Cell key) {
60      return CellScannerPosition.AT == positionAtOrAfter(key);
61    }
62  
63    @Override
64    public CellScannerPosition positionAtOrBefore(Cell key) {
65      reInitFirstNode();
66      int fanIndex = -1;
67  
68      while(true){
69        //detect row mismatch.  break loop if mismatch
70        int currentNodeDepth = rowLength;
71        int rowTokenComparison = compareToCurrentToken(key);
72        if(rowTokenComparison != 0){
73          return fixRowTokenMissReverse(rowTokenComparison);
74        }
75  
76        //exact row found, move on to qualifier & ts
77        if(rowMatchesAfterCurrentPosition(key)){
78          return positionAtQualifierTimestamp(key, true);
79        }
80  
81        //detect dead end (no fan to descend into)
82        if(!currentRowNode.hasFan()){
83          if(hasOccurrences()){//must be leaf or nub
84            populateLastNonRowFields();
85            return CellScannerPosition.BEFORE;
86          }else{
87            //TODO i don't think this case is exercised by any tests
88            return fixRowFanMissReverse(0);
89          }
90        }
91  
92        //keep hunting for the rest of the row
93        byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
94        fanIndex = currentRowNode.whichFanNode(searchForByte);
95        if(fanIndex < 0){//no matching row.  return early
96          int insertionPoint = -fanIndex - 1;
97          return fixRowFanMissReverse(insertionPoint);
98        }
99        //found a match, so dig deeper into the tree
100       followFan(fanIndex);
101     }
102   }
103 
104   /**
105    * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra
106    * if-statements. Priority on readability and debugability.
107    */
108   @Override
109   public CellScannerPosition positionAtOrAfter(Cell key) {
110     reInitFirstNode();
111     int fanIndex = -1;
112 
113     while(true){
114       //detect row mismatch.  break loop if mismatch
115       int currentNodeDepth = rowLength;
116       int rowTokenComparison = compareToCurrentToken(key);
117       if(rowTokenComparison != 0){
118         return fixRowTokenMissForward(rowTokenComparison);
119       }
120 
121       //exact row found, move on to qualifier & ts
122       if(rowMatchesAfterCurrentPosition(key)){
123         return positionAtQualifierTimestamp(key, false);
124       }
125 
126       //detect dead end (no fan to descend into)
127       if(!currentRowNode.hasFan()){
128         if(hasOccurrences()){
129           if (rowLength < key.getRowLength()) {
130             nextRow();
131           } else {
132             populateFirstNonRowFields();
133           }
134           return CellScannerPosition.AFTER;
135         }else{
136           //TODO i don't think this case is exercised by any tests
137           return fixRowFanMissForward(0);
138         }
139       }
140 
141       //keep hunting for the rest of the row
142       byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
143       fanIndex = currentRowNode.whichFanNode(searchForByte);
144       if(fanIndex < 0){//no matching row.  return early
145         int insertionPoint = -fanIndex - 1;
146         return fixRowFanMissForward(insertionPoint);
147       }
148       //found a match, so dig deeper into the tree
149       followFan(fanIndex);
150     }
151   }
152 
153   @Override
154   public boolean seekForwardTo(Cell key) {
155     if(currentPositionIsAfter(key)){
156       //our position is after the requested key, so can't do anything
157       return false;
158     }
159     return positionAt(key);
160   }
161 
162   @Override
163   public CellScannerPosition seekForwardToOrBefore(Cell key) {
164     //Do we even need this check or should upper layers avoid this situation.  It's relatively
165     //expensive compared to the rest of the seek operation.
166     if(currentPositionIsAfter(key)){
167       //our position is after the requested key, so can't do anything
168       return CellScannerPosition.AFTER;
169     }
170 
171     return positionAtOrBefore(key);
172   }
173 
174   @Override
175   public CellScannerPosition seekForwardToOrAfter(Cell key) {
176     //Do we even need this check or should upper layers avoid this situation.  It's relatively
177     //expensive compared to the rest of the seek operation.
178     if(currentPositionIsAfter(key)){
179       //our position is after the requested key, so can't do anything
180       return CellScannerPosition.AFTER;
181     }
182 
183     return positionAtOrAfter(key);
184   }
185 
186   /**
187    * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
188    */
189   @Override
190   public void positionAfterLastCell() {
191     resetToBeforeFirstEntry();
192     beforeFirst = false;
193     afterLast = true;
194   }
195 
196 
197   /***************** Object methods ***************************/
198 
199   @Override
200   public boolean equals(Object obj) {
201     //trivial override to confirm intent (findbugs)
202     return super.equals(obj);
203   }
204 
205 
206   /****************** internal methods ************************/
207 
208   protected boolean currentPositionIsAfter(Cell cell){
209     return compareTo(cell) > 0;
210   }
211 
212   protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
213     int minIndex = 0;
214     int maxIndex = currentRowNode.getLastCellIndex();
215     int diff;
216     while (true) {
217       int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
218       diff = populateNonRowFieldsAndCompareTo(midIndex, key);
219 
220       if (diff == 0) {// found exact match
221         return CellScannerPosition.AT;
222       } else if (minIndex == maxIndex) {// even termination case
223         break;
224       } else if ((minIndex + 1) == maxIndex) {// odd termination case
225         diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
226         if(diff > 0){
227           diff = populateNonRowFieldsAndCompareTo(minIndex, key);
228         }
229         break;
230       } else if (diff < 0) {// keep going forward
231         minIndex = currentCellIndex;
232       } else {// went past it, back up
233         maxIndex = currentCellIndex;
234       }
235     }
236 
237     if (diff == 0) {
238       return CellScannerPosition.AT;
239 
240     } else if (diff < 0) {// we are before key
241       if (beforeOnMiss) {
242         return CellScannerPosition.BEFORE;
243       }
244       if (advance()) {
245         return CellScannerPosition.AFTER;
246       }
247       return CellScannerPosition.AFTER_LAST;
248 
249     } else {// we are after key
250       if (!beforeOnMiss) {
251         return CellScannerPosition.AFTER;
252       }
253       if (previous()) {
254         return CellScannerPosition.BEFORE;
255       }
256       return CellScannerPosition.BEFORE_FIRST;
257     }
258   }
259 
260   /**
261    * compare this.row to key.row but starting at the current rowLength
262    * @param key Cell being searched for
263    * @return true if row buffer contents match key.row
264    */
265   protected boolean rowMatchesAfterCurrentPosition(Cell key) {
266     if (!currentRowNode.hasOccurrences()) {
267       return false;
268     }
269     int thatRowLength = key.getRowLength();
270     if (rowLength != thatRowLength) {
271       return false;
272     }
273     return true;
274   }
275 
276   // TODO move part of this to Cell comparator?
277   /**
278    * Compare only the bytes within the window of the current token
279    * @param key
280    * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after
281    */
282   protected int compareToCurrentToken(Cell key) {
283     int startIndex = rowLength - currentRowNode.getTokenLength();
284     int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
285     for (int i = startIndex; i < endIndexExclusive; ++i) {
286       if (i >= key.getRowLength()) {// key was shorter, so it's first
287         return -1;
288       }
289       byte keyByte = CellUtil.getRowByte(key, i);
290       byte thisByte = rowBuffer[i];
291       if (keyByte == thisByte) {
292         continue;
293       }
294       return UnsignedBytes.compare(keyByte, thisByte);
295     }
296     if (!currentRowNode.hasOccurrences() && rowLength >= key.getRowLength()) { // key was shorter
297         return -1;
298     }
299     return 0;
300   }
301 
302   protected void followLastFansUntilExhausted(){
303     while(currentRowNode.hasFan()){
304       followLastFan();
305     }
306   }
307 
308 
309   /****************** complete seek when token mismatch ******************/
310 
311   /**
312    * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
313    *          >0: input key is after the searcher's position
314    */
315   protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
316     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
317       boolean foundPreviousRow = previousRow(true);
318       if(foundPreviousRow){
319         populateLastNonRowFields();
320         return CellScannerPosition.BEFORE;
321       }else{
322         return CellScannerPosition.BEFORE_FIRST;
323       }
324 
325     }else{//searcher position is before the input key
326       if(currentRowNode.hasOccurrences()){
327         populateFirstNonRowFields();
328         return CellScannerPosition.BEFORE;
329       }
330       boolean foundNextRow = nextRow();
331       if(foundNextRow){
332         return CellScannerPosition.AFTER;
333       }else{
334         return CellScannerPosition.AFTER_LAST;
335       }
336     }
337   }
338 
339   /**
340    * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
341    *                   >0: input key is after the searcher's position
342    */
343   protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
344     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
345       if(currentRowNode.hasOccurrences()){
346         populateFirstNonRowFields();
347         return CellScannerPosition.AFTER;
348       }
349       boolean foundNextRow = nextRow();
350       if(foundNextRow){
351         return CellScannerPosition.AFTER;
352       }else{
353         return CellScannerPosition.AFTER_LAST;
354       }
355 
356     }else{//searcher position is before the input key, so go forward
357       discardCurrentRowNode(true);
358       boolean foundNextRow = nextRow();
359       if(foundNextRow){
360         return CellScannerPosition.AFTER;
361       }else{
362         return CellScannerPosition.AFTER_LAST;
363       }
364     }
365   }
366 
367 
368   /****************** complete seek when fan mismatch ******************/
369 
370   protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
371     if(fanInsertionPoint == 0){//we need to back up a row
372       if (currentRowNode.hasOccurrences()) {
373         populateLastNonRowFields();
374         return CellScannerPosition.BEFORE;
375       }
376       boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
377       if(foundPreviousRow){
378         populateLastNonRowFields();
379         return CellScannerPosition.BEFORE;
380       }
381       return CellScannerPosition.BEFORE_FIRST;
382     }
383 
384     //follow the previous fan, but then descend recursively forward
385     followFan(fanInsertionPoint - 1);
386     followLastFansUntilExhausted();
387     populateLastNonRowFields();
388     return CellScannerPosition.BEFORE;
389   }
390 
391   protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
392     if(fanInsertionPoint >= currentRowNode.getFanOut()){
393       discardCurrentRowNode(true);
394       if (!nextRow()) {
395         return CellScannerPosition.AFTER_LAST;
396       } else {
397         return CellScannerPosition.AFTER;
398       }
399     }
400 
401     followFan(fanInsertionPoint);
402     if(hasOccurrences()){
403       populateFirstNonRowFields();
404       return CellScannerPosition.AFTER;
405     }
406 
407     if(nextRowInternal()){
408       populateFirstNonRowFields();
409       return CellScannerPosition.AFTER;
410 
411     }else{
412       return CellScannerPosition.AFTER_LAST;
413     }
414   }
415 
416 }