View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hbase.codec.prefixtree.decode;
20  
21  import org.apache.hadoop.classification.InterfaceAudience;
22  import org.apache.hbase.Cell;
23  import org.apache.hbase.cell.CellScannerPosition;
24  import org.apache.hbase.cell.CellTool;
25  import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
26  import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
27  
28  import com.google.common.primitives.UnsignedBytes;
29  
30  /**
31   * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
32   * position itself on a requested Cell without scanning through cells before it. The PrefixTree is
33   * set up to be a Trie of rows, so finding a particular row is extremely cheap.
34   * <p/>
35   * Once it finds the row, it does a binary search through the cells inside the row, which is not as
36   * fast as the trie search, but faster than iterating through every cell like existing block formats
37   * do. For this reason, this implementation is targeted towards schemas where rows are narrow enough
38   * to have several or many per block, and where you are generally looking for the entire row or the
39   * first cell. It will still be fast for wide rows or point queries, but could be improved upon.
40   */
41  @InterfaceAudience.Private
42  public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
43      CellSearcher {
44  
45    /*************** construct ******************************/
46  
47    public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
48        int rowBufferLength, int qualifierBufferLength) {
49      super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
50    }
51  
52  
53    /********************* CellSearcher methods *******************/
54  
55    @Override
56    public boolean positionAt(Cell key) {
57      return CellScannerPosition.AT == positionAtOrAfter(key);
58    }
59  
60    @Override
61    public CellScannerPosition positionAtOrBefore(Cell key) {
62      reInitFirstNode();
63      int fanIndex = -1;
64  
65      while(true){
66        //detect row mismatch.  break loop if mismatch
67        int currentNodeDepth = rowLength;
68        int rowTokenComparison = compareToCurrentToken(key);
69        if(rowTokenComparison != 0){
70          return fixRowTokenMissReverse(rowTokenComparison);
71        }
72  
73        //exact row found, move on to qualifier & ts
74        if(rowMatchesAfterCurrentPosition(key)){
75          return positionAtQualifierTimestamp(key, true);
76        }
77  
78        //detect dead end (no fan to descend into)
79        if(!currentRowNode.hasFan()){
80          if(hasOccurrences()){//must be leaf or nub
81            populateLastNonRowFields();
82            return CellScannerPosition.BEFORE;
83          }else{
84            //TODO i don't think this case is exercised by any tests
85            return fixRowFanMissReverse(0);
86          }
87        }
88  
89        //keep hunting for the rest of the row
90        byte searchForByte = CellTool.getRowByte(key, currentNodeDepth);
91        fanIndex = currentRowNode.whichFanNode(searchForByte);
92        if(fanIndex < 0){//no matching row.  return early
93          int insertionPoint = -fanIndex;
94          return fixRowFanMissReverse(insertionPoint);
95        }
96        //found a match, so dig deeper into the tree
97        followFan(fanIndex);
98      }
99    }
100 
101   /**
102    * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra
103    * if-statements. Priority on readability and debugability.
104    */
105   @Override
106   public CellScannerPosition positionAtOrAfter(Cell key) {
107     reInitFirstNode();
108     int fanIndex = -1;
109 
110     while(true){
111       //detect row mismatch.  break loop if mismatch
112       int currentNodeDepth = rowLength;
113       int rowTokenComparison = compareToCurrentToken(key);
114       if(rowTokenComparison != 0){
115         return fixRowTokenMissForward(rowTokenComparison);
116       }
117 
118       //exact row found, move on to qualifier & ts
119       if(rowMatchesAfterCurrentPosition(key)){
120         return positionAtQualifierTimestamp(key, false);
121       }
122 
123       //detect dead end (no fan to descend into)
124       if(!currentRowNode.hasFan()){
125         if(hasOccurrences()){
126           populateFirstNonRowFields();
127           return CellScannerPosition.AFTER;
128         }else{
129           //TODO i don't think this case is exercised by any tests
130           return fixRowFanMissForward(0);
131         }
132       }
133 
134       //keep hunting for the rest of the row
135       byte searchForByte = CellTool.getRowByte(key, currentNodeDepth);
136       fanIndex = currentRowNode.whichFanNode(searchForByte);
137       if(fanIndex < 0){//no matching row.  return early
138         int insertionPoint = -fanIndex;
139         return fixRowFanMissForward(insertionPoint);
140       }
141       //found a match, so dig deeper into the tree
142       followFan(fanIndex);
143     }
144   }
145 
146   @Override
147   public boolean seekForwardTo(Cell key) {
148     if(currentPositionIsAfter(key)){
149       //our position is after the requested key, so can't do anything
150       return false;
151     }
152     return positionAt(key);
153   }
154 
155   @Override
156   public CellScannerPosition seekForwardToOrBefore(Cell key) {
157     //Do we even need this check or should upper layers avoid this situation.  It's relatively
158     //expensive compared to the rest of the seek operation.
159     if(currentPositionIsAfter(key)){
160       //our position is after the requested key, so can't do anything
161       return CellScannerPosition.AFTER;
162     }
163 
164     return positionAtOrBefore(key);
165   }
166 
167   @Override
168   public CellScannerPosition seekForwardToOrAfter(Cell key) {
169     //Do we even need this check or should upper layers avoid this situation.  It's relatively
170     //expensive compared to the rest of the seek operation.
171     if(currentPositionIsAfter(key)){
172       //our position is after the requested key, so can't do anything
173       return CellScannerPosition.AFTER;
174     }
175 
176     return positionAtOrAfter(key);
177   }
178 
179   /**
180    * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
181    */
182   @Override
183   public void positionAfterLastCell() {
184     resetToBeforeFirstEntry();
185     beforeFirst = false;
186     afterLast = true;
187   }
188 
189 
190   /***************** Object methods ***************************/
191 
192   @Override
193   public boolean equals(Object obj) {
194     //trivial override to confirm intent (findbugs)
195     return super.equals(obj);
196   }
197 
198 
199   /****************** internal methods ************************/
200 
201   protected boolean currentPositionIsAfter(Cell cell){
202     return compareTo(cell) > 0;
203   }
204 
205   protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
206     int minIndex = 0;
207     int maxIndex = currentRowNode.getLastCellIndex();
208     int diff;
209     while (true) {
210       int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
211       diff = populateNonRowFieldsAndCompareTo(midIndex, key);
212 
213       if (diff == 0) {// found exact match
214         return CellScannerPosition.AT;
215       } else if (minIndex == maxIndex) {// even termination case
216         break;
217       } else if ((minIndex + 1) == maxIndex) {// odd termination case
218         diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
219         if(diff > 0){
220           diff = populateNonRowFieldsAndCompareTo(minIndex, key);
221         }
222         break;
223       } else if (diff < 0) {// keep going forward
224         minIndex = currentCellIndex;
225       } else {// went past it, back up
226         maxIndex = currentCellIndex;
227       }
228     }
229 
230     if (diff == 0) {
231       return CellScannerPosition.AT;
232 
233     } else if (diff < 0) {// we are before key
234       if (beforeOnMiss) {
235         return CellScannerPosition.BEFORE;
236       }
237       if (next()) {
238         return CellScannerPosition.AFTER;
239       }
240       return CellScannerPosition.AFTER_LAST;
241 
242     } else {// we are after key
243       if (!beforeOnMiss) {
244         return CellScannerPosition.AFTER;
245       }
246       if (previous()) {
247         return CellScannerPosition.BEFORE;
248       }
249       return CellScannerPosition.BEFORE_FIRST;
250     }
251   }
252 
253   /**
254    * compare this.row to key.row but starting at the current rowLength
255    * @param key Cell being searched for
256    * @return true if row buffer contents match key.row
257    */
258   protected boolean rowMatchesAfterCurrentPosition(Cell key) {
259     if (!currentRowNode.hasOccurrences()) {
260       return false;
261     }
262     int thatRowLength = key.getRowLength();
263     if (rowLength != thatRowLength) {
264       return false;
265     }
266     return true;
267   }
268 
269   // TODO move part of this to Cell comparator?
270   /**
271    * Compare only the bytes within the window of the current token
272    * @param key
273    * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after
274    */
275   protected int compareToCurrentToken(Cell key) {
276     int startIndex = rowLength - currentRowNode.getTokenLength();
277     int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
278     for (int i = startIndex; i < endIndexExclusive; ++i) {
279       if (i >= key.getRowLength()) {// key was shorter, so it's first
280         return -1;
281       }
282       byte keyByte = CellTool.getRowByte(key, i);
283       byte thisByte = rowBuffer[i];
284       if (keyByte == thisByte) {
285         continue;
286       }
287       return UnsignedBytes.compare(keyByte, thisByte);
288     }
289     return 0;
290   }
291 
292   protected void followLastFansUntilExhausted(){
293     while(currentRowNode.hasFan()){
294       followLastFan();
295     }
296   }
297 
298 
299 	/****************** complete seek when token mismatch ******************/
300 
301   /**
302    * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
303    *          >0: input key is after the searcher's position
304    */
305   protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
306     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
307       boolean foundPreviousRow = previousRow(true);
308       if(foundPreviousRow){
309         populateLastNonRowFields();
310         return CellScannerPosition.BEFORE;
311       }else{
312         return CellScannerPosition.BEFORE_FIRST;
313       }
314 
315     }else{//searcher position is before the input key
316       if(currentRowNode.hasOccurrences()){
317         populateFirstNonRowFields();
318         return CellScannerPosition.BEFORE;
319       }
320       boolean foundNextRow = nextRow();
321       if(foundNextRow){
322         return CellScannerPosition.AFTER;
323       }else{
324         return CellScannerPosition.AFTER_LAST;
325       }
326     }
327   }
328 
329   /**
330    * @param searcherIsAfterInputKey <0: input key is before the searcher's position<br/>
331    *                   >0: input key is after the searcher's position
332    */
333   protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
334     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
335       if(currentRowNode.hasOccurrences()){
336         populateFirstNonRowFields();
337         return CellScannerPosition.AFTER;
338       }
339       boolean foundNextRow = nextRow();
340       if(foundNextRow){
341         return CellScannerPosition.AFTER;
342       }else{
343         return CellScannerPosition.AFTER_LAST;
344       }
345 
346     }else{//searcher position is before the input key, so go forward
347       discardCurrentRowNode(true);
348       boolean foundNextRow = nextRow();
349       if(foundNextRow){
350         return CellScannerPosition.AFTER;
351       }else{
352         return CellScannerPosition.AFTER_LAST;
353       }
354     }
355   }
356 
357 
358   /****************** complete seek when fan mismatch ******************/
359 
360   protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
361     if(fanInsertionPoint == 0){//we need to back up a row
362       boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
363       if(foundPreviousRow){
364         populateLastNonRowFields();
365         return CellScannerPosition.BEFORE;
366       }
367       return CellScannerPosition.BEFORE_FIRST;
368     }
369 
370     //follow the previous fan, but then descend recursively forward
371     followFan(fanInsertionPoint - 1);
372     followLastFansUntilExhausted();
373     populateLastNonRowFields();
374     return CellScannerPosition.BEFORE;
375   }
376 
377   protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
378     if(fanInsertionPoint >= currentRowNode.getFanOut()){
379       discardCurrentRowNode(true);
380       if (!nextRow()) {
381         return CellScannerPosition.AFTER_LAST;
382       } else {
383         return CellScannerPosition.AFTER;
384       }
385     }
386 
387     followFan(fanInsertionPoint);
388     if(hasOccurrences()){
389       populateFirstNonRowFields();
390       return CellScannerPosition.AFTER;
391     }
392 
393     if(nextRowInternal()){
394       populateFirstNonRowFields();
395       return CellScannerPosition.AFTER;
396 
397     }else{
398       return CellScannerPosition.AFTER_LAST;
399     }
400   }
401 
402 }