View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.codec.prefixtree.decode;
20  
21  import org.apache.hadoop.hbase.classification.InterfaceAudience;
22  import org.apache.hadoop.hbase.Cell;
23  import org.apache.hadoop.hbase.CellUtil;
24  import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
25  import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
26  import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
27  
28  import com.google.common.primitives.UnsignedBytes;
29  
30  /**
31   * <p>
32   * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
33   * position itself on a requested Cell without scanning through cells before it. The PrefixTree is
34   * set up to be a Trie of rows, so finding a particular row is extremely cheap.
35   * </p>
36   * Once it finds the row, it does a binary search through the cells inside the row, which is not as
37   * fast as the trie search, but faster than iterating through every cell like existing block
38   * formats
39   * do. For this reason, this implementation is targeted towards schemas where rows are narrow
40   * enough
41   * to have several or many per block, and where you are generally looking for the entire row or
42   * the
43   * first cell. It will still be fast for wide rows or point queries, but could be improved upon.
44   */
45  @InterfaceAudience.Private
46  public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
47      CellSearcher {
48  
49    /*************** construct ******************************/
50  
51    public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
52        int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
53      super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
54    }
55  
56  
57    /********************* CellSearcher methods *******************/
58  
59    @Override
60    public boolean positionAt(Cell key) {
61      return CellScannerPosition.AT == positionAtOrAfter(key);
62    }
63  
64    @Override
65    public CellScannerPosition positionAtOrBefore(Cell key) {
66      reInitFirstNode();
67      int fanIndex = -1;
68  
69      while(true){
70        //detect row mismatch.  break loop if mismatch
71        int currentNodeDepth = rowLength;
72        int rowTokenComparison = compareToCurrentToken(key);
73        if(rowTokenComparison != 0){
74          return fixRowTokenMissReverse(rowTokenComparison);
75        }
76  
77        //exact row found, move on to qualifier & ts
78        if(rowMatchesAfterCurrentPosition(key)){
79          return positionAtQualifierTimestamp(key, true);
80        }
81  
82        //detect dead end (no fan to descend into)
83        if(!currentRowNode.hasFan()){
84          if(hasOccurrences()){//must be leaf or nub
85            populateLastNonRowFields();
86            return CellScannerPosition.BEFORE;
87          }else{
88            //TODO i don't think this case is exercised by any tests
89            return fixRowFanMissReverse(0);
90          }
91        }
92  
93        //keep hunting for the rest of the row
94        byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
95        fanIndex = currentRowNode.whichFanNode(searchForByte);
96        if(fanIndex < 0){//no matching row.  return early
97          int insertionPoint = -fanIndex - 1;
98          return fixRowFanMissReverse(insertionPoint);
99        }
100       //found a match, so dig deeper into the tree
101       followFan(fanIndex);
102     }
103   }
104 
105   /**
106    * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra
107    * if-statements. Priority on readability and debugability.
108    */
109   @Override
110   public CellScannerPosition positionAtOrAfter(Cell key) {
111     reInitFirstNode();
112     int fanIndex = -1;
113 
114     while(true){
115       //detect row mismatch.  break loop if mismatch
116       int currentNodeDepth = rowLength;
117       int rowTokenComparison = compareToCurrentToken(key);
118       if(rowTokenComparison != 0){
119         return fixRowTokenMissForward(rowTokenComparison);
120       }
121 
122       //exact row found, move on to qualifier & ts
123       if(rowMatchesAfterCurrentPosition(key)){
124         return positionAtQualifierTimestamp(key, false);
125       }
126 
127       //detect dead end (no fan to descend into)
128       if(!currentRowNode.hasFan()){
129         if(hasOccurrences()){
130           if (rowLength < key.getRowLength()) {
131             nextRow();
132           } else {
133             populateFirstNonRowFields();
134           }
135           return CellScannerPosition.AFTER;
136         }else{
137           //TODO i don't think this case is exercised by any tests
138           return fixRowFanMissForward(0);
139         }
140       }
141 
142       //keep hunting for the rest of the row
143       byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
144       fanIndex = currentRowNode.whichFanNode(searchForByte);
145       if(fanIndex < 0){//no matching row.  return early
146         int insertionPoint = -fanIndex - 1;
147         return fixRowFanMissForward(insertionPoint);
148       }
149       //found a match, so dig deeper into the tree
150       followFan(fanIndex);
151     }
152   }
153 
154   @Override
155   public boolean seekForwardTo(Cell key) {
156     if(currentPositionIsAfter(key)){
157       //our position is after the requested key, so can't do anything
158       return false;
159     }
160     return positionAt(key);
161   }
162 
163   @Override
164   public CellScannerPosition seekForwardToOrBefore(Cell key) {
165     //Do we even need this check or should upper layers avoid this situation.  It's relatively
166     //expensive compared to the rest of the seek operation.
167     if(currentPositionIsAfter(key)){
168       //our position is after the requested key, so can't do anything
169       return CellScannerPosition.AFTER;
170     }
171 
172     return positionAtOrBefore(key);
173   }
174 
175   @Override
176   public CellScannerPosition seekForwardToOrAfter(Cell key) {
177     //Do we even need this check or should upper layers avoid this situation.  It's relatively
178     //expensive compared to the rest of the seek operation.
179     if(currentPositionIsAfter(key)){
180       //our position is after the requested key, so can't do anything
181       return CellScannerPosition.AFTER;
182     }
183 
184     return positionAtOrAfter(key);
185   }
186 
187   /**
188    * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
189    */
190   @Override
191   public void positionAfterLastCell() {
192     resetToBeforeFirstEntry();
193     beforeFirst = false;
194     afterLast = true;
195   }
196 
197 
198   /***************** Object methods ***************************/
199 
200   @Override
201   public boolean equals(Object obj) {
202     //trivial override to confirm intent (findbugs)
203     return super.equals(obj);
204   }
205 
206 
207   /****************** internal methods ************************/
208 
209   protected boolean currentPositionIsAfter(Cell cell){
210     return compareTo(cell) > 0;
211   }
212 
213   protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
214     int minIndex = 0;
215     int maxIndex = currentRowNode.getLastCellIndex();
216     int diff;
217     while (true) {
218       int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
219       diff = populateNonRowFieldsAndCompareTo(midIndex, key);
220 
221       if (diff == 0) {// found exact match
222         return CellScannerPosition.AT;
223       } else if (minIndex == maxIndex) {// even termination case
224         break;
225       } else if ((minIndex + 1) == maxIndex) {// odd termination case
226         diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
227         if(diff > 0){
228           diff = populateNonRowFieldsAndCompareTo(minIndex, key);
229         }
230         break;
231       } else if (diff < 0) {// keep going forward
232         minIndex = currentCellIndex;
233       } else {// went past it, back up
234         maxIndex = currentCellIndex;
235       }
236     }
237 
238     if (diff == 0) {
239       return CellScannerPosition.AT;
240 
241     } else if (diff < 0) {// we are before key
242       if (beforeOnMiss) {
243         return CellScannerPosition.BEFORE;
244       }
245       if (advance()) {
246         return CellScannerPosition.AFTER;
247       }
248       return CellScannerPosition.AFTER_LAST;
249 
250     } else {// we are after key
251       if (!beforeOnMiss) {
252         return CellScannerPosition.AFTER;
253       }
254       if (previous()) {
255         return CellScannerPosition.BEFORE;
256       }
257       return CellScannerPosition.BEFORE_FIRST;
258     }
259   }
260 
261   /**
262    * compare this.row to key.row but starting at the current rowLength
263    * @param key Cell being searched for
264    * @return true if row buffer contents match key.row
265    */
266   protected boolean rowMatchesAfterCurrentPosition(Cell key) {
267     if (!currentRowNode.hasOccurrences()) {
268       return false;
269     }
270     int thatRowLength = key.getRowLength();
271     if (rowLength != thatRowLength) {
272       return false;
273     }
274     return true;
275   }
276 
277   // TODO move part of this to Cell comparator?
278   /**
279    * Compare only the bytes within the window of the current token
280    * @param key
281    * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after
282    */
283   protected int compareToCurrentToken(Cell key) {
284     int startIndex = rowLength - currentRowNode.getTokenLength();
285     int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
286     for (int i = startIndex; i < endIndexExclusive; ++i) {
287       if (i >= key.getRowLength()) {// key was shorter, so it's first
288         return -1;
289       }
290       byte keyByte = CellUtil.getRowByte(key, i);
291       byte thisByte = rowBuffer[i];
292       if (keyByte == thisByte) {
293         continue;
294       }
295       return UnsignedBytes.compare(keyByte, thisByte);
296     }
297     if (!currentRowNode.hasOccurrences() && rowLength >= key.getRowLength()) { // key was shorter
298         return -1;
299     }
300     return 0;
301   }
302 
303   protected void followLastFansUntilExhausted(){
304     while(currentRowNode.hasFan()){
305       followLastFan();
306     }
307   }
308 
309 
310   /****************** complete seek when token mismatch ******************/
311 
312   /**
313    * @param searcherIsAfterInputKey &lt;0: input key is before the searcher's position<br>
314    *          &gt;0: input key is after the searcher's position
315    */
316   protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
317     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
318       boolean foundPreviousRow = previousRow(true);
319       if(foundPreviousRow){
320         populateLastNonRowFields();
321         return CellScannerPosition.BEFORE;
322       }else{
323         return CellScannerPosition.BEFORE_FIRST;
324       }
325 
326     }else{//searcher position is before the input key
327       if(currentRowNode.hasOccurrences()){
328         populateFirstNonRowFields();
329         return CellScannerPosition.BEFORE;
330       }
331       boolean foundNextRow = nextRow();
332       if(foundNextRow){
333         return CellScannerPosition.AFTER;
334       }else{
335         return CellScannerPosition.AFTER_LAST;
336       }
337     }
338   }
339 
340   /**
341    * @param searcherIsAfterInputKey &lt;0: input key is before the searcher's position<br>
342    *                   &gt;0: input key is after the searcher's position
343    */
344   protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
345     if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
346       if(currentRowNode.hasOccurrences()){
347         populateFirstNonRowFields();
348         return CellScannerPosition.AFTER;
349       }
350       boolean foundNextRow = nextRow();
351       if(foundNextRow){
352         return CellScannerPosition.AFTER;
353       }else{
354         return CellScannerPosition.AFTER_LAST;
355       }
356 
357     }else{//searcher position is before the input key, so go forward
358       discardCurrentRowNode(true);
359       boolean foundNextRow = nextRow();
360       if(foundNextRow){
361         return CellScannerPosition.AFTER;
362       }else{
363         return CellScannerPosition.AFTER_LAST;
364       }
365     }
366   }
367 
368 
369   /****************** complete seek when fan mismatch ******************/
370 
371   protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
372     if(fanInsertionPoint == 0){//we need to back up a row
373       if (currentRowNode.hasOccurrences()) {
374         populateLastNonRowFields();
375         return CellScannerPosition.BEFORE;
376       }
377       boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
378       if(foundPreviousRow){
379         populateLastNonRowFields();
380         return CellScannerPosition.BEFORE;
381       }
382       return CellScannerPosition.BEFORE_FIRST;
383     }
384 
385     //follow the previous fan, but then descend recursively forward
386     followFan(fanInsertionPoint - 1);
387     followLastFansUntilExhausted();
388     populateLastNonRowFields();
389     return CellScannerPosition.BEFORE;
390   }
391 
392   protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
393     if(fanInsertionPoint >= currentRowNode.getFanOut()){
394       discardCurrentRowNode(true);
395       if (!nextRow()) {
396         return CellScannerPosition.AFTER_LAST;
397       } else {
398         return CellScannerPosition.AFTER;
399       }
400     }
401 
402     followFan(fanInsertionPoint);
403     if(hasOccurrences()){
404       populateFirstNonRowFields();
405       return CellScannerPosition.AFTER;
406     }
407 
408     if(nextRowInternal()){
409       populateFirstNonRowFields();
410       return CellScannerPosition.AFTER;
411 
412     }else{
413       return CellScannerPosition.AFTER_LAST;
414     }
415   }
416 
417 }