View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.NavigableSet;
24  
25  import org.apache.hadoop.hbase.KeyValue.Type;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.Cell;
28  import org.apache.hadoop.hbase.CellUtil;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.KeepDeletedCells;
31  import org.apache.hadoop.hbase.KeyValue;
32  import org.apache.hadoop.hbase.KeyValueUtil;
33  import org.apache.hadoop.hbase.client.Scan;
34  import org.apache.hadoop.hbase.filter.Filter;
35  import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
36  import org.apache.hadoop.hbase.io.TimeRange;
37  import org.apache.hadoop.hbase.regionserver.DeleteTracker.DeleteResult;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
40  
41  import com.google.common.base.Preconditions;
42  
43  /**
44   * A query matcher that is specifically designed for the scan case.
45   */
46  @InterfaceAudience.Private
47  public class ScanQueryMatcher {
48    // Optimization so we can skip lots of compares when we decide to skip
49    // to the next row.
50    private boolean stickyNextRow;
51    private final byte[] stopRow;
52  
53    private final TimeRange tr;
54  
55    private final Filter filter;
56  
57    /** Keeps track of deletes */
58    private final DeleteTracker deletes;
59  
60    /*
61     * The following three booleans define how we deal with deletes.
62     * There are three different aspects:
63     * 1. Whether to keep delete markers. This is used in compactions.
64     *    Minor compactions always keep delete markers.
65     * 2. Whether to keep deleted rows. This is also used in compactions,
66     *    if the store is set to keep deleted rows. This implies keeping
67     *    the delete markers as well.
68     *    In this case deleted rows are subject to the normal max version
69     *    and TTL/min version rules just like "normal" rows.
70     * 3. Whether a scan can do time travel queries even before deleted
71     *    marker to reach deleted rows.
72     */
73    /** whether to retain delete markers */
74    private boolean retainDeletesInOutput;
75  
76    /** whether to return deleted rows */
77    private final KeepDeletedCells keepDeletedCells;
78    /** whether time range queries can see rows "behind" a delete */
79    private final boolean seePastDeleteMarkers;
80  
81  
82    /** Keeps track of columns and versions */
83    private final ColumnTracker columns;
84  
85    /** Key to seek to in memstore and StoreFiles */
86    private final Cell startKey;
87  
88    /** Row comparator for the region this query is for */
89    private final KeyValue.KVComparator rowComparator;
90  
91    /* row is not private for tests */
92    /** Row the query is on */
93    byte [] row;
94    int rowOffset;
95    short rowLength;
96    
97    /**
98     * Oldest put in any of the involved store files
99     * Used to decide whether it is ok to delete
100    * family delete marker of this store keeps
101    * deleted KVs.
102    */
103   private final long earliestPutTs;
104   private final long ttl;
105 
106   /** The oldest timestamp we are interested in, based on TTL */
107   private final long oldestUnexpiredTS;
108   private final long now;
109 
110   /** readPoint over which the KVs are unconditionally included */
111   protected long maxReadPointToTrackVersions;
112 
113   private byte[] dropDeletesFromRow = null, dropDeletesToRow = null;
114 
115   /**
116    * This variable shows whether there is an null column in the query. There
117    * always exists a null column in the wildcard column query.
118    * There maybe exists a null column in the explicit column query based on the
119    * first column.
120    * */
121   private boolean hasNullColumn = true;
122   
123   private RegionCoprocessorHost regionCoprocessorHost= null;
124 
125   // By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete
126   // marker is always removed during a major compaction. If set to non-zero
127   // value then major compaction will try to keep a delete marker around for
128   // the given number of milliseconds. We want to keep the delete markers
129   // around a bit longer because old puts might appear out-of-order. For
130   // example, during log replication between two clusters.
131   //
132   // If the delete marker has lived longer than its column-family's TTL then
133   // the delete marker will be removed even if time.to.purge.deletes has not
134   // passed. This is because all the Puts that this delete marker can influence
135   // would have also expired. (Removing of delete markers on col family TTL will
136   // not happen if min-versions is set to non-zero)
137   //
138   // But, if time.to.purge.deletes has not expired then a delete
139   // marker will not be removed just because there are no Puts that it is
140   // currently influencing. This is because Puts, that this delete can
141   // influence.  may appear out of order.
142   private final long timeToPurgeDeletes;
143   
144   private final boolean isUserScan;
145 
146   private final boolean isReversed;
147 
148   /**
149    * Construct a QueryMatcher for a scan
150    * @param scan
151    * @param scanInfo The store's immutable scan info
152    * @param columns
153    * @param scanType Type of the scan
154    * @param earliestPutTs Earliest put seen in any of the store files.
155    * @param oldestUnexpiredTS the oldest timestamp we are interested in,
156    *  based on TTL
157    * @param regionCoprocessorHost 
158    * @throws IOException 
159    */
160   public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
161       ScanType scanType, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS,
162       long now, RegionCoprocessorHost regionCoprocessorHost) throws IOException {
163     TimeRange timeRange = scan.getColumnFamilyTimeRange().get(scanInfo.getFamily());
164     if (timeRange == null) {
165       this.tr = scan.getTimeRange();
166     } else {
167       this.tr = timeRange;
168     }
169     this.rowComparator = scanInfo.getComparator();
170     this.regionCoprocessorHost = regionCoprocessorHost;
171     this.deletes =  instantiateDeleteTracker();
172     this.stopRow = scan.getStopRow();
173     this.startKey = KeyValueUtil.createFirstDeleteFamilyOnRow(scan.getStartRow(),
174         scanInfo.getFamily());
175     this.filter = scan.getFilter();
176     this.earliestPutTs = earliestPutTs;
177     this.oldestUnexpiredTS = oldestUnexpiredTS;
178     this.now = now;
179 
180     this.maxReadPointToTrackVersions = readPointToUse;
181     this.timeToPurgeDeletes = scanInfo.getTimeToPurgeDeletes();
182     this.ttl = oldestUnexpiredTS;
183 
184     /* how to deal with deletes */
185     this.isUserScan = scanType == ScanType.USER_SCAN;
186     // keep deleted cells: if compaction or raw scan
187     this.keepDeletedCells = scan.isRaw() ? KeepDeletedCells.TRUE :
188       isUserScan ? KeepDeletedCells.FALSE : scanInfo.getKeepDeletedCells();
189     // retain deletes: if minor compaction or raw scanisDone
190     this.retainDeletesInOutput = scanType == ScanType.COMPACT_RETAIN_DELETES || scan.isRaw();
191     // seePastDeleteMarker: user initiated scans
192     this.seePastDeleteMarkers =
193         scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE && isUserScan;
194 
195     int maxVersions =
196         scan.isRaw() ? scan.getMaxVersions() : Math.min(scan.getMaxVersions(),
197           scanInfo.getMaxVersions());
198 
199     // Single branch to deal with two types of reads (columns vs all in family)
200     if (columns == null || columns.size() == 0) {
201       // there is always a null column in the wildcard column query.
202       hasNullColumn = true;
203 
204       // use a specialized scan for wildcard column tracker.
205       this.columns = new ScanWildcardColumnTracker(
206           scanInfo.getMinVersions(), maxVersions, oldestUnexpiredTS);
207     } else {
208       // whether there is null column in the explicit column query
209       hasNullColumn = (columns.first().length == 0);
210 
211       // We can share the ExplicitColumnTracker, diff is we reset
212       // between rows, not between storefiles.
213       this.columns = new ExplicitColumnTracker(columns, scanInfo.getMinVersions(), maxVersions,
214           oldestUnexpiredTS);
215     }
216     this.isReversed = scan.isReversed();
217   }
218 
219   private DeleteTracker instantiateDeleteTracker() throws IOException {
220     DeleteTracker tracker = new ScanDeleteTracker();
221     if (regionCoprocessorHost != null) {
222       tracker = regionCoprocessorHost.postInstantiateDeleteTracker(tracker);
223     }
224     return tracker;
225   }
226 
227   /**
228    * Construct a QueryMatcher for a scan that drop deletes from a limited range of rows.
229    * @param scan
230    * @param scanInfo The store's immutable scan info
231    * @param columns
232    * @param earliestPutTs Earliest put seen in any of the store files.
233    * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
234    * @param now the current server time
235    * @param dropDeletesFromRow The inclusive left bound of the range; can be EMPTY_START_ROW.
236    * @param dropDeletesToRow The exclusive right bound of the range; can be EMPTY_END_ROW.
237    * @param regionCoprocessorHost 
238    * @throws IOException 
239    */
240   public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
241       long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, long now,
242       byte[] dropDeletesFromRow, byte[] dropDeletesToRow,
243       RegionCoprocessorHost regionCoprocessorHost) throws IOException {
244     this(scan, scanInfo, columns, ScanType.COMPACT_RETAIN_DELETES, readPointToUse, earliestPutTs,
245         oldestUnexpiredTS, now, regionCoprocessorHost);
246     Preconditions.checkArgument((dropDeletesFromRow != null) && (dropDeletesToRow != null));
247     this.dropDeletesFromRow = dropDeletesFromRow;
248     this.dropDeletesToRow = dropDeletesToRow;
249   }
250 
251   /*
252    * Constructor for tests
253    */
254   ScanQueryMatcher(Scan scan, ScanInfo scanInfo,
255       NavigableSet<byte[]> columns, long oldestUnexpiredTS, long now) throws IOException {
256     this(scan, scanInfo, columns, ScanType.USER_SCAN,
257           Long.MAX_VALUE, /* max Readpoint to track versions */
258         HConstants.LATEST_TIMESTAMP, oldestUnexpiredTS, now, null);
259   }
260 
261   /**
262    *
263    * @return  whether there is an null column in the query
264    */
265   public boolean hasNullColumnInQuery() {
266     return hasNullColumn;
267   }
268 
269   /**
270    * Determines if the caller should do one of several things:
271    * - seek/skip to the next row (MatchCode.SEEK_NEXT_ROW)
272    * - seek/skip to the next column (MatchCode.SEEK_NEXT_COL)
273    * - include the current KeyValue (MatchCode.INCLUDE)
274    * - ignore the current KeyValue (MatchCode.SKIP)
275    * - got to the next row (MatchCode.DONE)
276    *
277    * @param cell KeyValue to check
278    * @return The match code instance.
279    * @throws IOException in case there is an internal consistency problem
280    *      caused by a data corruption.
281    */
282   public MatchCode match(Cell cell) throws IOException {
283       if (filter != null && filter.filterAllRemaining()) {
284       return MatchCode.DONE_SCAN;
285     }
286     if (row != null) {
287       int ret = this.rowComparator.compareRows(row, this.rowOffset, this.rowLength,
288         cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
289       if (!this.isReversed) {
290         if (ret <= -1) {
291           return MatchCode.DONE;
292         } else if (ret >= 1) {
293           // could optimize this, if necessary?
294           // Could also be called SEEK_TO_CURRENT_ROW, but this
295           // should be rare/never happens.
296           return MatchCode.SEEK_NEXT_ROW;
297         }
298       } else {
299         if (ret <= -1) {
300           return MatchCode.SEEK_NEXT_ROW;
301         } else if (ret >= 1) {
302           return MatchCode.DONE;
303         }
304       }
305     } else {
306       return MatchCode.DONE;
307     }
308 
309     // optimize case.
310     if (this.stickyNextRow)
311       return MatchCode.SEEK_NEXT_ROW;
312 
313     if (this.columns.done()) {
314       stickyNextRow = true;
315       return MatchCode.SEEK_NEXT_ROW;
316     }
317 
318     int qualifierOffset = cell.getQualifierOffset();
319     int qualifierLength = cell.getQualifierLength();
320 
321     long timestamp = cell.getTimestamp();
322     // check for early out based on timestamp alone
323     if (columns.isDone(timestamp)) {
324       return columns.getNextRowOrNextColumn(cell.getQualifierArray(), qualifierOffset,
325           qualifierLength);
326     }
327     // check if the cell is expired by cell TTL
328     if (HStore.isCellTTLExpired(cell, this.oldestUnexpiredTS, this.now)) {
329       return MatchCode.SKIP;
330     }    
331 
332     /*
333      * The delete logic is pretty complicated now.
334      * This is corroborated by the following:
335      * 1. The store might be instructed to keep deleted rows around.
336      * 2. A scan can optionally see past a delete marker now.
337      * 3. If deleted rows are kept, we have to find out when we can
338      *    remove the delete markers.
339      * 4. Family delete markers are always first (regardless of their TS)
340      * 5. Delete markers should not be counted as version
341      * 6. Delete markers affect puts of the *same* TS
342      * 7. Delete marker need to be version counted together with puts
343      *    they affect
344      */
345     byte typeByte = cell.getTypeByte();
346     long mvccVersion = cell.getMvccVersion();
347     if (CellUtil.isDelete(cell)) {
348       if (keepDeletedCells == KeepDeletedCells.FALSE
349           || (keepDeletedCells == KeepDeletedCells.TTL && timestamp < ttl)) {
350         // first ignore delete markers if the scanner can do so, and the
351         // range does not include the marker
352         //
353         // during flushes and compactions also ignore delete markers newer
354         // than the readpoint of any open scanner, this prevents deleted
355         // rows that could still be seen by a scanner from being collected
356         boolean includeDeleteMarker = seePastDeleteMarkers ?
357             tr.withinTimeRange(timestamp) :
358             tr.withinOrAfterTimeRange(timestamp);
359         if (includeDeleteMarker
360             && mvccVersion <= maxReadPointToTrackVersions) {
361           this.deletes.add(cell);
362         }
363         // Can't early out now, because DelFam come before any other keys
364       }
365      
366       if ((!isUserScan)
367           && timeToPurgeDeletes > 0
368           && (EnvironmentEdgeManager.currentTime() - timestamp) 
369             <= timeToPurgeDeletes) {
370         return MatchCode.INCLUDE;
371       } else if (retainDeletesInOutput || mvccVersion > maxReadPointToTrackVersions) {
372         // always include or it is not time yet to check whether it is OK
373         // to purge deltes or not
374         if (!isUserScan) {
375           // if this is not a user scan (compaction), we can filter this deletemarker right here
376           // otherwise (i.e. a "raw" scan) we fall through to normal version and timerange checking
377           return MatchCode.INCLUDE;
378         }
379       } else if (keepDeletedCells == KeepDeletedCells.TRUE
380           || (keepDeletedCells == KeepDeletedCells.TTL && timestamp >= ttl)) {
381         if (timestamp < earliestPutTs) {
382           // keeping delete rows, but there are no puts older than
383           // this delete in the store files.
384           return columns.getNextRowOrNextColumn(cell.getQualifierArray(),
385               qualifierOffset, qualifierLength);
386         }
387         // else: fall through and do version counting on the
388         // delete markers
389       } else {
390         return MatchCode.SKIP;
391       }
392       // note the following next else if...
393       // delete marker are not subject to other delete markers
394     } else if (!this.deletes.isEmpty()) {
395       DeleteResult deleteResult = deletes.isDeleted(cell);
396       switch (deleteResult) {
397         case FAMILY_DELETED:
398         case COLUMN_DELETED:
399           return columns.getNextRowOrNextColumn(cell.getQualifierArray(),
400               qualifierOffset, qualifierLength);
401         case VERSION_DELETED:
402         case FAMILY_VERSION_DELETED:
403           return MatchCode.SKIP;
404         case NOT_DELETED:
405           break;
406         default:
407           throw new RuntimeException("UNEXPECTED");
408         }
409     }
410 
411     int timestampComparison = tr.compare(timestamp);
412     if (timestampComparison >= 1) {
413       return MatchCode.SKIP;
414     } else if (timestampComparison <= -1) {
415       return columns.getNextRowOrNextColumn(cell.getQualifierArray(), qualifierOffset,
416           qualifierLength);
417     }
418 
419     // STEP 1: Check if the column is part of the requested columns
420     MatchCode colChecker = columns.checkColumn(cell.getQualifierArray(), 
421         qualifierOffset, qualifierLength, typeByte);
422     if (colChecker == MatchCode.INCLUDE) {
423       ReturnCode filterResponse = ReturnCode.SKIP;
424       // STEP 2: Yes, the column is part of the requested columns. Check if filter is present
425       if (filter != null) {
426         // STEP 3: Filter the key value and return if it filters out
427         filterResponse = filter.filterKeyValue(cell);
428         switch (filterResponse) {
429         case SKIP:
430           return MatchCode.SKIP;
431         case NEXT_COL:
432           return columns.getNextRowOrNextColumn(cell.getQualifierArray(), 
433               qualifierOffset, qualifierLength);
434         case NEXT_ROW:
435           stickyNextRow = true;
436           return MatchCode.SEEK_NEXT_ROW;
437         case SEEK_NEXT_USING_HINT:
438           return MatchCode.SEEK_NEXT_USING_HINT;
439         default:
440           //It means it is either include or include and seek next
441           break;
442         }
443       }
444       /*
445        * STEP 4: Reaching this step means the column is part of the requested columns and either
446        * the filter is null or the filter has returned INCLUDE or INCLUDE_AND_NEXT_COL response.
447        * Now check the number of versions needed. This method call returns SKIP, INCLUDE,
448        * INCLUDE_AND_SEEK_NEXT_ROW, INCLUDE_AND_SEEK_NEXT_COL.
449        *
450        * FilterResponse            ColumnChecker               Desired behavior
451        * INCLUDE                   SKIP                        row has already been included, SKIP.
452        * INCLUDE                   INCLUDE                     INCLUDE
453        * INCLUDE                   INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
454        * INCLUDE                   INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
455        * INCLUDE_AND_SEEK_NEXT_COL SKIP                        row has already been included, SKIP.
456        * INCLUDE_AND_SEEK_NEXT_COL INCLUDE                     INCLUDE_AND_SEEK_NEXT_COL
457        * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
458        * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
459        *
460        * In all the above scenarios, we return the column checker return value except for
461        * FilterResponse (INCLUDE_AND_SEEK_NEXT_COL) and ColumnChecker(INCLUDE)
462        */
463       colChecker =
464           columns.checkVersions(cell.getQualifierArray(), qualifierOffset,
465               qualifierLength, timestamp, typeByte,
466             mvccVersion > maxReadPointToTrackVersions);
467       //Optimize with stickyNextRow
468       stickyNextRow = colChecker == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW ? true : stickyNextRow;
469       return (filterResponse == ReturnCode.INCLUDE_AND_NEXT_COL &&
470           colChecker == MatchCode.INCLUDE) ? MatchCode.INCLUDE_AND_SEEK_NEXT_COL
471           : colChecker;
472     }
473     stickyNextRow = (colChecker == MatchCode.SEEK_NEXT_ROW) ? true
474         : stickyNextRow;
475     return colChecker;
476   }
477 
478   /** Handle partial-drop-deletes. As we match keys in order, when we have a range from which
479    * we can drop deletes, we can set retainDeletesInOutput to false for the duration of this
480    * range only, and maintain consistency. */
481   private void checkPartialDropDeleteRange(byte [] row, int offset, short length) {
482     // If partial-drop-deletes are used, initially, dropDeletesFromRow and dropDeletesToRow
483     // are both set, and the matcher is set to retain deletes. We assume ordered keys. When
484     // dropDeletesFromRow is leq current kv, we start dropping deletes and reset
485     // dropDeletesFromRow; thus the 2nd "if" starts to apply.
486     if ((dropDeletesFromRow != null)
487         && ((dropDeletesFromRow == HConstants.EMPTY_START_ROW)
488           || (Bytes.compareTo(row, offset, length,
489               dropDeletesFromRow, 0, dropDeletesFromRow.length) >= 0))) {
490       retainDeletesInOutput = false;
491       dropDeletesFromRow = null;
492     }
493     // If dropDeletesFromRow is null and dropDeletesToRow is set, we are inside the partial-
494     // drop-deletes range. When dropDeletesToRow is leq current kv, we stop dropping deletes,
495     // and reset dropDeletesToRow so that we don't do any more compares.
496     if ((dropDeletesFromRow == null)
497         && (dropDeletesToRow != null) && (dropDeletesToRow != HConstants.EMPTY_END_ROW)
498         && (Bytes.compareTo(row, offset, length,
499             dropDeletesToRow, 0, dropDeletesToRow.length) >= 0)) {
500       retainDeletesInOutput = true;
501       dropDeletesToRow = null;
502     }
503   }
504 
505   public boolean moreRowsMayExistAfter(Cell kv) {
506     if (this.isReversed) {
507       if (rowComparator.compareRows(kv.getRowArray(), kv.getRowOffset(),
508           kv.getRowLength(), stopRow, 0, stopRow.length) <= 0) {
509         return false;
510       } else {
511         return true;
512       }
513     }
514     if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
515         rowComparator.compareRows(kv.getRowArray(),kv.getRowOffset(),
516             kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
517       // KV >= STOPROW
518       // then NO there is nothing left.
519       return false;
520     } else {
521       return true;
522     }
523   }
524 
525   /**
526    * Set current row
527    * @param row
528    */
529   public void setRow(byte [] row, int offset, short length) {
530     checkPartialDropDeleteRange(row, offset, length);
531     this.row = row;
532     this.rowOffset = offset;
533     this.rowLength = length;
534     reset();
535   }
536 
537   public void reset() {
538     this.deletes.reset();
539     this.columns.reset();
540 
541     stickyNextRow = false;
542   }
543 
544   /**
545    *
546    * @return the start key
547    */
548   public Cell getStartKey() {
549     return this.startKey;
550   }
551 
552   /**
553    *
554    * @return the Filter
555    */
556   Filter getFilter() {
557     return this.filter;
558   }
559 
560   public Cell getNextKeyHint(Cell kv) throws IOException {
561     if (filter == null) {
562       return null;
563     } else {
564       return filter.getNextCellHint(kv);
565     }
566   }
567 
568   public Cell getKeyForNextColumn(Cell kv) {
569     // We aren't sure whether any DeleteFamily cells exist, so we can't skip to next column.
570     // see HBASE-18471
571     // see TestFromClientSide3#testScanAfterDeletingSpecifiedRow
572     if (kv.getQualifierLength() == 0) {
573       Cell nextKey = createNextOnRowCol(kv);
574       if (nextKey != kv) {
575         return nextKey;
576       }
577       // The cell is at the end of row/family/qualifier, so it is impossible to find any DeleteFamily cells.
578       // Let us seek to next column.
579     }
580     ColumnCount nextColumn = columns.getColumnHint();
581     if (nextColumn == null) {
582       return KeyValueUtil.createLastOnRow(
583           kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
584           kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
585           kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
586     } else {
587       return KeyValueUtil.createFirstOnRow(
588           kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
589           kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
590           nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength());
591     }
592   }
593 
594   public Cell getKeyForNextRow(Cell kv) {
595     return KeyValueUtil.createLastOnRow(
596         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
597         null, 0, 0,
598         null, 0, 0);
599   }
600 
601   /**
602    * @param nextIndexed the key of the next entry in the block index (if any)
603    * @param kv The Cell we're using to calculate the seek key
604    * @return result of the compare between the indexed key and the key portion of the passed cell
605    */
606   public int compareKeyForNextRow(Cell nextIndexed, Cell kv) {
607     return rowComparator.compareKey(nextIndexed,
608       kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
609       null, 0, 0,
610       null, 0, 0,
611       HConstants.OLDEST_TIMESTAMP, Type.Minimum.getCode());
612   }
613 
614   /**
615    * @param nextIndexed the key of the next entry in the block index (if any)
616    * @param kv The Cell we're using to calculate the seek key
617    * @return result of the compare between the indexed key and the key portion of the passed cell
618    */
619   public int compareKeyForNextColumn(Cell nextIndexed, Cell kv) {
620     ColumnCount nextColumn = columns.getColumnHint();
621     if (nextColumn == null) {
622       return rowComparator.compareKey(nextIndexed,
623         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
624         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
625         kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength(),
626         HConstants.OLDEST_TIMESTAMP, Type.Minimum.getCode());
627     } else {
628       return rowComparator.compareKey(nextIndexed,
629         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
630         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
631         nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength(),
632         HConstants.LATEST_TIMESTAMP, Type.Maximum.getCode());
633     }
634   }
635 
636   //Used only for testing purposes
637   static MatchCode checkColumn(ColumnTracker columnTracker, byte[] bytes, int offset,
638       int length, long ttl, byte type, boolean ignoreCount) throws IOException {
639     MatchCode matchCode = columnTracker.checkColumn(bytes, offset, length, type);
640     if (matchCode == MatchCode.INCLUDE) {
641       return columnTracker.checkVersions(bytes, offset, length, ttl, type, ignoreCount);
642     }
643     return matchCode;
644   }
645 
646   /**
647    * {@link #match} return codes.  These instruct the scanner moving through
648    * memstores and StoreFiles what to do with the current KeyValue.
649    * <p>
650    * Additionally, this contains "early-out" language to tell the scanner to
651    * move on to the next File (memstore or Storefile), or to return immediately.
652    */
653   public static enum MatchCode {
654     /**
655      * Include KeyValue in the returned result
656      */
657     INCLUDE,
658 
659     /**
660      * Do not include KeyValue in the returned result
661      */
662     SKIP,
663 
664     /**
665      * Do not include, jump to next StoreFile or memstore (in time order)
666      */
667     NEXT,
668 
669     /**
670      * Do not include, return current result
671      */
672     DONE,
673 
674     /**
675      * These codes are used by the ScanQueryMatcher
676      */
677 
678     /**
679      * Done with the row, seek there.
680      */
681     SEEK_NEXT_ROW,
682     /**
683      * Done with column, seek to next.
684      */
685     SEEK_NEXT_COL,
686 
687     /**
688      * Done with scan, thanks to the row filter.
689      */
690     DONE_SCAN,
691 
692     /*
693      * Seek to next key which is given as hint.
694      */
695     SEEK_NEXT_USING_HINT,
696 
697     /**
698      * Include KeyValue and done with column, seek to next.
699      */
700     INCLUDE_AND_SEEK_NEXT_COL,
701 
702     /**
703      * Include KeyValue and done with row, seek to next.
704      */
705     INCLUDE_AND_SEEK_NEXT_ROW,
706   }
707 
708   /**
709    * @return An new cell is located following input cell. If both of type and timestamp are
710    *         minimum, the input cell will be returned directly.
711    */
712   private static Cell createNextOnRowCol(Cell cell) {
713     long ts = cell.getTimestamp();
714     byte type = cell.getTypeByte();
715     if (type != Type.Minimum.getCode()) {
716       type = KeyValue.Type.values()[KeyValue.Type.codeToType(type).ordinal() - 1].getCode();
717     } else if (ts != HConstants.OLDEST_TIMESTAMP) {
718       ts = ts - 1;
719       type = Type.Maximum.getCode();
720     } else {
721       return cell;
722     }
723     return createNextOnRowCol(cell, ts, type);
724   }
725 
726   private static Cell createNextOnRowCol(final Cell cell, final long ts, final byte type) {
727     return new Cell() {
728       @Override
729       public byte[] getRowArray() { return cell.getRowArray(); }
730 
731       @Override
732       public int getRowOffset() { return cell.getRowOffset(); }
733 
734       @Override
735       public short getRowLength() { return cell.getRowLength(); }
736 
737       @Override
738       public byte[] getFamilyArray() { return cell.getFamilyArray(); }
739 
740       @Override
741       public int getFamilyOffset() { return cell.getFamilyOffset(); }
742 
743       @Override
744       public byte getFamilyLength() { return cell.getFamilyLength(); }
745 
746       @Override
747       public byte[] getQualifierArray() { return cell.getQualifierArray(); }
748 
749       @Override
750       public int getQualifierOffset() { return cell.getQualifierOffset(); }
751 
752       @Override
753       public int getQualifierLength() { return cell.getQualifierLength(); }
754 
755       @Override
756       public long getTimestamp() { return ts; }
757 
758       @Override
759       public byte getTypeByte() {return type; }
760 
761       @Override
762       public long getMvccVersion() { return cell.getMvccVersion(); }
763 
764       @Override
765       public long getSequenceId() { return cell.getSequenceId(); }
766 
767       @Override
768       public byte[] getValueArray() { return cell.getValueArray(); }
769 
770       @Override
771       public int getValueOffset() { return cell.getValueOffset(); }
772 
773       @Override
774       public int getValueLength() { return cell.getValueLength(); }
775 
776       @Override
777       public byte[] getTagsArray() { return cell.getTagsArray(); }
778 
779       @Override
780       public int getTagsOffset() { return cell.getTagsOffset(); }
781 
782       @Override
783       public int getTagsLength() { return cell.getTagsLength(); }
784 
785       @Override
786       public byte[] getValue() { return cell.getValue(); }
787 
788       @Override
789       public byte[] getFamily() { return cell.getFamily(); }
790 
791       @Override
792       public byte[] getQualifier() { return cell.getQualifier(); }
793 
794       @Override
795       public byte[] getRow() {return cell.getRow(); }
796     };
797   }
798 }