View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  
23  import org.apache.hadoop.hbase.classification.InterfaceAudience;
24  import org.apache.hadoop.hbase.Cell;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.client.Scan;
27  
28  /**
29   * Scanner that returns the next Cell.
30   */
31  @InterfaceAudience.Private
32  public interface KeyValueScanner {
33    /**
34     * Placeholder Cell that stands for "no next indexed key".
35     * The actual value is irrelevant because this is always compared by reference.
36     */
37    public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue();
38  
39    /**
40     * Look at the next Cell in this scanner, but do not advance the scanner.
41     * @return the next Cell
42     */
43    Cell peek();
44  
45    /**
46     * Return the next Cell in this scanner, advancing the scanner.
47     * @return the next Cell
48     */
49    Cell next() throws IOException;
50  
51    /**
52     * Seek the scanner at or after the specified Cell.
53     * @param key seek value
54     * @return true if scanner has values left, false if end of scanner
55     */
56    boolean seek(Cell key) throws IOException;
57  
58    /**
59     * Reseek the scanner at or after the specified Cell.
60     * This method is guaranteed to seek at or after the required key only if the
61     * key comes after the current position of the scanner. Should not be used
62     * to seek to a key which may come before the current position.
63     * @param key seek value (should be non-null)
64     * @return true if scanner has values left, false if end of scanner
65     */
66    boolean reseek(Cell key) throws IOException;
67  
68    /**
69     * Get the sequence id associated with this KeyValueScanner. This is required
70     * for comparing multiple files to find out which one has the latest data.
71     * The default implementation for this would be to return 0. A file having
72     * a lower sequence id will be considered to be the older one.
73     */
74    long getSequenceID();
75  
76    /**
77     * Close the KeyValue scanner.
78     */
79    void close();
80  
81    /**
82     * Allows filtering out scanners (both StoreFile and memstore) that we don't
83     * want to use based on criteria such as Bloom filters and timestamp ranges.
84     * @param scan the scan that we are selecting scanners for
85     * @param store the store we are performing the scan on.
86     * @param oldestUnexpiredTS the oldest timestamp we are interested in for
87     *          this query, based on TTL
88     * @return true if the scanner should be included in the query
89     */
90    boolean shouldUseScanner(Scan scan, Store store, long oldestUnexpiredTS);
91  
92    // "Lazy scanner" optimizations
93  
94    /**
95     * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only does a seek
96     * operation after checking that it is really necessary for the row/column combination
97     * specified by the kv parameter. This function was added to avoid unnecessary disk seeks by
98     * checking row-column Bloom filters before a seek on multi-column get/scan queries, and to
99     * optimize by looking up more recent files first.
100    * @param kv the Cell whose row/column combination the seek is requested for
101    * @param forward do a forward-only "reseek" instead of a random-access seek
102    * @param useBloom whether to enable multi-column Bloom filter optimization
103    */
104   boolean requestSeek(Cell kv, boolean forward, boolean useBloom)
105       throws IOException;
106 
107   /**
108    * We optimize our store scanners by checking the most recent store file
109    * first, so we sometimes pretend we have done a seek but delay it until the
110    * store scanner bubbles up to the top of the key-value heap. This method is
111    * then used to ensure the top store file scanner has done a seek operation.
112    */
113   boolean realSeekDone();
114 
115   /**
116    * Does the real seek operation in case it was skipped by
117    * seekToRowCol(KeyValue, boolean) (TODO: not declared in this interface; presumably
118    * {@link #requestSeek} is meant -- confirm). Note that this function should never be called
119    * on scanners that always do real seek operations (i.e. most of the scanners). The easiest
120    * way to achieve this is to call {@link #realSeekDone()} first.
121    */
122   void enforceSeek() throws IOException;
123 
124   /**
125    * @return true if this is a file scanner. Otherwise a memory scanner is
126    *         assumed.
127    */
128   boolean isFileScanner();
129 
130   // Support for "Reversed Scanner"
131   /**
132    * Seek the scanner at or before the row of the specified Cell. It first
133    * tries to seek the scanner at or after the specified Cell and returns if
134    * the peeked Cell of the scanner has the same row as the specified Cell;
135    * otherwise it seeks the scanner to the first Cell of the row which is the
136    * previous row of the specified Cell.
137    *
138    * @param key seek Cell
139    * @return true if the scanner is at a valid Cell, false if such a
140    *         Cell does not exist
141    *
142    */
143   public boolean backwardSeek(Cell key) throws IOException;
144 
145   /**
146    * Seek the scanner at the first Cell of the row which is the previous row
147    * of the specified key.
148    * @param key seek value
149    * @return true if the scanner is at the first valid Cell of the previous row,
150    *         false if no such Cell exists
151    */
152   public boolean seekToPreviousRow(Cell key) throws IOException;
153 
154   /**
155    * Seek the scanner at the first Cell of the last row.
156    *
157    * @return true if scanner has values left, false if the underlying data is
158    *         empty
159    * @throws IOException
160    */
161   public boolean seekToLastRow() throws IOException;
162 
163   /**
164    * @return the next key in the index (the key to seek to the next block)
165    * if known, or null otherwise
166    */
167   public Cell getNextIndexedKey();
168 }