001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.regionserver;
020
021import java.io.Closeable;
022import java.io.IOException;
023
024import org.apache.yetus.audience.InterfaceAudience;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.KeyValue;
028import org.apache.hadoop.hbase.client.Scan;
029
030/**
031 * Scanner that returns the next KeyValue.
032 */
033@InterfaceAudience.Private
034// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner
035// so this should be something else altogether, a decoration on our base CellScanner. TODO.
036// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
037public interface KeyValueScanner extends Shipper, Closeable {
038  /**
039   * The byte array represents for NO_NEXT_INDEXED_KEY;
040   * The actual value is irrelevant because this is always compared by reference.
041   */
042  public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue();
043
044  /**
045   * Look at the next Cell in this scanner, but do not iterate scanner.
046   * NOTICE: The returned cell has not been passed into ScanQueryMatcher. So it may not be what the
047   * user need.
048   * @return the next Cell
049   */
050  Cell peek();
051
052  /**
053   * Return the next Cell in this scanner, iterating the scanner
054   * @return the next Cell
055   */
056  Cell next() throws IOException;
057
058  /**
059   * Seek the scanner at or after the specified KeyValue.
060   * @param key seek value
061   * @return true if scanner has values left, false if end of scanner
062   */
063  boolean seek(Cell key) throws IOException;
064
065  /**
066   * Reseek the scanner at or after the specified KeyValue.
067   * This method is guaranteed to seek at or after the required key only if the
068   * key comes after the current position of the scanner. Should not be used
069   * to seek to a key which may come before the current position.
070   * @param key seek value (should be non-null)
071   * @return true if scanner has values left, false if end of scanner
072   */
073  boolean reseek(Cell key) throws IOException;
074
075  /**
076   * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners and
077   * MemStoreScanners (other scanners simply return 0). This is required for comparing multiple
078   * files to find out which one has the latest data. StoreFileScanners are ordered from 0
079   * (oldest) to newest in increasing order. MemStoreScanner gets LONG.max since it always
080   * contains freshest data.
081   */
082  long getScannerOrder();
083
084  /**
085   * Close the KeyValue scanner.
086   */
087  @Override
088  void close();
089
090  /**
091   * Allows to filter out scanners (both StoreFile and memstore) that we don't
092   * want to use based on criteria such as Bloom filters and timestamp ranges.
093   * @param scan the scan that we are selecting scanners for
094   * @param store the store we are performing the scan on.
095   * @param oldestUnexpiredTS the oldest timestamp we are interested in for
096   *          this query, based on TTL
097   * @return true if the scanner should be included in the query
098   */
099  boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS);
100
101  // "Lazy scanner" optimizations
102
103  /**
104   * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only
105   * does a seek operation after checking that it is really necessary for the
106   * row/column combination specified by the kv parameter. This function was
107   * added to avoid unnecessary disk seeks by checking row-column Bloom filters
108   * before a seek on multi-column get/scan queries, and to optimize by looking
109   * up more recent files first.
110   * @param forward do a forward-only "reseek" instead of a random-access seek
111   * @param useBloom whether to enable multi-column Bloom filter optimization
112   */
113  boolean requestSeek(Cell kv, boolean forward, boolean useBloom)
114      throws IOException;
115
116  /**
117   * We optimize our store scanners by checking the most recent store file
118   * first, so we sometimes pretend we have done a seek but delay it until the
119   * store scanner bubbles up to the top of the key-value heap. This method is
120   * then used to ensure the top store file scanner has done a seek operation.
121   */
122  boolean realSeekDone();
123
124  /**
125   * Does the real seek operation in case it was skipped by
126   * seekToRowCol(KeyValue, boolean) (TODO: Whats this?). Note that this function should
127   * be never called on scanners that always do real seek operations (i.e. most
128   * of the scanners). The easiest way to achieve this is to call
129   * {@link #realSeekDone()} first.
130   */
131  void enforceSeek() throws IOException;
132
133  /**
134   * @return true if this is a file scanner. Otherwise a memory scanner is
135   *         assumed.
136   */
137  boolean isFileScanner();
138
139  /**
140   * @return the file path if this is a file scanner, otherwise null.
141   * @see #isFileScanner()
142   */
143  Path getFilePath();
144
145  // Support for "Reversed Scanner"
146  /**
147   * Seek the scanner at or before the row of specified Cell, it firstly
148   * tries to seek the scanner at or after the specified Cell, return if
149   * peek KeyValue of scanner has the same row with specified Cell,
150   * otherwise seek the scanner at the first Cell of the row which is the
151   * previous row of specified KeyValue
152   *
153   * @param key seek KeyValue
154   * @return true if the scanner is at the valid KeyValue, false if such
155   *         KeyValue does not exist
156   *
157   */
158  public boolean backwardSeek(Cell key) throws IOException;
159
160  /**
161   * Seek the scanner at the first Cell of the row which is the previous row
162   * of specified key
163   * @param key seek value
164   * @return true if the scanner at the first valid Cell of previous row,
165   *         false if not existing such Cell
166   */
167  public boolean seekToPreviousRow(Cell key) throws IOException;
168
169  /**
170   * Seek the scanner at the first KeyValue of last row
171   *
172   * @return true if scanner has values left, false if the underlying data is
173   *         empty
174   * @throws IOException
175   */
176  public boolean seekToLastRow() throws IOException;
177
178  /**
179   * @return the next key in the index, usually the first key of next block OR a key that falls
180   * between last key of current block and first key of next block..
181   * see HFileWriterImpl#getMidpoint, or null if not known.
182   */
183  public Cell getNextIndexedKey();
184}