001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.Closeable;
021import java.io.IOException;
022import org.apache.hadoop.fs.Path;
023import org.apache.hadoop.hbase.Cell;
024import org.apache.hadoop.hbase.KeyValue;
025import org.apache.hadoop.hbase.client.Scan;
026import org.apache.yetus.audience.InterfaceAudience;
027
028/**
029 * Scanner that returns the next KeyValue.
030 */
031@InterfaceAudience.Private
032// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner
033// so this should be something else altogether, a decoration on our base CellScanner. TODO.
034// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
035public interface KeyValueScanner extends Shipper, Closeable {
036  /**
037   * The byte array represents for NO_NEXT_INDEXED_KEY; The actual value is irrelevant because this
038   * is always compared by reference.
039   */
040  public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue();
041
042  /**
043   * Look at the next Cell in this scanner, but do not iterate scanner. NOTICE: The returned cell
044   * has not been passed into ScanQueryMatcher. So it may not be what the user need.
045   * @return the next Cell
046   */
047  Cell peek();
048
049  /**
050   * Return the next Cell in this scanner, iterating the scanner
051   * @return the next Cell
052   */
053  Cell next() throws IOException;
054
055  /**
056   * Seek the scanner at or after the specified KeyValue.
057   * @param key seek value
058   * @return true if scanner has values left, false if end of scanner
059   */
060  boolean seek(Cell key) throws IOException;
061
062  /**
063   * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or
064   * after the required key only if the key comes after the current position of the scanner. Should
065   * not be used to seek to a key which may come before the current position.
066   * @param key seek value (should be non-null)
067   * @return true if scanner has values left, false if end of scanner
068   */
069  boolean reseek(Cell key) throws IOException;
070
071  /**
072   * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners. This is
073   * required for comparing multiple files to find out which one has the latest data.
074   * StoreFileScanners are ordered from 0 (oldest) to newest in increasing order.
075   */
076  default long getScannerOrder() {
077    return 0;
078  }
079
080  /**
081   * Close the KeyValue scanner.
082   */
083  @Override
084  void close();
085
086  /**
087   * Allows to filter out scanners (both StoreFile and memstore) that we don't want to use based on
088   * criteria such as Bloom filters and timestamp ranges.
089   * @param scan              the scan that we are selecting scanners for
090   * @param store             the store we are performing the scan on.
091   * @param oldestUnexpiredTS the oldest timestamp we are interested in for this query, based on TTL
092   * @return true if the scanner should be included in the query
093   */
094  boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS);
095
096  // "Lazy scanner" optimizations
097
098  /**
099   * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only does a seek operation
100   * after checking that it is really necessary for the row/column combination specified by the kv
101   * parameter. This function was added to avoid unnecessary disk seeks by checking row-column Bloom
102   * filters before a seek on multi-column get/scan queries, and to optimize by looking up more
103   * recent files first.
104   * @param forward  do a forward-only "reseek" instead of a random-access seek
105   * @param useBloom whether to enable multi-column Bloom filter optimization
106   */
107  boolean requestSeek(Cell kv, boolean forward, boolean useBloom) throws IOException;
108
109  /**
110   * We optimize our store scanners by checking the most recent store file first, so we sometimes
111   * pretend we have done a seek but delay it until the store scanner bubbles up to the top of the
112   * key-value heap. This method is then used to ensure the top store file scanner has done a seek
113   * operation.
114   */
115  boolean realSeekDone();
116
117  /**
118   * Does the real seek operation in case it was skipped by seekToRowCol(KeyValue, boolean) (TODO:
119   * Whats this?). Note that this function should be never called on scanners that always do real
120   * seek operations (i.e. most of the scanners). The easiest way to achieve this is to call
121   * {@link #realSeekDone()} first.
122   */
123  void enforceSeek() throws IOException;
124
125  /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */
126  boolean isFileScanner();
127
128  /**
129   * @return the file path if this is a file scanner, otherwise null.
130   * @see #isFileScanner()
131   */
132  Path getFilePath();
133
134  // Support for "Reversed Scanner"
135  /**
136   * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner
137   * at or after the specified Cell, return if peek KeyValue of scanner has the same row with
138   * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous
139   * row of specified KeyValue
140   * @param key seek KeyValue
141   * @return true if the scanner is at the valid KeyValue, false if such KeyValue does not exist
142   */
143  public boolean backwardSeek(Cell key) throws IOException;
144
145  /**
146   * Seek the scanner at the first Cell of the row which is the previous row of specified key
147   * @param key seek value
148   * @return true if the scanner at the first valid Cell of previous row, false if not existing such
149   *         Cell
150   */
151  public boolean seekToPreviousRow(Cell key) throws IOException;
152
153  /**
154   * Seek the scanner at the first KeyValue of last row
155   * @return true if scanner has values left, false if the underlying data is empty n
156   */
157  public boolean seekToLastRow() throws IOException;
158
159  /**
160   * @return the next key in the index, usually the first key of next block OR a key that falls
161   *         between last key of current block and first key of next block.. see
162   *         HFileWriterImpl#getMidpoint, or null if not known.
163   */
164  public Cell getNextIndexedKey();
165}