001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.Closeable;
021import java.io.IOException;
022import java.util.Set;
023import java.util.function.IntConsumer;
024import org.apache.hadoop.fs.Path;
025import org.apache.hadoop.hbase.ExtendedCell;
026import org.apache.hadoop.hbase.KeyValue;
027import org.apache.hadoop.hbase.client.Scan;
028import org.apache.yetus.audience.InterfaceAudience;
029
030/**
031 * Scanner that returns the next KeyValue.
032 */
033@InterfaceAudience.Private
034// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner
035// so this should be something else altogether, a decoration on our base CellScanner. TODO.
036// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
037public interface KeyValueScanner extends Shipper, Closeable {
038  /**
039   * The byte array represents for NO_NEXT_INDEXED_KEY; The actual value is irrelevant because this
040   * is always compared by reference.
041   */
042  public static final ExtendedCell NO_NEXT_INDEXED_KEY = new KeyValue();
043
044  /**
045   * Look at the next Cell in this scanner, but do not iterate scanner. NOTICE: The returned cell
046   * has not been passed into ScanQueryMatcher. So it may not be what the user need.
047   * @return the next Cell
048   */
049  ExtendedCell peek();
050
051  /**
052   * Return the next Cell in this scanner, iterating the scanner
053   * @return the next Cell
054   */
055  ExtendedCell next() throws IOException;
056
057  /**
058   * Seek the scanner at or after the specified KeyValue.
059   * @param key seek value
060   * @return true if scanner has values left, false if end of scanner
061   */
062  boolean seek(ExtendedCell key) throws IOException;
063
064  /**
065   * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or
066   * after the required key only if the key comes after the current position of the scanner. Should
067   * not be used to seek to a key which may come before the current position.
068   * @param key seek value (should be non-null)
069   * @return true if scanner has values left, false if end of scanner
070   */
071  boolean reseek(ExtendedCell key) throws IOException;
072
073  /**
074   * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners. This is
075   * required for comparing multiple files to find out which one has the latest data.
076   * StoreFileScanners are ordered from 0 (oldest) to newest in increasing order.
077   */
078  default long getScannerOrder() {
079    return 0;
080  }
081
082  /**
083   * Close the KeyValue scanner.
084   */
085  @Override
086  void close();
087
088  /**
089   * Allows to filter out scanners (both StoreFile and memstore) that we don't want to use based on
090   * criteria such as Bloom filters and timestamp ranges.
091   * @param scan              the scan that we are selecting scanners for
092   * @param store             the store we are performing the scan on.
093   * @param oldestUnexpiredTS the oldest timestamp we are interested in for this query, based on TTL
094   * @return true if the scanner should be included in the query
095   */
096  boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS);
097
098  // "Lazy scanner" optimizations
099
100  /**
101   * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only does a seek operation
102   * after checking that it is really necessary for the row/column combination specified by the kv
103   * parameter. This function was added to avoid unnecessary disk seeks by checking row-column Bloom
104   * filters before a seek on multi-column get/scan queries, and to optimize by looking up more
105   * recent files first.
106   * @param forward  do a forward-only "reseek" instead of a random-access seek
107   * @param useBloom whether to enable multi-column Bloom filter optimization
108   */
109  boolean requestSeek(ExtendedCell kv, boolean forward, boolean useBloom) throws IOException;
110
111  /**
112   * We optimize our store scanners by checking the most recent store file first, so we sometimes
113   * pretend we have done a seek but delay it until the store scanner bubbles up to the top of the
114   * key-value heap. This method is then used to ensure the top store file scanner has done a seek
115   * operation.
116   */
117  boolean realSeekDone();
118
119  /**
120   * Does the real seek operation in case it was skipped by seekToRowCol(KeyValue, boolean) (TODO:
121   * Whats this?). Note that this function should be never called on scanners that always do real
122   * seek operations (i.e. most of the scanners). The easiest way to achieve this is to call
123   * {@link #realSeekDone()} first.
124   */
125  void enforceSeek() throws IOException;
126
127  /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */
128  boolean isFileScanner();
129
130  /**
131   * Record the size of the current block in bytes, passing as an argument to the blockSizeConsumer.
132   * Implementations should ensure that blockSizeConsumer is only called once per block.
133   * @param blockSizeConsumer to be called with block size in bytes, once per block.
134   */
135  void recordBlockSize(IntConsumer blockSizeConsumer);
136
137  /**
138   * @return the file path if this is a file scanner, otherwise null.
139   * @see #isFileScanner()
140   */
141  Path getFilePath();
142
143  /**
144   * Returns the set of store file paths that were successfully read by this scanner. Typically
145   * populated only after the scanner is closed.
146   */
147  Set<Path> getFilesRead();
148
149  // Support for "Reversed Scanner"
150  /**
151   * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner
152   * at or after the specified Cell, return if peek KeyValue of scanner has the same row with
153   * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous
154   * row of specified KeyValue
155   * @param key seek KeyValue
156   * @return true if the scanner is at the valid KeyValue, false if such KeyValue does not exist
157   */
158  public boolean backwardSeek(ExtendedCell key) throws IOException;
159
160  /**
161   * Seek the scanner at the first Cell of the row which is the previous row of specified key
162   * @param key seek value
163   * @return true if the scanner at the first valid Cell of previous row, false if not existing such
164   *         Cell
165   */
166  public boolean seekToPreviousRow(ExtendedCell key) throws IOException;
167
168  /**
169   * Seek the scanner at the first KeyValue of last row
170   * @return true if scanner has values left, false if the underlying data is empty
171   */
172  public boolean seekToLastRow() throws IOException;
173
174  /**
175   * @return the next key in the index, usually the first key of next block OR a key that falls
176   *         between last key of current block and first key of next block.. see
177   *         HFileWriterImpl#getMidpoint, or null if not known.
178   */
179  public ExtendedCell getNextIndexedKey();
180}