001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.Closeable;
021import java.io.IOException;
022import java.util.function.IntConsumer;
023import org.apache.hadoop.fs.Path;
024import org.apache.hadoop.hbase.Cell;
025import org.apache.hadoop.hbase.KeyValue;
026import org.apache.hadoop.hbase.client.Scan;
027import org.apache.yetus.audience.InterfaceAudience;
028
029/**
030 * Scanner that returns the next KeyValue.
031 */
032@InterfaceAudience.Private
033// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner
034// so this should be something else altogether, a decoration on our base CellScanner. TODO.
035// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
036public interface KeyValueScanner extends Shipper, Closeable {
037  /**
038   * The byte array represents for NO_NEXT_INDEXED_KEY; The actual value is irrelevant because this
039   * is always compared by reference.
040   */
041  public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue();
042
043  /**
044   * Look at the next Cell in this scanner, but do not iterate scanner. NOTICE: The returned cell
045   * has not been passed into ScanQueryMatcher. So it may not be what the user need.
046   * @return the next Cell
047   */
048  Cell peek();
049
050  /**
051   * Return the next Cell in this scanner, iterating the scanner
052   * @return the next Cell
053   */
054  Cell next() throws IOException;
055
056  /**
057   * Seek the scanner at or after the specified KeyValue.
058   * @param key seek value
059   * @return true if scanner has values left, false if end of scanner
060   */
061  boolean seek(Cell key) throws IOException;
062
063  /**
064   * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or
065   * after the required key only if the key comes after the current position of the scanner. Should
066   * not be used to seek to a key which may come before the current position.
067   * @param key seek value (should be non-null)
068   * @return true if scanner has values left, false if end of scanner
069   */
070  boolean reseek(Cell key) throws IOException;
071
072  /**
073   * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners. This is
074   * required for comparing multiple files to find out which one has the latest data.
075   * StoreFileScanners are ordered from 0 (oldest) to newest in increasing order.
076   */
077  default long getScannerOrder() {
078    return 0;
079  }
080
081  /**
082   * Close the KeyValue scanner.
083   */
084  @Override
085  void close();
086
087  /**
088   * Allows to filter out scanners (both StoreFile and memstore) that we don't want to use based on
089   * criteria such as Bloom filters and timestamp ranges.
090   * @param scan              the scan that we are selecting scanners for
091   * @param store             the store we are performing the scan on.
092   * @param oldestUnexpiredTS the oldest timestamp we are interested in for this query, based on TTL
093   * @return true if the scanner should be included in the query
094   */
095  boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS);
096
097  // "Lazy scanner" optimizations
098
099  /**
100   * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only does a seek operation
101   * after checking that it is really necessary for the row/column combination specified by the kv
102   * parameter. This function was added to avoid unnecessary disk seeks by checking row-column Bloom
103   * filters before a seek on multi-column get/scan queries, and to optimize by looking up more
104   * recent files first.
105   * @param forward  do a forward-only "reseek" instead of a random-access seek
106   * @param useBloom whether to enable multi-column Bloom filter optimization
107   */
108  boolean requestSeek(Cell kv, boolean forward, boolean useBloom) throws IOException;
109
110  /**
111   * We optimize our store scanners by checking the most recent store file first, so we sometimes
112   * pretend we have done a seek but delay it until the store scanner bubbles up to the top of the
113   * key-value heap. This method is then used to ensure the top store file scanner has done a seek
114   * operation.
115   */
116  boolean realSeekDone();
117
118  /**
119   * Does the real seek operation in case it was skipped by seekToRowCol(KeyValue, boolean) (TODO:
120   * Whats this?). Note that this function should be never called on scanners that always do real
121   * seek operations (i.e. most of the scanners). The easiest way to achieve this is to call
122   * {@link #realSeekDone()} first.
123   */
124  void enforceSeek() throws IOException;
125
126  /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */
127  boolean isFileScanner();
128
129  /**
130   * Record the size of the current block in bytes, passing as an argument to the blockSizeConsumer.
131   * Implementations should ensure that blockSizeConsumer is only called once per block.
132   * @param blockSizeConsumer to be called with block size in bytes, once per block.
133   */
134  void recordBlockSize(IntConsumer blockSizeConsumer);
135
136  /**
137   * @return the file path if this is a file scanner, otherwise null.
138   * @see #isFileScanner()
139   */
140  Path getFilePath();
141
142  // Support for "Reversed Scanner"
143  /**
144   * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner
145   * at or after the specified Cell, return if peek KeyValue of scanner has the same row with
146   * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous
147   * row of specified KeyValue
148   * @param key seek KeyValue
149   * @return true if the scanner is at the valid KeyValue, false if such KeyValue does not exist
150   */
151  public boolean backwardSeek(Cell key) throws IOException;
152
153  /**
154   * Seek the scanner at the first Cell of the row which is the previous row of specified key
155   * @param key seek value
156   * @return true if the scanner at the first valid Cell of previous row, false if not existing such
157   *         Cell
158   */
159  public boolean seekToPreviousRow(Cell key) throws IOException;
160
161  /**
162   * Seek the scanner at the first KeyValue of last row
163   * @return true if scanner has values left, false if the underlying data is empty
164   */
165  public boolean seekToLastRow() throws IOException;
166
167  /**
168   * @return the next key in the index, usually the first key of next block OR a key that falls
169   *         between last key of current block and first key of next block.. see
170   *         HFileWriterImpl#getMidpoint, or null if not known.
171   */
172  public Cell getNextIndexedKey();
173}