001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.Closeable; 021import java.io.IOException; 022import java.util.Set; 023import java.util.function.IntConsumer; 024import org.apache.hadoop.fs.Path; 025import org.apache.hadoop.hbase.ExtendedCell; 026import org.apache.hadoop.hbase.KeyValue; 027import org.apache.hadoop.hbase.client.Scan; 028import org.apache.yetus.audience.InterfaceAudience; 029 030/** 031 * Scanner that returns the next KeyValue. 032 */ 033@InterfaceAudience.Private 034// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner 035// so this should be something else altogether, a decoration on our base CellScanner. TODO. 036// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0. 037public interface KeyValueScanner extends Shipper, Closeable { 038 /** 039 * The byte array represents for NO_NEXT_INDEXED_KEY; The actual value is irrelevant because this 040 * is always compared by reference. 041 */ 042 public static final ExtendedCell NO_NEXT_INDEXED_KEY = new KeyValue(); 043 044 /** 045 * Look at the next Cell in this scanner, but do not iterate scanner. NOTICE: The returned cell 046 * has not been passed into ScanQueryMatcher. So it may not be what the user need. 047 * @return the next Cell 048 */ 049 ExtendedCell peek(); 050 051 /** 052 * Return the next Cell in this scanner, iterating the scanner 053 * @return the next Cell 054 */ 055 ExtendedCell next() throws IOException; 056 057 /** 058 * Seek the scanner at or after the specified KeyValue. 059 * @param key seek value 060 * @return true if scanner has values left, false if end of scanner 061 */ 062 boolean seek(ExtendedCell key) throws IOException; 063 064 /** 065 * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or 066 * after the required key only if the key comes after the current position of the scanner. Should 067 * not be used to seek to a key which may come before the current position. 068 * @param key seek value (should be non-null) 069 * @return true if scanner has values left, false if end of scanner 070 */ 071 boolean reseek(ExtendedCell key) throws IOException; 072 073 /** 074 * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners. This is 075 * required for comparing multiple files to find out which one has the latest data. 076 * StoreFileScanners are ordered from 0 (oldest) to newest in increasing order. 077 */ 078 default long getScannerOrder() { 079 return 0; 080 } 081 082 /** 083 * Close the KeyValue scanner. 084 */ 085 @Override 086 void close(); 087 088 /** 089 * Allows to filter out scanners (both StoreFile and memstore) that we don't want to use based on 090 * criteria such as Bloom filters and timestamp ranges. 091 * @param scan the scan that we are selecting scanners for 092 * @param store the store we are performing the scan on. 093 * @param oldestUnexpiredTS the oldest timestamp we are interested in for this query, based on TTL 094 * @return true if the scanner should be included in the query 095 */ 096 boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS); 097 098 // "Lazy scanner" optimizations 099 100 /** 101 * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only does a seek operation 102 * after checking that it is really necessary for the row/column combination specified by the kv 103 * parameter. This function was added to avoid unnecessary disk seeks by checking row-column Bloom 104 * filters before a seek on multi-column get/scan queries, and to optimize by looking up more 105 * recent files first. 106 * @param forward do a forward-only "reseek" instead of a random-access seek 107 * @param useBloom whether to enable multi-column Bloom filter optimization 108 */ 109 boolean requestSeek(ExtendedCell kv, boolean forward, boolean useBloom) throws IOException; 110 111 /** 112 * We optimize our store scanners by checking the most recent store file first, so we sometimes 113 * pretend we have done a seek but delay it until the store scanner bubbles up to the top of the 114 * key-value heap. This method is then used to ensure the top store file scanner has done a seek 115 * operation. 116 */ 117 boolean realSeekDone(); 118 119 /** 120 * Does the real seek operation in case it was skipped by seekToRowCol(KeyValue, boolean) (TODO: 121 * Whats this?). Note that this function should be never called on scanners that always do real 122 * seek operations (i.e. most of the scanners). The easiest way to achieve this is to call 123 * {@link #realSeekDone()} first. 124 */ 125 void enforceSeek() throws IOException; 126 127 /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */ 128 boolean isFileScanner(); 129 130 /** 131 * Record the size of the current block in bytes, passing as an argument to the blockSizeConsumer. 132 * Implementations should ensure that blockSizeConsumer is only called once per block. 133 * @param blockSizeConsumer to be called with block size in bytes, once per block. 134 */ 135 void recordBlockSize(IntConsumer blockSizeConsumer); 136 137 /** 138 * @return the file path if this is a file scanner, otherwise null. 139 * @see #isFileScanner() 140 */ 141 Path getFilePath(); 142 143 /** 144 * Returns the set of store file paths that were successfully read by this scanner. Typically 145 * populated only after the scanner is closed. 146 */ 147 Set<Path> getFilesRead(); 148 149 // Support for "Reversed Scanner" 150 /** 151 * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner 152 * at or after the specified Cell, return if peek KeyValue of scanner has the same row with 153 * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous 154 * row of specified KeyValue 155 * @param key seek KeyValue 156 * @return true if the scanner is at the valid KeyValue, false if such KeyValue does not exist 157 */ 158 public boolean backwardSeek(ExtendedCell key) throws IOException; 159 160 /** 161 * Seek the scanner at the first Cell of the row which is the previous row of specified key 162 * @param key seek value 163 * @return true if the scanner at the first valid Cell of previous row, false if not existing such 164 * Cell 165 */ 166 public boolean seekToPreviousRow(ExtendedCell key) throws IOException; 167 168 /** 169 * Seek the scanner at the first KeyValue of last row 170 * @return true if scanner has values left, false if the underlying data is empty 171 */ 172 public boolean seekToLastRow() throws IOException; 173 174 /** 175 * @return the next key in the index, usually the first key of next block OR a key that falls 176 * between last key of current block and first key of next block.. see 177 * HFileWriterImpl#getMidpoint, or null if not known. 178 */ 179 public ExtendedCell getNextIndexedKey(); 180}