001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.regionserver; 020 021import java.io.Closeable; 022import java.io.IOException; 023 024import org.apache.yetus.audience.InterfaceAudience; 025import org.apache.hadoop.fs.Path; 026import org.apache.hadoop.hbase.Cell; 027import org.apache.hadoop.hbase.KeyValue; 028import org.apache.hadoop.hbase.client.Scan; 029 030/** 031 * Scanner that returns the next KeyValue. 032 */ 033@InterfaceAudience.Private 034// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner 035// so this should be something else altogether, a decoration on our base CellScanner. TODO. 036// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0. 037public interface KeyValueScanner extends Shipper, Closeable { 038 /** 039 * The byte array represents for NO_NEXT_INDEXED_KEY; 040 * The actual value is irrelevant because this is always compared by reference. 041 */ 042 public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue(); 043 044 /** 045 * Look at the next Cell in this scanner, but do not iterate scanner. 046 * NOTICE: The returned cell has not been passed into ScanQueryMatcher. So it may not be what the 047 * user need. 048 * @return the next Cell 049 */ 050 Cell peek(); 051 052 /** 053 * Return the next Cell in this scanner, iterating the scanner 054 * @return the next Cell 055 */ 056 Cell next() throws IOException; 057 058 /** 059 * Seek the scanner at or after the specified KeyValue. 060 * @param key seek value 061 * @return true if scanner has values left, false if end of scanner 062 */ 063 boolean seek(Cell key) throws IOException; 064 065 /** 066 * Reseek the scanner at or after the specified KeyValue. 067 * This method is guaranteed to seek at or after the required key only if the 068 * key comes after the current position of the scanner. Should not be used 069 * to seek to a key which may come before the current position. 070 * @param key seek value (should be non-null) 071 * @return true if scanner has values left, false if end of scanner 072 */ 073 boolean reseek(Cell key) throws IOException; 074 075 /** 076 * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners. 077 * This is required for comparing multiple files to find out which one has the latest 078 * data. StoreFileScanners are ordered from 0 (oldest) to newest in increasing order. 079 */ 080 default long getScannerOrder(){ 081 return 0; 082 } 083 084 /** 085 * Close the KeyValue scanner. 086 */ 087 @Override 088 void close(); 089 090 /** 091 * Allows to filter out scanners (both StoreFile and memstore) that we don't 092 * want to use based on criteria such as Bloom filters and timestamp ranges. 093 * @param scan the scan that we are selecting scanners for 094 * @param store the store we are performing the scan on. 095 * @param oldestUnexpiredTS the oldest timestamp we are interested in for 096 * this query, based on TTL 097 * @return true if the scanner should be included in the query 098 */ 099 boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS); 100 101 // "Lazy scanner" optimizations 102 103 /** 104 * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only 105 * does a seek operation after checking that it is really necessary for the 106 * row/column combination specified by the kv parameter. This function was 107 * added to avoid unnecessary disk seeks by checking row-column Bloom filters 108 * before a seek on multi-column get/scan queries, and to optimize by looking 109 * up more recent files first. 110 * @param forward do a forward-only "reseek" instead of a random-access seek 111 * @param useBloom whether to enable multi-column Bloom filter optimization 112 */ 113 boolean requestSeek(Cell kv, boolean forward, boolean useBloom) 114 throws IOException; 115 116 /** 117 * We optimize our store scanners by checking the most recent store file 118 * first, so we sometimes pretend we have done a seek but delay it until the 119 * store scanner bubbles up to the top of the key-value heap. This method is 120 * then used to ensure the top store file scanner has done a seek operation. 121 */ 122 boolean realSeekDone(); 123 124 /** 125 * Does the real seek operation in case it was skipped by 126 * seekToRowCol(KeyValue, boolean) (TODO: Whats this?). Note that this function should 127 * be never called on scanners that always do real seek operations (i.e. most 128 * of the scanners). The easiest way to achieve this is to call 129 * {@link #realSeekDone()} first. 130 */ 131 void enforceSeek() throws IOException; 132 133 /** 134 * @return true if this is a file scanner. Otherwise a memory scanner is 135 * assumed. 136 */ 137 boolean isFileScanner(); 138 139 /** 140 * @return the file path if this is a file scanner, otherwise null. 141 * @see #isFileScanner() 142 */ 143 Path getFilePath(); 144 145 // Support for "Reversed Scanner" 146 /** 147 * Seek the scanner at or before the row of specified Cell, it firstly 148 * tries to seek the scanner at or after the specified Cell, return if 149 * peek KeyValue of scanner has the same row with specified Cell, 150 * otherwise seek the scanner at the first Cell of the row which is the 151 * previous row of specified KeyValue 152 * 153 * @param key seek KeyValue 154 * @return true if the scanner is at the valid KeyValue, false if such 155 * KeyValue does not exist 156 * 157 */ 158 public boolean backwardSeek(Cell key) throws IOException; 159 160 /** 161 * Seek the scanner at the first Cell of the row which is the previous row 162 * of specified key 163 * @param key seek value 164 * @return true if the scanner at the first valid Cell of previous row, 165 * false if not existing such Cell 166 */ 167 public boolean seekToPreviousRow(Cell key) throws IOException; 168 169 /** 170 * Seek the scanner at the first KeyValue of last row 171 * 172 * @return true if scanner has values left, false if the underlying data is 173 * empty 174 * @throws IOException 175 */ 176 public boolean seekToLastRow() throws IOException; 177 178 /** 179 * @return the next key in the index, usually the first key of next block OR a key that falls 180 * between last key of current block and first key of next block.. 181 * see HFileWriterImpl#getMidpoint, or null if not known. 182 */ 183 public Cell getNextIndexedKey(); 184}