001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.Closeable; 021import java.io.IOException; 022import org.apache.hadoop.fs.Path; 023import org.apache.hadoop.hbase.Cell; 024import org.apache.hadoop.hbase.KeyValue; 025import org.apache.hadoop.hbase.client.Scan; 026import org.apache.yetus.audience.InterfaceAudience; 027 028/** 029 * Scanner that returns the next KeyValue. 030 */ 031@InterfaceAudience.Private 032// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner 033// so this should be something else altogether, a decoration on our base CellScanner. TODO. 034// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0. 035public interface KeyValueScanner extends Shipper, Closeable { 036 /** 037 * The byte array represents for NO_NEXT_INDEXED_KEY; The actual value is irrelevant because this 038 * is always compared by reference. 039 */ 040 public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue(); 041 042 /** 043 * Look at the next Cell in this scanner, but do not iterate scanner. NOTICE: The returned cell 044 * has not been passed into ScanQueryMatcher. So it may not be what the user need. 045 * @return the next Cell 046 */ 047 Cell peek(); 048 049 /** 050 * Return the next Cell in this scanner, iterating the scanner 051 * @return the next Cell 052 */ 053 Cell next() throws IOException; 054 055 /** 056 * Seek the scanner at or after the specified KeyValue. 057 * @param key seek value 058 * @return true if scanner has values left, false if end of scanner 059 */ 060 boolean seek(Cell key) throws IOException; 061 062 /** 063 * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or 064 * after the required key only if the key comes after the current position of the scanner. Should 065 * not be used to seek to a key which may come before the current position. 066 * @param key seek value (should be non-null) 067 * @return true if scanner has values left, false if end of scanner 068 */ 069 boolean reseek(Cell key) throws IOException; 070 071 /** 072 * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners. This is 073 * required for comparing multiple files to find out which one has the latest data. 074 * StoreFileScanners are ordered from 0 (oldest) to newest in increasing order. 075 */ 076 default long getScannerOrder() { 077 return 0; 078 } 079 080 /** 081 * Close the KeyValue scanner. 082 */ 083 @Override 084 void close(); 085 086 /** 087 * Allows to filter out scanners (both StoreFile and memstore) that we don't want to use based on 088 * criteria such as Bloom filters and timestamp ranges. 089 * @param scan the scan that we are selecting scanners for 090 * @param store the store we are performing the scan on. 091 * @param oldestUnexpiredTS the oldest timestamp we are interested in for this query, based on TTL 092 * @return true if the scanner should be included in the query 093 */ 094 boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS); 095 096 // "Lazy scanner" optimizations 097 098 /** 099 * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only does a seek operation 100 * after checking that it is really necessary for the row/column combination specified by the kv 101 * parameter. This function was added to avoid unnecessary disk seeks by checking row-column Bloom 102 * filters before a seek on multi-column get/scan queries, and to optimize by looking up more 103 * recent files first. 104 * @param forward do a forward-only "reseek" instead of a random-access seek 105 * @param useBloom whether to enable multi-column Bloom filter optimization 106 */ 107 boolean requestSeek(Cell kv, boolean forward, boolean useBloom) throws IOException; 108 109 /** 110 * We optimize our store scanners by checking the most recent store file first, so we sometimes 111 * pretend we have done a seek but delay it until the store scanner bubbles up to the top of the 112 * key-value heap. This method is then used to ensure the top store file scanner has done a seek 113 * operation. 114 */ 115 boolean realSeekDone(); 116 117 /** 118 * Does the real seek operation in case it was skipped by seekToRowCol(KeyValue, boolean) (TODO: 119 * Whats this?). Note that this function should be never called on scanners that always do real 120 * seek operations (i.e. most of the scanners). The easiest way to achieve this is to call 121 * {@link #realSeekDone()} first. 122 */ 123 void enforceSeek() throws IOException; 124 125 /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */ 126 boolean isFileScanner(); 127 128 /** 129 * @return the file path if this is a file scanner, otherwise null. 130 * @see #isFileScanner() 131 */ 132 Path getFilePath(); 133 134 // Support for "Reversed Scanner" 135 /** 136 * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner 137 * at or after the specified Cell, return if peek KeyValue of scanner has the same row with 138 * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous 139 * row of specified KeyValue 140 * @param key seek KeyValue 141 * @return true if the scanner is at the valid KeyValue, false if such KeyValue does not exist 142 */ 143 public boolean backwardSeek(Cell key) throws IOException; 144 145 /** 146 * Seek the scanner at the first Cell of the row which is the previous row of specified key 147 * @param key seek value 148 * @return true if the scanner at the first valid Cell of previous row, false if not existing such 149 * Cell 150 */ 151 public boolean seekToPreviousRow(Cell key) throws IOException; 152 153 /** 154 * Seek the scanner at the first KeyValue of last row 155 * @return true if scanner has values left, false if the underlying data is empty n 156 */ 157 public boolean seekToLastRow() throws IOException; 158 159 /** 160 * @return the next key in the index, usually the first key of next block OR a key that falls 161 * between last key of current block and first key of next block.. see 162 * HFileWriterImpl#getMidpoint, or null if not known. 163 */ 164 public Cell getNextIndexedKey(); 165}