001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.io.Closeable; 021import java.io.IOException; 022import java.nio.ByteBuffer; 023import java.util.function.IntConsumer; 024import org.apache.hadoop.hbase.ExtendedCell; 025import org.apache.hadoop.hbase.regionserver.Shipper; 026import org.apache.yetus.audience.InterfaceAudience; 027 028/** 029 * A scanner allows you to position yourself within a HFile and scan through it. It allows you to 030 * reposition yourself as well. 031 * <p> 032 * A scanner doesn't always have a key/value that it is pointing to when it is first created and 033 * before {@link #seekTo()}/{@link #seekTo(ExtendedCell)} are called. In this case, 034 * {@link #getKey()}/{@link #getValue()} returns null. At most other times, a key and value will be 035 * available. The general pattern is that you position the Scanner using the seekTo variants and 036 * then getKey and getValue. 037 */ 038@InterfaceAudience.Private 039public interface HFileScanner extends Shipper, Closeable { 040 /** 041 * SeekTo or just before the passed <code>cell</code>. Examine the return code to figure whether 042 * we found the cell or not. Consider the cell stream of all the cells in the file, 043 * <code>c[0] .. c[n]</code>, where there are n cells in the file. 044 * @return -1, if cell < c[0], no position; 0, such that c[i] = cell and scanner is left in 045 * position i; and 1, such that c[i] < cell, and scanner is left in position i. The 046 * scanner will position itself between c[i] and c[i+1] where c[i] < cell <= c[i+1]. 047 * If there is no cell c[i+1] greater than or equal to the input cell, then the scanner 048 * will position itself at the end of the file and next() will return false when it is 049 * called. 050 */ 051 int seekTo(ExtendedCell cell) throws IOException; 052 053 /** 054 * Reseek to or just before the passed <code>cell</code>. Similar to seekTo except that this can 055 * be called even if the scanner is not at the beginning of a file. This can be used to seek only 056 * to cells which come after the current position of the scanner. Consider the cell stream of all 057 * the cells in the file, <code>c[0] .. c[n]</code>, where there are n cellc in the file after 058 * current position of HFileScanner. The scanner will position itself between c[i] and c[i+1] 059 * where c[i] < cell <= c[i+1]. If there is no cell c[i+1] greater than or equal to the 060 * input cell, then the scanner will position itself at the end of the file and next() will return 061 * false when it is called. 062 * @param cell Cell to find (should be non-null) 063 * @return -1, if cell < c[0], no position; 0, such that c[i] = cell and scanner is left in 064 * position i; and 1, such that c[i] < cell, and scanner is left in position i. 065 */ 066 int reseekTo(ExtendedCell cell) throws IOException; 067 068 /** 069 * Consider the cell stream of all the cells in the file, <code>c[0] .. c[n]</code>, where there 070 * are n cells in the file. 071 * @param cell Cell to find 072 * @return false if cell <= c[0] or true with scanner in position 'i' such that: c[i] < 073 * cell. Furthermore: there may be a c[i+1], such that c[i] < cell <= c[i+1] but 074 * there may also NOT be a c[i+1], and next() will return false (EOF). 075 */ 076 boolean seekBefore(ExtendedCell cell) throws IOException; 077 078 /** 079 * Positions this scanner at the start of the file. 080 * @return False if empty file; i.e. a call to next would return false and the current key and 081 * value are undefined. 082 */ 083 boolean seekTo() throws IOException; 084 085 /** 086 * Scans to the next entry in the file. 087 * @return Returns false if you are at the end otherwise true if more in file. 088 */ 089 boolean next() throws IOException; 090 091 /** 092 * Gets the current key in the form of a cell. You must call {@link #seekTo(ExtendedCell)} before 093 * this method. 094 * @return gets the current key as a Cell. 095 */ 096 ExtendedCell getKey(); 097 098 /** 099 * Gets a buffer view to the current value. You must call {@link #seekTo(ExtendedCell)} before 100 * this method. 101 * @return byte buffer for the value. The limit is set to the value size, and the position is 0, 102 * the start of the buffer view. 103 */ 104 ByteBuffer getValue(); 105 106 /** Returns Instance of {@link ExtendedCell}. */ 107 ExtendedCell getCell(); 108 109 /** Returns Reader that underlies this Scanner instance. */ 110 HFile.Reader getReader(); 111 112 /** 113 * @return True is scanner has had one of the seek calls invoked; i.e. 114 * {@link #seekBefore(ExtendedCell)} or {@link #seekTo()} or 115 * {@link #seekTo(ExtendedCell)}. Otherwise returns false. 116 */ 117 boolean isSeeked(); 118 119 /** Returns the next key in the index (the key to seek to the next block) */ 120 ExtendedCell getNextIndexedKey(); 121 122 /** 123 * Close this HFile scanner and do necessary cleanup. 124 */ 125 @Override 126 void close(); 127 128 /** 129 * Record the size of the current block in bytes, passing as an argument to the blockSizeConsumer. 130 * Implementations should ensure that blockSizeConsumer is only called once per block. 131 * @param blockSizeConsumer to be called with block size in bytes, once per block. 132 */ 133 void recordBlockSize(IntConsumer blockSizeConsumer); 134}