001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.io.hfile; 020 021import java.io.Closeable; 022import java.io.IOException; 023import java.nio.ByteBuffer; 024 025import org.apache.yetus.audience.InterfaceAudience; 026import org.apache.hadoop.hbase.regionserver.Shipper; 027import org.apache.hadoop.hbase.Cell; 028 029/** 030 * A scanner allows you to position yourself within a HFile and 031 * scan through it. It allows you to reposition yourself as well. 032 * 033 * <p>A scanner doesn't always have a key/value that it is pointing to 034 * when it is first created and before 035 * {@link #seekTo()}/{@link #seekTo(Cell)} are called. 036 * In this case, {@link #getKey()}/{@link #getValue()} returns null. At most 037 * other times, a key and value will be available. The general pattern is that 038 * you position the Scanner using the seekTo variants and then getKey and 039 * getValue. 040 */ 041@InterfaceAudience.Private 042public interface HFileScanner extends Shipper, Closeable { 043 /** 044 * SeekTo or just before the passed <code>cell</code>. Examine the return 045 * code to figure whether we found the cell or not. 046 * Consider the cell stream of all the cells in the file, 047 * <code>c[0] .. c[n]</code>, where there are n cells in the file. 048 * @param cell 049 * @return -1, if cell < c[0], no position; 050 * 0, such that c[i] = cell and scanner is left in position i; and 051 * 1, such that c[i] < cell, and scanner is left in position i. 052 * The scanner will position itself between c[i] and c[i+1] where 053 * c[i] < cell <= c[i+1]. 054 * If there is no cell c[i+1] greater than or equal to the input cell, then the 055 * scanner will position itself at the end of the file and next() will return 056 * false when it is called. 057 * @throws IOException 058 */ 059 int seekTo(Cell cell) throws IOException; 060 061 /** 062 * Reseek to or just before the passed <code>cell</code>. Similar to seekTo 063 * except that this can be called even if the scanner is not at the beginning 064 * of a file. 065 * This can be used to seek only to cells which come after the current position 066 * of the scanner. 067 * Consider the cell stream of all the cells in the file, 068 * <code>c[0] .. c[n]</code>, where there are n cellc in the file after 069 * current position of HFileScanner. 070 * The scanner will position itself between c[i] and c[i+1] where 071 * c[i] < cell <= c[i+1]. 072 * If there is no cell c[i+1] greater than or equal to the input cell, then the 073 * scanner will position itself at the end of the file and next() will return 074 * false when it is called. 075 * @param cell Cell to find (should be non-null) 076 * @return -1, if cell < c[0], no position; 077 * 0, such that c[i] = cell and scanner is left in position i; and 078 * 1, such that c[i] < cell, and scanner is left in position i. 079 * @throws IOException 080 */ 081 int reseekTo(Cell cell) throws IOException; 082 083 /** 084 * Consider the cell stream of all the cells in the file, 085 * <code>c[0] .. c[n]</code>, where there are n cells in the file. 086 * @param cell Cell to find 087 * @return false if cell <= c[0] or true with scanner in position 'i' such 088 * that: c[i] < cell. Furthermore: there may be a c[i+1], such that 089 * c[i] < cell <= c[i+1] but there may also NOT be a c[i+1], and next() will 090 * return false (EOF). 091 * @throws IOException 092 */ 093 boolean seekBefore(Cell cell) throws IOException; 094 095 /** 096 * Positions this scanner at the start of the file. 097 * @return False if empty file; i.e. a call to next would return false and 098 * the current key and value are undefined. 099 * @throws IOException 100 */ 101 boolean seekTo() throws IOException; 102 103 /** 104 * Scans to the next entry in the file. 105 * @return Returns false if you are at the end otherwise true if more in file. 106 * @throws IOException 107 */ 108 boolean next() throws IOException; 109 110 /** 111 * Gets the current key in the form of a cell. You must call 112 * {@link #seekTo(Cell)} before this method. 113 * @return gets the current key as a Cell. 114 */ 115 Cell getKey(); 116 117 /** 118 * Gets a buffer view to the current value. You must call 119 * {@link #seekTo(Cell)} before this method. 120 * 121 * @return byte buffer for the value. The limit is set to the value size, and 122 * the position is 0, the start of the buffer view. 123 */ 124 ByteBuffer getValue(); 125 126 /** 127 * @return Instance of {@link org.apache.hadoop.hbase.Cell}. 128 */ 129 Cell getCell(); 130 131 /** 132 * Convenience method to get a copy of the key as a string - interpreting the 133 * bytes as UTF8. You must call {@link #seekTo(Cell)} before this method. 134 * @return key as a string 135 * @deprecated Since hbase-2.0.0 136 */ 137 @Deprecated 138 String getKeyString(); 139 140 /** 141 * Convenience method to get a copy of the value as a string - interpreting 142 * the bytes as UTF8. You must call {@link #seekTo(Cell)} before this method. 143 * @return value as a string 144 * @deprecated Since hbase-2.0.0 145 */ 146 @Deprecated 147 String getValueString(); 148 149 /** 150 * @return Reader that underlies this Scanner instance. 151 */ 152 HFile.Reader getReader(); 153 154 /** 155 * @return True is scanner has had one of the seek calls invoked; i.e. 156 * {@link #seekBefore(Cell)} or {@link #seekTo()} or {@link #seekTo(Cell)}. 157 * Otherwise returns false. 158 */ 159 boolean isSeeked(); 160 161 /** 162 * @return the next key in the index (the key to seek to the next block) 163 */ 164 Cell getNextIndexedKey(); 165 166 /** 167 * Close this HFile scanner and do necessary cleanup. 168 */ 169 @Override 170 void close(); 171}