001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.io.Closeable;
021import java.io.IOException;
022import java.nio.ByteBuffer;
023import java.util.function.IntConsumer;
024import org.apache.hadoop.hbase.Cell;
025import org.apache.hadoop.hbase.regionserver.Shipper;
026import org.apache.yetus.audience.InterfaceAudience;
027
028/**
029 * A scanner allows you to position yourself within a HFile and scan through it. It allows you to
030 * reposition yourself as well.
031 * <p>
032 * A scanner doesn't always have a key/value that it is pointing to when it is first created and
033 * before {@link #seekTo()}/{@link #seekTo(Cell)} are called. In this case,
034 * {@link #getKey()}/{@link #getValue()} return null. At most other times, a key and value will be
035 * available. The general pattern is that you position the Scanner using the seekTo variants and
036 * then getKey and getValue.
037 */
038@InterfaceAudience.Private
039public interface HFileScanner extends Shipper, Closeable {
040  /**
041   * SeekTo or just before the passed <code>cell</code>. Examine the return code to figure whether
042   * we found the cell or not. Consider the cell stream of all the cells in the file,
043   * <code>c[0] .. c[n]</code>, where there are n cells in the file.
044   * @return -1, if cell &lt; c[0], no position; 0, such that c[i] = cell and scanner is left in
045   *         position i; and 1, such that c[i] &lt; cell, and scanner is left in position i. The
046   *         scanner will position itself between c[i] and c[i+1] where c[i] &lt; cell &lt;= c[i+1].
047   *         If there is no cell c[i+1] greater than or equal to the input cell, then the scanner
048   *         will position itself at the end of the file and next() will return false when it is
049   *         called.
050   */
051  int seekTo(Cell cell) throws IOException;
052
053  /**
054   * Reseek to or just before the passed <code>cell</code>. Similar to seekTo except that this can
055   * be called even if the scanner is not at the beginning of a file. This can be used to seek only
056   * to cells which come after the current position of the scanner. Consider the cell stream of all
057   * the cells in the file, <code>c[0] .. c[n]</code>, where there are n cells in the file after
058   * current position of HFileScanner. The scanner will position itself between c[i] and c[i+1]
059   * where c[i] &lt; cell &lt;= c[i+1]. If there is no cell c[i+1] greater than or equal to the
060   * input cell, then the scanner will position itself at the end of the file and next() will return
061   * false when it is called.
062   * @param cell Cell to find (should be non-null)
063   * @return -1, if cell &lt; c[0], no position; 0, such that c[i] = cell and scanner is left in
064   *         position i; and 1, such that c[i] &lt; cell, and scanner is left in position i.
065   */
066  int reseekTo(Cell cell) throws IOException;
067
068  /**
069   * Consider the cell stream of all the cells in the file, <code>c[0] .. c[n]</code>, where there
070   * are n cells in the file.
071   * @param cell Cell to find
072   * @return false if cell &lt;= c[0] or true with scanner in position 'i' such that: c[i] &lt;
073   *         cell. Furthermore: there may be a c[i+1], such that c[i] &lt; cell &lt;= c[i+1] but
074   *         there may also NOT be a c[i+1], and next() will return false (EOF).
075   */
076  boolean seekBefore(Cell cell) throws IOException;
077
078  /**
079   * Positions this scanner at the start of the file.
080   * @return False if empty file; i.e. a call to next would return false and the current key and
081   *         value are undefined.
082   */
083  boolean seekTo() throws IOException;
084
085  /**
086   * Scans to the next entry in the file.
087   * @return false if you are at the end of the file, otherwise true if there is more to scan.
088   */
089  boolean next() throws IOException;
090
091  /**
092   * Gets the current key in the form of a cell. You must call {@link #seekTo(Cell)} before this
093   * method.
094   * @return the current key as a Cell.
095   */
096  Cell getKey();
097
098  /**
099   * Gets a buffer view to the current value. You must call {@link #seekTo(Cell)} before this
100   * method.
101   * @return byte buffer for the value. The limit is set to the value size, and the position is 0,
102   *         the start of the buffer view.
103   */
104  ByteBuffer getValue();
105
106  /** Returns an instance of {@link org.apache.hadoop.hbase.Cell}. */
107  Cell getCell();
108
109  /**
110   * Convenience method to get a copy of the key as a string - interpreting the bytes as UTF-8.
111   * You must call {@link #seekTo(Cell)} before this method.
112   * @return key as a string
113   * @deprecated Since hbase-2.0.0
114   */
115  @Deprecated
116  String getKeyString();
117
118  /**
119   * Convenience method to get a copy of the value as a string - interpreting the bytes as UTF-8.
120   * You must call {@link #seekTo(Cell)} before this method.
121   * @return value as a string
122   * @deprecated Since hbase-2.0.0
123   */
124  @Deprecated
125  String getValueString();
126
127  /** Returns Reader that underlies this Scanner instance. */
128  HFile.Reader getReader();
129
130  /**
131   * @return True if scanner has had one of the seek calls invoked; i.e. {@link #seekBefore(Cell)}
132   *         or {@link #seekTo()} or {@link #seekTo(Cell)}. Otherwise returns false.
133   */
134  boolean isSeeked();
135
136  /** Returns the next key in the index (the key to seek to the next block). */
137  Cell getNextIndexedKey();
138
139  /**
140   * Close this HFile scanner and do necessary cleanup. Declares no checked exceptions.
141   */
142  @Override
143  void close();
144
145  /**
146   * Record the size of the current block in bytes, passing as an argument to the blockSizeConsumer.
147   * Implementations should ensure that blockSizeConsumer is only called once per block.
148   * @param blockSizeConsumer to be called with block size in bytes, once per block.
149   */
150  void recordBlockSize(IntConsumer blockSizeConsumer);
151}