001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.encoding;
019
020import java.io.DataInputStream;
021import java.io.DataOutputStream;
022import java.io.IOException;
023import java.nio.ByteBuffer;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.hbase.Cell;
026import org.apache.hadoop.hbase.CellComparator;
027import org.apache.hadoop.hbase.io.hfile.HFileContext;
028import org.apache.hadoop.hbase.nio.ByteBuff;
029import org.apache.yetus.audience.InterfaceAudience;
030
031/**
032 * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
033 * <ul>
034 * <li>the KeyValues are stored sorted by key</li>
035 * <li>we know the structure of KeyValue</li>
036 * <li>the values are always iterated forward from beginning of block</li>
037 * <li>knowledge of Key Value format</li>
038 * </ul>
039 * It is designed to work fast enough to be feasible as in memory compression.
040 */
041@InterfaceAudience.Private
042public interface DataBlockEncoder {
043  // TODO: This Interface should be deprecated and replaced. It presumes hfile and carnal knowledge
044  // of
045  // Cell internals. It was done for a different time. Remove. Purge.
046  /**
047   * Starts encoding for a block of KeyValues. Call
048   * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish
049   * encoding of a block.
050   */
051  void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out)
052    throws IOException;
053
054  /**
055   * Encodes a KeyValue. After the encode, {@link EncodingState#postCellEncode(int, int)} needs to
056   * be called to keep track of the encoded and unencoded data size
057   */
058  void encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
059    throws IOException;
060
061  /**
062   * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing
063   * stuff for the encoded block. It must be called at the end of block encoding.
064   */
065  void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out,
066    byte[] uncompressedBytesWithHeader) throws IOException;
067
068  /**
069   * Decode.
070   * @param source Compressed stream of KeyValues.
071   * @return Uncompressed block of KeyValues.
072   * @throws IOException If there is an error in source.
073   */
074  ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
075    throws IOException;
076
077  /**
078   * Return first key in block as a cell. Useful for indexing. Typically does not make a deep copy
079   * but returns a buffer wrapping a segment of the actual block's byte array. This is because the
080   * first key in block is usually stored unencoded.
081   * @param block encoded block we want index, the position will not change
082   * @return First key in block as a cell.
083   */
084  Cell getFirstKeyCellInBlock(ByteBuff block);
085
086  /**
087   * Create a HFileBlock seeker which find KeyValues within a block.
088   * @return A newly created seeker.
089   */
090  EncodedSeeker createSeeker(HFileBlockDecodingContext decodingCtx);
091
092  /**
093   * Creates a encoder specific encoding context n * store configuration n * encoding strategy used
094   * n * header bytes to be written, put a dummy header here if the header is unknown n * HFile meta
095   * data
096   * @return a newly created encoding context
097   */
098  HFileBlockEncodingContext newDataBlockEncodingContext(Configuration conf,
099    DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);
100
101  /**
102   * Creates an encoder specific decoding context, which will prepare the data before actual
103   * decoding n * store configuration n * HFile meta data
104   * @return a newly created decoding context
105   */
106  HFileBlockDecodingContext newDataBlockDecodingContext(Configuration conf, HFileContext meta);
107
108  /**
109   * An interface which enable to seek while underlying data is encoded. It works on one HFileBlock,
110   * but it is reusable. See {@link #setCurrentBuffer(ByteBuff)}.
111   */
112  interface EncodedSeeker {
113    /**
114     * Set on which buffer there will be done seeking.
115     * @param buffer Used for seeking.
116     */
117    void setCurrentBuffer(ByteBuff buffer);
118
119    /**
120     * From the current position creates a cell using the key part of the current buffer
121     * @return key at current position
122     */
123    Cell getKey();
124
125    /**
126     * Does a shallow copy of the value at the current position. A shallow copy is possible because
127     * the returned buffer refers to the backing array of the original encoded buffer.
128     * @return value at current position
129     */
130    ByteBuffer getValueShallowCopy();
131
132    /** Returns the Cell at the current position. Includes memstore timestamp. */
133    Cell getCell();
134
135    /** Set position to beginning of given block */
136    void rewind();
137
138    /**
139     * Move to next position
140     * @return true on success, false if there is no more positions.
141     */
142    boolean next();
143
144    /**
145     * Moves the seeker position within the current block to:
146     * <ul>
147     * <li>the last key that that is less than or equal to the given key if <code>seekBefore</code>
148     * is false</li>
149     * <li>the last key that is strictly less than the given key if <code>
150     * seekBefore</code> is true. The caller is responsible for loading the previous block if the
151     * requested key turns out to be the first key of the current block.</li>
152     * </ul>
153     * @param key        - Cell to which the seek should happen
154     * @param seekBefore find the key strictly less than the given key in case of an exact match.
155     *                   Does not matter in case of an inexact match.
156     * @return 0 on exact match, 1 on inexact match.
157     */
158    int seekToKeyInBlock(Cell key, boolean seekBefore);
159
160    /**
161     * Compare the given key against the current key
162     * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater
163     */
164    public int compareKey(CellComparator comparator, Cell key);
165  }
166}