View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.io.DataInputStream;
20  import java.io.DataOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.hadoop.hbase.Cell;
25  import org.apache.hadoop.hbase.CellComparator;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.io.hfile.HFileContext;
28  import org.apache.hadoop.hbase.nio.ByteBuff;
29  
30  /**
31   * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
32   * <ul>
33   * <li>the KeyValues are stored sorted by key</li>
34   * <li>we know the structure of KeyValue</li>
35   * <li>the values are always iterated forward from beginning of block</li>
36   * <li>knowledge of Key Value format</li>
37   * </ul>
38   * It is designed to work fast enough to be feasible as in memory compression.
39   */
40  @InterfaceAudience.Private
41  public interface DataBlockEncoder {
42  // TODO: This Interface should be deprecated and replaced. It presumes hfile and carnal knowledge of
43  // Cell internals. It was done for a different time. Remove. Purge.
44    /**
45     * Starts encoding for a block of KeyValues. Call
46     * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish
47     * encoding of a block.
48     * @param encodingCtx
49     * @param out
50     * @throws IOException
51     */
52    void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out)
53        throws IOException;
54  
55    /**
56     * Encodes a KeyValue.
57     * @param cell
58     * @param encodingCtx
59     * @param out
60     * @return unencoded kv size written
61     * @throws IOException
62     */
63    int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
64        throws IOException;
65  
66    /**
67     * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing
68     * stuff for the encoded block. It must be called at the end of block encoding.
69     * @param encodingCtx
70     * @param out
71     * @param uncompressedBytesWithHeader
72     * @throws IOException
73     */
74    void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out,
75        byte[] uncompressedBytesWithHeader) throws IOException;
76  
77    /**
78     * Decode.
79     * @param source Compressed stream of KeyValues.
80     * @param decodingCtx
81     * @return Uncompressed block of KeyValues.
82     * @throws IOException If there is an error in source.
83     */
84    ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
85        throws IOException;
86  
87    /**
88     * Return first key in block as a cell. Useful for indexing. Typically does not make
89     * a deep copy but returns a buffer wrapping a segment of the actual block's
90     * byte array. This is because the first key in block is usually stored
91     * unencoded.
92     * @param block encoded block we want index, the position will not change
93     * @return First key in block as a cell.
94     */
95    Cell getFirstKeyCellInBlock(ByteBuff block);
96  
97    /**
98     * Create a HFileBlock seeker which find KeyValues within a block.
99     * @param comparator what kind of comparison should be used
100    * @param decodingCtx
101    * @return A newly created seeker.
102    */
103   EncodedSeeker createSeeker(CellComparator comparator, 
104       HFileBlockDecodingContext decodingCtx);
105 
106   /**
107    * Creates a encoder specific encoding context
108    *
109    * @param encoding
110    *          encoding strategy used
111    * @param headerBytes
112    *          header bytes to be written, put a dummy header here if the header
113    *          is unknown
114    * @param meta
115    *          HFile meta data
116    * @return a newly created encoding context
117    */
118   HFileBlockEncodingContext newDataBlockEncodingContext(
119       DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);
120 
121   /**
122    * Creates an encoder specific decoding context, which will prepare the data
123    * before actual decoding
124    *
125    * @param meta
126    *          HFile meta data        
127    * @return a newly created decoding context
128    */
129   HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta);
130 
131   /**
132    * An interface which enable to seek while underlying data is encoded.
133    *
134    * It works on one HFileBlock, but it is reusable. See
135    * {@link #setCurrentBuffer(ByteBuff)}.
136    */
137   interface EncodedSeeker {
138     /**
139      * Set on which buffer there will be done seeking.
140      * @param buffer Used for seeking.
141      */
142     void setCurrentBuffer(ByteBuff buffer);
143 
144     /**
145      * From the current position creates a cell using the key part
146      * of the current buffer
147      * @return key at current position
148      */
149     Cell getKey();
150 
151     /**
152      * Does a shallow copy of the value at the current position. A shallow
153      * copy is possible because the returned buffer refers to the backing array
154      * of the original encoded buffer.
155      * @return value at current position
156      */
157     ByteBuffer getValueShallowCopy();
158 
159     /**
160      * @return the Cell at the current position. Includes memstore timestamp.
161      */
162     Cell getCell();
163 
164     /** Set position to beginning of given block */
165     void rewind();
166 
167     /**
168      * Move to next position
169      * @return true on success, false if there is no more positions.
170      */
171     boolean next();
172 
173     /**
174      * Moves the seeker position within the current block to:
175      * <ul>
176      * <li>the last key that that is less than or equal to the given key if
177      * <code>seekBefore</code> is false</li>
178      * <li>the last key that is strictly less than the given key if <code>
179      * seekBefore</code> is true. The caller is responsible for loading the
180      * previous block if the requested key turns out to be the first key of the
181      * current block.</li>
182      * </ul>
183      * @param key - Cell to which the seek should happen
184      * @param seekBefore find the key strictly less than the given key in case
185      *          of an exact match. Does not matter in case of an inexact match.
186      * @return 0 on exact match, 1 on inexact match.
187      */
188     int seekToKeyInBlock(Cell key, boolean seekBefore);
189 
190     /**
191      * Compare the given key against the current key
192      * @param comparator
193      * @param key
194      * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater
195      */
196     public int compareKey(CellComparator comparator, Cell key);
197   }
198 }