View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.io.DataInputStream;
20  import java.io.DataOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.KeyValue;
27  import org.apache.hadoop.hbase.KeyValue.KVComparator;
28  import org.apache.hadoop.hbase.io.hfile.HFileContext;
29  
30  /**
31   * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
32   * <ul>
33   * <li>the KeyValues are stored sorted by key</li>
34   * <li>we know the structure of KeyValue</li>
35   * <li>the values are always iterated forward from beginning of block</li>
36   * <li>knowledge of Key Value format</li>
37   * </ul>
38   * It is designed to work fast enough to be feasible as in memory compression.
39   */
40  @InterfaceAudience.Private
41  public interface DataBlockEncoder {
42  
43    /**
44     * Starts encoding for a block of KeyValues. Call
45     * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish
46     * encoding of a block.
47     * @param encodingCtx
48     * @param out
49     * @throws IOException
50     */
51    void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out)
52        throws IOException;
53  
54    /**
55     * Encodes a KeyValue.
56     * @param kv
57     * @param encodingCtx
58     * @param out
59     * @return unencoded kv size written
60     * @throws IOException
61     */
62    int encode(KeyValue kv, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
63        throws IOException;
64  
65    /**
66     * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing
67     * stuff for the encoded block. It must be called at the end of block encoding.
68     * @param encodingCtx
69     * @param out
70     * @param uncompressedBytesWithHeader
71     * @throws IOException
72     */
73    void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out,
74        byte[] uncompressedBytesWithHeader) throws IOException;
75  
76    /**
77     * Decode.
78     * @param source Compressed stream of KeyValues.
79     * @param decodingCtx
80     * @return Uncompressed block of KeyValues.
81     * @throws IOException If there is an error in source.
82     */
83    ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
84        throws IOException;
85  
86    /**
87     * Return first key in block. Useful for indexing. Typically does not make
88     * a deep copy but returns a buffer wrapping a segment of the actual block's
89     * byte array. This is because the first key in block is usually stored
90     * unencoded.
91     * @param block encoded block we want index, the position will not change
92     * @return First key in block.
93     */
94    ByteBuffer getFirstKeyInBlock(ByteBuffer block);
95  
96    /**
97     * Create a HFileBlock seeker which find KeyValues within a block.
98     * @param comparator what kind of comparison should be used
99     * @param decodingCtx
100    * @return A newly created seeker.
101    */
102   EncodedSeeker createSeeker(KVComparator comparator, 
103       HFileBlockDecodingContext decodingCtx);
104 
105   /**
106    * Creates a encoder specific encoding context
107    *
108    * @param encoding
109    *          encoding strategy used
110    * @param headerBytes
111    *          header bytes to be written, put a dummy header here if the header
112    *          is unknown
113    * @param meta
114    *          HFile meta data
115    * @return a newly created encoding context
116    */
117   HFileBlockEncodingContext newDataBlockEncodingContext(
118       DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);
119 
120   /**
121    * Creates an encoder specific decoding context, which will prepare the data
122    * before actual decoding
123    *
124    * @param meta
125    *          HFile meta data        
126    * @return a newly created decoding context
127    */
128   HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta);
129 
130   /**
131    * An interface which enable to seek while underlying data is encoded.
132    *
133    * It works on one HFileBlock, but it is reusable. See
134    * {@link #setCurrentBuffer(ByteBuffer)}.
135    */
136   interface EncodedSeeker {
137     /**
138      * Set on which buffer there will be done seeking.
139      * @param buffer Used for seeking.
140      */
141     void setCurrentBuffer(ByteBuffer buffer);
142 
143     /**
144      * Does a deep copy of the key at the current position. A deep copy is
145      * necessary because buffers are reused in the decoder.
146      * @return key at current position
147      */
148     ByteBuffer getKeyDeepCopy();
149 
150     /**
151      * Does a shallow copy of the value at the current position. A shallow
152      * copy is possible because the returned buffer refers to the backing array
153      * of the original encoded buffer.
154      * @return value at current position
155      */
156     ByteBuffer getValueShallowCopy();
157 
158     /** @return key value at current position with position set to limit */
159     ByteBuffer getKeyValueBuffer();
160 
161     /**
162      * @return the KeyValue object at the current position. Includes memstore
163      *         timestamp.
164      */
165     Cell getKeyValue();
166 
167     /** Set position to beginning of given block */
168     void rewind();
169 
170     /**
171      * Move to next position
172      * @return true on success, false if there is no more positions.
173      */
174     boolean next();
175 
176     /**
177      * Moves the seeker position within the current block to:
178      * <ul>
179      * <li>the last key that that is less than or equal to the given key if
180      * <code>seekBefore</code> is false</li>
181      * <li>the last key that is strictly less than the given key if <code>
182      * seekBefore</code> is true. The caller is responsible for loading the
183      * previous block if the requested key turns out to be the first key of the
184      * current block.</li>
185      * </ul>
186      * @param key byte array containing the key
187      * @param offset key position the array
188      * @param length key length in bytes
189      * @param seekBefore find the key strictly less than the given key in case
190      *          of an exact match. Does not matter in case of an inexact match.
191      * @return 0 on exact match, 1 on inexact match.
192      */
193     @Deprecated
194     int seekToKeyInBlock(
195       byte[] key, int offset, int length, boolean seekBefore
196     );
197     /**
198      * Moves the seeker position within the current block to:
199      * <ul>
200      * <li>the last key that that is less than or equal to the given key if
201      * <code>seekBefore</code> is false</li>
202      * <li>the last key that is strictly less than the given key if <code>
203      * seekBefore</code> is true. The caller is responsible for loading the
204      * previous block if the requested key turns out to be the first key of the
205      * current block.</li>
206      * </ul>
207      * @param key - Cell to which the seek should happen
208      * @param seekBefore find the key strictly less than the given key in case
209      *          of an exact match. Does not matter in case of an inexact match.
210      * @return 0 on exact match, 1 on inexact match.
211      */
212     int seekToKeyInBlock(Cell key, boolean seekBefore);
213 
214     /**
215      * Compare the given key against the current key
216      * @param comparator
217      * @param key
218      * @param offset
219      * @param length
220      * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater
221      */
222     public int compareKey(KVComparator comparator, byte[] key, int offset, int length);
223 
224     public int compareKey(KVComparator comparator, Cell key);
225   }
226 }