1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with this
4 * work for additional information regarding copyright ownership. The ASF
5 * licenses this file to you under the Apache License, Version 2.0 (the
6 * "License"); you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 * License for the specific language governing permissions and limitations
15 * under the License.
16 */
17 package org.apache.hadoop.hbase.io.encoding;
18
19 import java.io.DataInputStream;
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22
23 import org.apache.hadoop.classification.InterfaceAudience;
24 import org.apache.hadoop.hbase.KeyValue;
25 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
26 import org.apache.hadoop.io.RawComparator;
27
28 /**
29 * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
30 * <ul>
31 * <li>the KeyValues are stored sorted by key</li>
32 * <li>we know the structure of KeyValue</li>
33 * <li>the values are always iterated forward from beginning of block</li>
34 * <li>knowledge of Key Value format</li>
35 * </ul>
36 * It is designed to work fast enough to be feasible as in memory compression.
37 *
38 * After encoding, it also optionally compresses the encoded data if a
39 * compression algorithm is specified in HFileBlockEncodingContext argument of
40 * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
41 */
42 @InterfaceAudience.Private
43 public interface DataBlockEncoder {
44
45 /**
46 * Encodes KeyValues. It will first encode key value pairs, and then
47 * optionally do the compression for the encoded data.
48 *
49 * @param in
50 * Source of KeyValue for compression.
51 * @param includesMemstoreTS
52 * true if including memstore timestamp after every key-value pair
53 * @param encodingContext
54 * the encoding context which will contain encoded uncompressed bytes
55 * as well as compressed encoded bytes if compression is enabled, and
56 * also it will reuse resources across multiple calls.
57 * @throws IOException
58 * If there is an error writing to output stream.
59 */
60 public void encodeKeyValues(
61 ByteBuffer in, boolean includesMemstoreTS,
62 HFileBlockEncodingContext encodingContext) throws IOException;
63
64 /**
65 * Decode.
66 * @param source Compressed stream of KeyValues.
67 * @param includesMemstoreTS true if including memstore timestamp after every
68 * key-value pair
69 * @return Uncompressed block of KeyValues.
70 * @throws IOException If there is an error in source.
71 */
72 public ByteBuffer decodeKeyValues(DataInputStream source,
73 boolean includesMemstoreTS) throws IOException;
74
75 /**
76 * Uncompress.
77 * @param source encoded stream of KeyValues.
78 * @param allocateHeaderLength allocate this many bytes for the header.
79 * @param skipLastBytes Do not copy n last bytes.
80 * @param includesMemstoreTS true if including memstore timestamp after every
81 * key-value pair
82 * @return Uncompressed block of KeyValues.
83 * @throws IOException If there is an error in source.
84 */
85 public ByteBuffer decodeKeyValues(DataInputStream source,
86 int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
87 throws IOException;
88
89 /**
90 * Return first key in block. Useful for indexing. Typically does not make
91 * a deep copy but returns a buffer wrapping a segment of the actual block's
92 * byte array. This is because the first key in block is usually stored
93 * unencoded.
94 * @param block encoded block we want index, the position will not change
95 * @return First key in block.
96 */
97 public ByteBuffer getFirstKeyInBlock(ByteBuffer block);
98
99 /**
100 * Create a HFileBlock seeker which find KeyValues within a block.
101 * @param comparator what kind of comparison should be used
102 * @param includesMemstoreTS true if including memstore timestamp after every
103 * key-value pair
104 * @return A newly created seeker.
105 */
106 public EncodedSeeker createSeeker(RawComparator<byte[]> comparator,
107 boolean includesMemstoreTS);
108
109 /**
110 * Creates a encoder specific encoding context
111 *
112 * @param compressionAlgorithm
113 * compression algorithm used if the final data needs to be
114 * compressed
115 * @param encoding
116 * encoding strategy used
117 * @param headerBytes
118 * header bytes to be written, put a dummy header here if the header
119 * is unknown
120 * @return a newly created encoding context
121 */
122 public HFileBlockEncodingContext newDataBlockEncodingContext(
123 Algorithm compressionAlgorithm, DataBlockEncoding encoding,
124 byte[] headerBytes);
125
126 /**
127 * Creates an encoder specific decoding context, which will prepare the data
128 * before actual decoding
129 *
130 * @param compressionAlgorithm
131 * compression algorithm used if the data needs to be decompressed
132 * @return a newly created decoding context
133 */
134 public HFileBlockDecodingContext newDataBlockDecodingContext(
135 Algorithm compressionAlgorithm);
136
137 /**
138 * An interface which enable to seek while underlying data is encoded.
139 *
140 * It works on one HFileBlock, but it is reusable. See
141 * {@link #setCurrentBuffer(ByteBuffer)}.
142 */
143 public static interface EncodedSeeker {
144 /**
145 * Set on which buffer there will be done seeking.
146 * @param buffer Used for seeking.
147 */
148 public void setCurrentBuffer(ByteBuffer buffer);
149
150 /**
151 * Does a deep copy of the key at the current position. A deep copy is
152 * necessary because buffers are reused in the decoder.
153 * @return key at current position
154 */
155 public ByteBuffer getKeyDeepCopy();
156
157 /**
158 * Does a shallow copy of the value at the current position. A shallow
159 * copy is possible because the returned buffer refers to the backing array
160 * of the original encoded buffer.
161 * @return value at current position
162 */
163 public ByteBuffer getValueShallowCopy();
164
165 /** @return key value at current position with position set to limit */
166 public ByteBuffer getKeyValueBuffer();
167
168 /**
169 * @return the KeyValue object at the current position. Includes memstore
170 * timestamp.
171 */
172 public KeyValue getKeyValue();
173
174 /** Set position to beginning of given block */
175 public void rewind();
176
177 /**
178 * Move to next position
179 * @return true on success, false if there is no more positions.
180 */
181 public boolean next();
182
183 /**
184 * Moves the seeker position within the current block to:
185 * <ul>
186 * <li>the last key that that is less than or equal to the given key if
187 * <code>seekBefore</code> is false</li>
188 * <li>the last key that is strictly less than the given key if <code>
189 * seekBefore</code> is true. The caller is responsible for loading the
190 * previous block if the requested key turns out to be the first key of the
191 * current block.</li>
192 * </ul>
193 * @param key byte array containing the key
194 * @param offset key position the array
195 * @param length key length in bytes
196 * @param seekBefore find the key strictly less than the given key in case
197 * of an exact match. Does not matter in case of an inexact match.
198 * @return 0 on exact match, 1 on inexact match.
199 */
200 public int seekToKeyInBlock(byte[] key, int offset, int length,
201 boolean seekBefore);
202 }
203 }