1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with this 4 * work for additional information regarding copyright ownership. The ASF 5 * licenses this file to you under the Apache License, Version 2.0 (the 6 * "License"); you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 * License for the specific language governing permissions and limitations 15 * under the License. 16 */ 17 package org.apache.hadoop.hbase.io.encoding; 18 19 import java.io.DataInputStream; 20 import java.io.DataOutputStream; 21 import java.io.IOException; 22 import java.nio.ByteBuffer; 23 24 import org.apache.hadoop.hbase.classification.InterfaceAudience; 25 import org.apache.hadoop.hbase.Cell; 26 import org.apache.hadoop.hbase.KeyValue.KVComparator; 27 import org.apache.hadoop.hbase.io.hfile.HFileContext; 28 29 /** 30 * Encoding of KeyValue. It aims to be fast and efficient using assumptions: 31 * <ul> 32 * <li>the KeyValues are stored sorted by key</li> 33 * <li>we know the structure of KeyValue</li> 34 * <li>the values are always iterated forward from beginning of block</li> 35 * <li>knowledge of Key Value format</li> 36 * </ul> 37 * It is designed to work fast enough to be feasible as in memory compression. 38 */ 39 @InterfaceAudience.Private 40 public interface DataBlockEncoder { 41 42 /** 43 * Starts encoding for a block of KeyValues. Call 44 * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish 45 * encoding of a block. 46 * @param encodingCtx 47 * @param out 48 * @throws IOException 49 */ 50 void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out) 51 throws IOException; 52 53 /** 54 * Encodes a KeyValue. 55 * @param cell 56 * @param encodingCtx 57 * @param out 58 * @return unencoded kv size written 59 * @throws IOException 60 */ 61 int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out) 62 throws IOException; 63 64 /** 65 * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing 66 * stuff for the encoded block. It must be called at the end of block encoding. 67 * @param encodingCtx 68 * @param out 69 * @param uncompressedBytesWithHeader 70 * @throws IOException 71 */ 72 void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out, 73 byte[] uncompressedBytesWithHeader) throws IOException; 74 75 /** 76 * Decode. 77 * @param source Compressed stream of KeyValues. 78 * @param decodingCtx 79 * @return Uncompressed block of KeyValues. 80 * @throws IOException If there is an error in source. 81 */ 82 ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx) 83 throws IOException; 84 85 /** 86 * Return first key in block. Useful for indexing. Typically does not make 87 * a deep copy but returns a buffer wrapping a segment of the actual block's 88 * byte array. This is because the first key in block is usually stored 89 * unencoded. 90 * @param block encoded block we want index, the position will not change 91 * @return First key in block. 92 */ 93 ByteBuffer getFirstKeyInBlock(ByteBuffer block); 94 95 /** 96 * Create a HFileBlock seeker which find KeyValues within a block. 97 * @param comparator what kind of comparison should be used 98 * @param decodingCtx 99 * @return A newly created seeker. 100 */ 101 EncodedSeeker createSeeker(KVComparator comparator, 102 HFileBlockDecodingContext decodingCtx); 103 104 /** 105 * Creates a encoder specific encoding context 106 * 107 * @param encoding 108 * encoding strategy used 109 * @param headerBytes 110 * header bytes to be written, put a dummy header here if the header 111 * is unknown 112 * @param meta 113 * HFile meta data 114 * @return a newly created encoding context 115 */ 116 HFileBlockEncodingContext newDataBlockEncodingContext( 117 DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta); 118 119 /** 120 * Creates an encoder specific decoding context, which will prepare the data 121 * before actual decoding 122 * 123 * @param meta 124 * HFile meta data 125 * @return a newly created decoding context 126 */ 127 HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta); 128 129 /** 130 * An interface which enable to seek while underlying data is encoded. 131 * 132 * It works on one HFileBlock, but it is reusable. See 133 * {@link #setCurrentBuffer(ByteBuffer)}. 134 */ 135 interface EncodedSeeker { 136 /** 137 * Set on which buffer there will be done seeking. 138 * @param buffer Used for seeking. 139 */ 140 void setCurrentBuffer(ByteBuffer buffer); 141 142 /** 143 * Does a deep copy of the key at the current position. A deep copy is 144 * necessary because buffers are reused in the decoder. 145 * @return key at current position 146 */ 147 ByteBuffer getKeyDeepCopy(); 148 149 /** 150 * Does a shallow copy of the value at the current position. A shallow 151 * copy is possible because the returned buffer refers to the backing array 152 * of the original encoded buffer. 153 * @return value at current position 154 */ 155 ByteBuffer getValueShallowCopy(); 156 157 158 /** 159 * @return the KeyValue object at the current position. Includes memstore 160 * timestamp. 161 */ 162 Cell getKeyValue(); 163 164 /** Set position to beginning of given block */ 165 void rewind(); 166 167 /** 168 * Move to next position 169 * @return true on success, false if there is no more positions. 170 */ 171 boolean next(); 172 173 /** 174 * Moves the seeker position within the current block to: 175 * <ul> 176 * <li>the last key that that is less than or equal to the given key if 177 * <code>seekBefore</code> is false</li> 178 * <li>the last key that is strictly less than the given key if <code> 179 * seekBefore</code> is true. The caller is responsible for loading the 180 * previous block if the requested key turns out to be the first key of the 181 * current block.</li> 182 * </ul> 183 * @param key byte array containing the key 184 * @param offset key position the array 185 * @param length key length in bytes 186 * @param seekBefore find the key strictly less than the given key in case 187 * of an exact match. Does not matter in case of an inexact match. 188 * @return 0 on exact match, 1 on inexact match. 189 */ 190 @Deprecated 191 int seekToKeyInBlock( 192 byte[] key, int offset, int length, boolean seekBefore 193 ); 194 /** 195 * Moves the seeker position within the current block to: 196 * <ul> 197 * <li>the last key that that is less than or equal to the given key if 198 * <code>seekBefore</code> is false</li> 199 * <li>the last key that is strictly less than the given key if <code> 200 * seekBefore</code> is true. The caller is responsible for loading the 201 * previous block if the requested key turns out to be the first key of the 202 * current block.</li> 203 * </ul> 204 * @param key - Cell to which the seek should happen 205 * @param seekBefore find the key strictly less than the given key in case 206 * of an exact match. Does not matter in case of an inexact match. 207 * @return 0 on exact match, 1 on inexact match. 208 */ 209 int seekToKeyInBlock(Cell key, boolean seekBefore); 210 211 /** 212 * Compare the given key against the current key 213 * @param comparator 214 * @param key 215 * @param offset 216 * @param length 217 * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater 218 */ 219 public int compareKey(KVComparator comparator, byte[] key, int offset, int length); 220 221 public int compareKey(KVComparator comparator, Cell key); 222 } 223 }