001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with this 004 * work for additional information regarding copyright ownership. The ASF 005 * licenses this file to you under the Apache License, Version 2.0 (the 006 * "License"); you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations 015 * under the License. 016 */ 017package org.apache.hadoop.hbase.io.encoding; 018 019import java.io.DataInputStream; 020import java.io.DataOutputStream; 021import java.io.IOException; 022import java.nio.ByteBuffer; 023import org.apache.hadoop.hbase.Cell; 024import org.apache.hadoop.hbase.CellComparator; 025import org.apache.hadoop.hbase.io.hfile.HFileContext; 026import org.apache.hadoop.hbase.nio.ByteBuff; 027import org.apache.yetus.audience.InterfaceAudience; 028 029/** 030 * Encoding of KeyValue. It aims to be fast and efficient using assumptions: 031 * <ul> 032 * <li>the KeyValues are stored sorted by key</li> 033 * <li>we know the structure of KeyValue</li> 034 * <li>the values are always iterated forward from beginning of block</li> 035 * <li>knowledge of Key Value format</li> 036 * </ul> 037 * It is designed to work fast enough to be feasible as in memory compression. 038 */ 039@InterfaceAudience.Private 040public interface DataBlockEncoder { 041// TODO: This Interface should be deprecated and replaced. It presumes hfile and carnal knowledge of 042// Cell internals. It was done for a different time. Remove. Purge. 043 /** 044 * Starts encoding for a block of KeyValues. Call 045 * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish 046 * encoding of a block. 047 */ 048 void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out) 049 throws IOException; 050 051 /** 052 * Encodes a KeyValue. 053 * After the encode, {@link EncodingState#postCellEncode(int, int)} needs to be called to keep 054 * track of the encoded and unencoded data size 055 */ 056 void encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out) 057 throws IOException; 058 059 /** 060 * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing 061 * stuff for the encoded block. It must be called at the end of block encoding. 062 */ 063 void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out, 064 byte[] uncompressedBytesWithHeader) throws IOException; 065 066 /** 067 * Decode. 068 * @param source Compressed stream of KeyValues. 069 * @return Uncompressed block of KeyValues. 070 * @throws IOException If there is an error in source. 071 */ 072 ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx) 073 throws IOException; 074 075 /** 076 * Return first key in block as a cell. Useful for indexing. Typically does not make 077 * a deep copy but returns a buffer wrapping a segment of the actual block's 078 * byte array. This is because the first key in block is usually stored 079 * unencoded. 080 * @param block encoded block we want index, the position will not change 081 * @return First key in block as a cell. 082 */ 083 Cell getFirstKeyCellInBlock(ByteBuff block); 084 085 /** 086 * Create a HFileBlock seeker which find KeyValues within a block. 087 * @return A newly created seeker. 088 */ 089 EncodedSeeker createSeeker(HFileBlockDecodingContext decodingCtx); 090 091 /** 092 * Creates a encoder specific encoding context 093 * 094 * @param encoding 095 * encoding strategy used 096 * @param headerBytes 097 * header bytes to be written, put a dummy header here if the header 098 * is unknown 099 * @param meta 100 * HFile meta data 101 * @return a newly created encoding context 102 */ 103 HFileBlockEncodingContext newDataBlockEncodingContext( 104 DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta); 105 106 /** 107 * Creates an encoder specific decoding context, which will prepare the data 108 * before actual decoding 109 * 110 * @param meta 111 * HFile meta data 112 * @return a newly created decoding context 113 */ 114 HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta); 115 116 /** 117 * An interface which enable to seek while underlying data is encoded. 118 * 119 * It works on one HFileBlock, but it is reusable. See 120 * {@link #setCurrentBuffer(ByteBuff)}. 121 */ 122 interface EncodedSeeker { 123 /** 124 * Set on which buffer there will be done seeking. 125 * @param buffer Used for seeking. 126 */ 127 void setCurrentBuffer(ByteBuff buffer); 128 129 /** 130 * From the current position creates a cell using the key part 131 * of the current buffer 132 * @return key at current position 133 */ 134 Cell getKey(); 135 136 /** 137 * Does a shallow copy of the value at the current position. A shallow 138 * copy is possible because the returned buffer refers to the backing array 139 * of the original encoded buffer. 140 * @return value at current position 141 */ 142 ByteBuffer getValueShallowCopy(); 143 144 /** 145 * @return the Cell at the current position. Includes memstore timestamp. 146 */ 147 Cell getCell(); 148 149 /** Set position to beginning of given block */ 150 void rewind(); 151 152 /** 153 * Move to next position 154 * @return true on success, false if there is no more positions. 155 */ 156 boolean next(); 157 158 /** 159 * Moves the seeker position within the current block to: 160 * <ul> 161 * <li>the last key that that is less than or equal to the given key if 162 * <code>seekBefore</code> is false</li> 163 * <li>the last key that is strictly less than the given key if <code> 164 * seekBefore</code> is true. The caller is responsible for loading the 165 * previous block if the requested key turns out to be the first key of the 166 * current block.</li> 167 * </ul> 168 * @param key - Cell to which the seek should happen 169 * @param seekBefore find the key strictly less than the given key in case 170 * of an exact match. Does not matter in case of an inexact match. 171 * @return 0 on exact match, 1 on inexact match. 172 */ 173 int seekToKeyInBlock(Cell key, boolean seekBefore); 174 175 /** 176 * Compare the given key against the current key 177 * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater 178 */ 179 public int compareKey(CellComparator comparator, Cell key); 180 } 181}