001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.encoding; 019 020import java.io.DataInputStream; 021import java.io.DataOutputStream; 022import java.io.IOException; 023import java.nio.ByteBuffer; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.Cell; 026import org.apache.hadoop.hbase.CellComparator; 027import org.apache.hadoop.hbase.io.hfile.HFileContext; 028import org.apache.hadoop.hbase.nio.ByteBuff; 029import org.apache.yetus.audience.InterfaceAudience; 030 031/** 032 * Encoding of KeyValue. It aims to be fast and efficient using assumptions: 033 * <ul> 034 * <li>the KeyValues are stored sorted by key</li> 035 * <li>we know the structure of KeyValue</li> 036 * <li>the values are always iterated forward from beginning of block</li> 037 * <li>knowledge of Key Value format</li> 038 * </ul> 039 * It is designed to work fast enough to be feasible as in memory compression. 040 */ 041@InterfaceAudience.Private 042public interface DataBlockEncoder { 043 // TODO: This Interface should be deprecated and replaced. It presumes hfile and carnal knowledge 044 // of 045 // Cell internals. It was done for a different time. Remove. Purge. 046 /** 047 * Starts encoding for a block of KeyValues. Call 048 * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish 049 * encoding of a block. 050 */ 051 void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out) 052 throws IOException; 053 054 /** 055 * Encodes a KeyValue. After the encode, {@link EncodingState#postCellEncode(int, int)} needs to 056 * be called to keep track of the encoded and unencoded data size 057 */ 058 void encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out) 059 throws IOException; 060 061 /** 062 * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing 063 * stuff for the encoded block. It must be called at the end of block encoding. 064 */ 065 void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out, 066 byte[] uncompressedBytesWithHeader) throws IOException; 067 068 /** 069 * Decode. 070 * @param source Compressed stream of KeyValues. 071 * @return Uncompressed block of KeyValues. 072 * @throws IOException If there is an error in source. 073 */ 074 ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx) 075 throws IOException; 076 077 /** 078 * Return first key in block as a cell. Useful for indexing. Typically does not make a deep copy 079 * but returns a buffer wrapping a segment of the actual block's byte array. This is because the 080 * first key in block is usually stored unencoded. 081 * @param block encoded block we want index, the position will not change 082 * @return First key in block as a cell. 083 */ 084 Cell getFirstKeyCellInBlock(ByteBuff block); 085 086 /** 087 * Create a HFileBlock seeker which find KeyValues within a block. 088 * @return A newly created seeker. 089 */ 090 EncodedSeeker createSeeker(HFileBlockDecodingContext decodingCtx); 091 092 /** 093 * Creates a encoder specific encoding context n * store configuration n * encoding strategy used 094 * n * header bytes to be written, put a dummy header here if the header is unknown n * HFile meta 095 * data 096 * @return a newly created encoding context 097 */ 098 HFileBlockEncodingContext newDataBlockEncodingContext(Configuration conf, 099 DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta); 100 101 /** 102 * Creates an encoder specific decoding context, which will prepare the data before actual 103 * decoding n * store configuration n * HFile meta data 104 * @return a newly created decoding context 105 */ 106 HFileBlockDecodingContext newDataBlockDecodingContext(Configuration conf, HFileContext meta); 107 108 /** 109 * An interface which enable to seek while underlying data is encoded. It works on one HFileBlock, 110 * but it is reusable. See {@link #setCurrentBuffer(ByteBuff)}. 111 */ 112 interface EncodedSeeker { 113 /** 114 * Set on which buffer there will be done seeking. 115 * @param buffer Used for seeking. 116 */ 117 void setCurrentBuffer(ByteBuff buffer); 118 119 /** 120 * From the current position creates a cell using the key part of the current buffer 121 * @return key at current position 122 */ 123 Cell getKey(); 124 125 /** 126 * Does a shallow copy of the value at the current position. A shallow copy is possible because 127 * the returned buffer refers to the backing array of the original encoded buffer. 128 * @return value at current position 129 */ 130 ByteBuffer getValueShallowCopy(); 131 132 /** Returns the Cell at the current position. Includes memstore timestamp. */ 133 Cell getCell(); 134 135 /** Set position to beginning of given block */ 136 void rewind(); 137 138 /** 139 * Move to next position 140 * @return true on success, false if there is no more positions. 141 */ 142 boolean next(); 143 144 /** 145 * Moves the seeker position within the current block to: 146 * <ul> 147 * <li>the last key that that is less than or equal to the given key if <code>seekBefore</code> 148 * is false</li> 149 * <li>the last key that is strictly less than the given key if <code> 150 * seekBefore</code> is true. The caller is responsible for loading the previous block if the 151 * requested key turns out to be the first key of the current block.</li> 152 * </ul> 153 * @param key - Cell to which the seek should happen 154 * @param seekBefore find the key strictly less than the given key in case of an exact match. 155 * Does not matter in case of an inexact match. 156 * @return 0 on exact match, 1 on inexact match. 157 */ 158 int seekToKeyInBlock(Cell key, boolean seekBefore); 159 160 /** 161 * Compare the given key against the current key 162 * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater 163 */ 164 public int compareKey(CellComparator comparator, Cell key); 165 } 166}