1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.hfile;
18  
19  import java.io.IOException;
20  import java.nio.ByteBuffer;
21  
22  import org.apache.hadoop.classification.InterfaceAudience;
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
25  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
26  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
27  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
28  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
29  import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
30  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
31  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
32  import org.apache.hadoop.hbase.util.Bytes;
33  
34  import com.google.common.base.Preconditions;
35  
36  /**
37   * Do different kinds of data block encoding according to column family
38   * options.
39   */
40  @InterfaceAudience.Private
41  public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
42    private final DataBlockEncoding onDisk;
43    private final DataBlockEncoding inCache;
44    private final HFileBlockEncodingContext inCacheEncodeCtx;
45  
46    public HFileDataBlockEncoderImpl(DataBlockEncoding encoding) {
47      this(encoding, encoding);
48    }
49  
50    /**
51     * Do data block encoding with specified options.
52     * @param onDisk What kind of data block encoding will be used before writing
53     *          HFileBlock to disk. This must be either the same as inCache or
54     *          {@link DataBlockEncoding#NONE}.
55     * @param inCache What kind of data block encoding will be used in block
56     *          cache.
57     */
58    public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
59        DataBlockEncoding inCache) {
60      this(onDisk, inCache, HConstants.HFILEBLOCK_DUMMY_HEADER);
61    }
62  
63    /**
64     * Do data block encoding with specified options.
65     * @param onDisk What kind of data block encoding will be used before writing
66     *          HFileBlock to disk. This must be either the same as inCache or
67     *          {@link DataBlockEncoding#NONE}.
68     * @param inCache What kind of data block encoding will be used in block
69     *          cache.
70     * @param dummyHeader dummy header bytes
71     */
72    public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
73        DataBlockEncoding inCache, byte[] dummyHeader) {
74      this.onDisk = onDisk != null ?
75          onDisk : DataBlockEncoding.NONE;
76      this.inCache = inCache != null ?
77          inCache : DataBlockEncoding.NONE;
78      if (inCache != DataBlockEncoding.NONE) {
79        inCacheEncodeCtx =
80            this.inCache.getEncoder().newDataBlockEncodingContext(
81                Algorithm.NONE, this.inCache, dummyHeader);
82      } else {
83        // create a default encoding context
84        inCacheEncodeCtx =
85            new HFileBlockDefaultEncodingContext(Algorithm.NONE,
86                this.inCache, dummyHeader);
87      }
88  
89      Preconditions.checkArgument(onDisk == DataBlockEncoding.NONE ||
90          onDisk == inCache, "on-disk encoding (" + onDisk + ") must be " +
91          "either the same as in-cache encoding (" + inCache + ") or " +
92          DataBlockEncoding.NONE);
93    }
94  
95    public static HFileDataBlockEncoder createFromFileInfo(
96        FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
97        throws IOException {
98      boolean hasPreferredCacheEncoding = preferredEncodingInCache != null
99          && preferredEncodingInCache != DataBlockEncoding.NONE;
100 
101     byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
102     if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) {
103       return NoOpDataBlockEncoder.INSTANCE;
104     }
105 
106     DataBlockEncoding onDisk;
107     if (dataBlockEncodingType == null) {
108       onDisk = DataBlockEncoding.NONE;
109     } else {
110       String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
111       try {
112         onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
113       } catch (IllegalArgumentException ex) {
114         throw new IOException("Invalid data block encoding type in file info: "
115             + dataBlockEncodingStr, ex);
116       }
117     }
118 
119     DataBlockEncoding inCache;
120     if (onDisk == DataBlockEncoding.NONE) {
121       // This is an "in-cache-only" encoding or fully-unencoded scenario.
122       // Either way, we use the given encoding (possibly NONE) specified by
123       // the column family in cache.
124       inCache = preferredEncodingInCache;
125     } else {
126       // Leave blocks in cache encoded the same way as they are on disk.
127       // If we switch encoding type for the CF or the in-cache-only encoding
128       // flag, old files will keep their encoding both on disk and in cache,
129       // but new files will be generated with the new encoding.
130       inCache = onDisk;
131     }
132     // TODO: we are not passing proper header size here based on minor version, presumably
133     //       because this encoder will never actually be used for encoding.
134     return new HFileDataBlockEncoderImpl(onDisk, inCache);
135   }
136 
137   @Override
138   public void saveMetadata(HFile.Writer writer) throws IOException {
139     writer.appendFileInfo(DATA_BLOCK_ENCODING, onDisk.getNameInBytes());
140   }
141 
142   @Override
143   public DataBlockEncoding getEncodingOnDisk() {
144     return onDisk;
145   }
146 
147   @Override
148   public DataBlockEncoding getEncodingInCache() {
149     return inCache;
150   }
151 
152   @Override
153   public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
154     if (!useEncodedScanner(isCompaction)) {
155       return DataBlockEncoding.NONE;
156     }
157     return inCache;
158   }
159 
160   @Override
161   public HFileBlock diskToCacheFormat(HFileBlock block, boolean isCompaction) {
162     if (block.getBlockType() == BlockType.DATA) {
163       if (!useEncodedScanner(isCompaction)) {
164         // Unencoded block, and we don't want to encode in cache.
165         return block;
166       }
167       // Encode the unencoded block with the in-cache encoding.
168       return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS(),
169           inCacheEncodeCtx);
170     }
171 
172     if (block.getBlockType() == BlockType.ENCODED_DATA) {
173       if (block.getDataBlockEncodingId() == onDisk.getId()) {
174         // The block is already in the desired in-cache encoding.
175         return block;
176       }
177       // We don't want to re-encode a block in a different encoding. The HFile
178       // reader should have been instantiated in such a way that we would not
179       // have to do this.
180       throw new AssertionError("Expected on-disk data block encoding " +
181           onDisk + ", got " + block.getDataBlockEncoding());
182     }
183     return block;
184   }
185 
186   /**
187    * Precondition: a non-encoded buffer. Postcondition: on-disk encoding.
188    *
189    * The encoded results can be stored in {@link HFileBlockEncodingContext}.
190    *
191    * @throws IOException
192    */
193   @Override
194   public void beforeWriteToDisk(ByteBuffer in,
195       boolean includesMemstoreTS,
196       HFileBlockEncodingContext encodeCtx,
197       BlockType blockType) throws IOException {
198     if (onDisk == DataBlockEncoding.NONE) {
199       // there is no need to encode the block before writing it to disk
200       ((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncodingWithBlockType(
201           in.array(), blockType);
202       return;
203     }
204     encodeBufferToHFileBlockBuffer(in, onDisk,
205         includesMemstoreTS, encodeCtx);
206   }
207 
208   @Override
209   public boolean useEncodedScanner(boolean isCompaction) {
210     if (isCompaction && onDisk == DataBlockEncoding.NONE) {
211       return false;
212     }
213     return inCache != DataBlockEncoding.NONE;
214   }
215 
216   /**
217    * Encode a block of key value pairs.
218    *
219    * @param in input data to encode
220    * @param algo encoding algorithm
221    * @param includesMemstoreTS includes memstore timestamp or not
222    * @param encodeCtx where will the output data be stored
223    */
224   private void encodeBufferToHFileBlockBuffer(ByteBuffer in,
225       DataBlockEncoding algo, boolean includesMemstoreTS,
226       HFileBlockEncodingContext encodeCtx) {
227     DataBlockEncoder encoder = algo.getEncoder();
228     try {
229       encoder.encodeKeyValues(in, includesMemstoreTS, encodeCtx);
230     } catch (IOException e) {
231       throw new RuntimeException(String.format(
232           "Bug in data block encoder "
233               + "'%s', it probably requested too much data, " +
234               "exception message: %s.",
235               algo.toString(), e.getMessage()), e);
236     }
237   }
238 
239   private HFileBlock encodeDataBlock(HFileBlock block,
240       DataBlockEncoding algo, boolean includesMemstoreTS,
241       HFileBlockEncodingContext encodingCtx) {
242     encodingCtx.setDummyHeader(block.getDummyHeaderForVersion());
243     encodeBufferToHFileBlockBuffer(
244       block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx);
245     byte[] encodedUncompressedBytes =
246       encodingCtx.getUncompressedBytesWithHeader();
247     ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes);
248     int sizeWithoutHeader = bufferWrapper.limit() - block.headerSize();
249     HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
250         block.getOnDiskSizeWithoutHeader(),
251         sizeWithoutHeader, block.getPrevBlockOffset(),
252         bufferWrapper, HFileBlock.FILL_HEADER, block.getOffset(),
253         includesMemstoreTS, block.getMinorVersion(),
254         block.getBytesPerChecksum(), block.getChecksumType(),
255         block.getOnDiskDataSizeWithHeader());
256     return encodedBlock;
257   }
258 
259   @Override
260   public String toString() {
261     return getClass().getSimpleName() + "(onDisk=" + onDisk + ", inCache=" +
262         inCache + ")";
263   }
264 
265   @Override
266   public HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(
267       Algorithm compressionAlgorithm,  byte[] dummyHeader) {
268     if (onDisk != null) {
269       DataBlockEncoder encoder = onDisk.getEncoder();
270       if (encoder != null) {
271         return encoder.newDataBlockEncodingContext(
272             compressionAlgorithm, onDisk, dummyHeader);
273       }
274     }
275     return new HFileBlockDefaultEncodingContext(compressionAlgorithm,
276         null, dummyHeader);
277   }
278 
279   @Override
280   public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(
281       Algorithm compressionAlgorithm) {
282     if (onDisk != null) {
283       DataBlockEncoder encoder = onDisk.getEncoder();
284       if (encoder != null) {
285         return encoder.newDataBlockDecodingContext(
286             compressionAlgorithm);
287       }
288     }
289     return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
290   }
291 
292 }