1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.DataInputStream;
21  import java.io.DataOutput;
22  import java.io.DataOutputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.nio.ByteBuffer;
26  import java.util.concurrent.locks.Lock;
27  import java.util.concurrent.locks.ReentrantLock;
28  
29  import org.apache.hadoop.fs.FSDataInputStream;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.Cell;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.classification.InterfaceAudience;
35  import org.apache.hadoop.hbase.fs.HFileSystem;
36  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
37  import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
38  import org.apache.hadoop.hbase.io.ByteBuffInputStream;
39  import org.apache.hadoop.hbase.io.ByteBufferSupportDataOutputStream;
40  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
41  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
42  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
43  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
44  import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
45  import org.apache.hadoop.hbase.nio.ByteBuff;
46  import org.apache.hadoop.hbase.nio.MultiByteBuff;
47  import org.apache.hadoop.hbase.nio.SingleByteBuff;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.ChecksumType;
50  import org.apache.hadoop.hbase.util.ClassSize;
51  import org.apache.hadoop.io.IOUtils;
52  
53  import com.google.common.annotations.VisibleForTesting;
54  import com.google.common.base.Preconditions;
55  
56  /**
57   * Reading {@link HFile} version 1 and 2 blocks, and writing version 2 blocks.
58   * <ul>
59   * <li>In version 1 all blocks are always compressed or uncompressed, as
60   * specified by the {@link HFile}'s compression algorithm, with a type-specific
61   * magic record stored in the beginning of the compressed data (i.e. one needs
62   * to uncompress the compressed block to determine the block type). There is
63   * only a single compression algorithm setting for all blocks. Offset and size
64   * information from the block index are required to read a block.
65   * <li>In version 2 a block is structured as follows:
66   * <ul>
67   * <li>header (see Writer#finishBlock())
68   * <ul>
69   * <li>Magic record identifying the block type (8 bytes)
70   * <li>Compressed block size, excluding header, including checksum (4 bytes)
71   * <li>Uncompressed block size, excluding header, excluding checksum (4 bytes)
72   * <li>The offset of the previous block of the same type (8 bytes). This is
73   * used to be able to navigate to the previous block without going to the block index
74   * <li>For minorVersions &gt;=1, the ordinal describing checksum type (1 byte)
75   * <li>For minorVersions &gt;=1, the number of data bytes/checksum chunk (4 bytes)
76   * <li>For minorVersions &gt;=1, the size of data on disk, including header,
77   * excluding checksums (4 bytes)
78   * </ul>
79   * </li>
80   * <li>Raw/Compressed/Encrypted/Encoded data. The compression algorithm is the
81   * same for all the blocks in the {@link HFile}, similarly to what was done in
82   * version 1.
83   * <li>For minorVersions &gt;=1, a series of 4 byte checksums, one each for
84   * the number of bytes specified by bytesPerChecksum.
85   * </ul>
86   * </ul>
87   */
88  @InterfaceAudience.Private
89  public class HFileBlock implements Cacheable {
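      /*
       * Illustrative sketch (not part of the original source): how the version 2 header fields
       * described in the class comment above map onto reads from a ByteBuff positioned at the
       * start of a block. Field names mirror this class's members; offsets assume HBase
       * checksums (minor version >= 1), i.e. a 33 byte header.
       *
       *   BlockType blockType = BlockType.read(b);        // 8 byte magic, bytes 0..7
       *   int onDiskSizeWithoutHeader = b.getInt();       // bytes 8..11
       *   int uncompressedSizeWithoutHeader = b.getInt(); // bytes 12..15
       *   long prevBlockOffset = b.getLong();             // bytes 16..23
       *   byte checksumType = b.get();                    // byte 24
       *   int bytesPerChecksum = b.getInt();              // bytes 25..28
       *   int onDiskDataSizeWithHeader = b.getInt();      // bytes 29..32
       *
       * This is the order in which the HFileBlock(ByteBuff, boolean, MemoryType) constructor
       * below consumes the header.
       */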
90  
91    /**
92    * On a checksum failure in a Reader, this many succeeding read
93    * requests switch back to using HDFS checksums before auto-reenabling
94    * HBase checksum verification.
95     */
96    static final int CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD = 3;
97  
98    public static final boolean FILL_HEADER = true;
99    public static final boolean DONT_FILL_HEADER = false;
100 
101   /**
102    * The size of block header when blockType is {@link BlockType#ENCODED_DATA}.
103    * This extends normal header by adding the id of encoder.
104    */
105   public static final int ENCODED_HEADER_SIZE = HConstants.HFILEBLOCK_HEADER_SIZE
106       + DataBlockEncoding.ID_SIZE;
107 
108   static final byte[] DUMMY_HEADER_NO_CHECKSUM =
109      new byte[HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM];
110 
111   // TODO: how do we get a correct heap size estimate here when the buffer is a SingleByteBuff?
112   public static final int MULTI_BYTE_BUFFER_HEAP_SIZE =
113       (int)ClassSize.estimateBase(MultiByteBuff.class, false);
114 
115   // meta.usesHBaseChecksum+offset+nextBlockOnDiskSizeWithHeader
116   public static final int EXTRA_SERIALIZATION_SPACE = Bytes.SIZEOF_BYTE + Bytes.SIZEOF_INT
117       + Bytes.SIZEOF_LONG;
118 
119   /**
120    * Each checksum value is an integer that can be stored in 4 bytes.
121    */
122   static final int CHECKSUM_SIZE = Bytes.SIZEOF_INT;
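      /*
       * Rough arithmetic sketch (an assumption for illustration, not taken from the original
       * source): one CHECKSUM_SIZE (4 byte) checksum is stored per bytesPerChecksum bytes of
       * on-disk data, header included, rounded up. For example, with bytesPerChecksum = 16384
       * and a 64 KB data block (65536 + 33 header = 65569 bytes), ceil(65569 / 16384) = 5
       * chunks are covered, i.e. 20 bytes of checksums follow the block.
       */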
123 
124   static final CacheableDeserializer<Cacheable> blockDeserializer =
125       new CacheableDeserializer<Cacheable>() {
126         public HFileBlock deserialize(ByteBuff buf, boolean reuse, MemoryType memType)
127             throws IOException {
128           buf.limit(buf.limit() - HFileBlock.EXTRA_SERIALIZATION_SPACE).rewind();
129           ByteBuff newByteBuffer;
130           if (reuse) {
131             newByteBuffer = buf.slice();
132           } else {
133             // Used only in tests
134             int len = buf.limit();
135             newByteBuffer = new SingleByteBuff(ByteBuffer.allocate(len));
136             newByteBuffer.put(0, buf, buf.position(), len);
137           }
138           buf.position(buf.limit());
139           buf.limit(buf.limit() + HFileBlock.EXTRA_SERIALIZATION_SPACE);
140           boolean usesChecksum = buf.get() == (byte)1;
141           HFileBlock hFileBlock = new HFileBlock(newByteBuffer, usesChecksum, memType);
142           hFileBlock.offset = buf.getLong();
143           hFileBlock.nextBlockOnDiskSizeWithHeader = buf.getInt();
144           if (hFileBlock.hasNextBlockHeader()) {
145             hFileBlock.buf.limit(hFileBlock.buf.limit() - hFileBlock.headerSize());
146           }
147           return hFileBlock;
148         }
149 
150         @Override
151         public int getDeserialiserIdentifier() {
152           return deserializerIdentifier;
153         }
154 
155         @Override
156         public HFileBlock deserialize(ByteBuff b) throws IOException {
157           // Used only in tests
158           return deserialize(b, false, MemoryType.EXCLUSIVE);
159         }
160       };
161   private static final int deserializerIdentifier;
162   static {
163     deserializerIdentifier = CacheableDeserializerIdManager
164         .registerDeserializer(blockDeserializer);
165   }
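      /*
       * Sketch of the cache serialization layout that blockDeserializer above reverses; this is
       * an illustration derived from the code in this file, not an authoritative format spec:
       *
       *   [ block bytes: header + data (+ checksums) ]
       *   [ usesHBaseChecksum flag        : 1 byte  ]
       *   [ offset                        : 8 bytes ]
       *   [ nextBlockOnDiskSizeWithHeader : 4 bytes ]
       *
       * The three trailing fields add up to EXTRA_SERIALIZATION_SPACE (13 bytes). The
       * deserializer first shrinks the limit by that amount to slice out the block bytes, then
       * reads the trailing fields in the order shown.
       */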
166 
167   /** Type of block. Header field 0. */
168   private BlockType blockType;
169 
170   /** Size on disk excluding header, including checksum. Header field 1. */
171   private int onDiskSizeWithoutHeader;
172 
173   /** Size of pure data. Does not include header or checksums. Header field 2. */
174   private final int uncompressedSizeWithoutHeader;
175 
176   /** The offset of the previous block on disk. Header field 3. */
177   private final long prevBlockOffset;
178 
179   /**
180    * Size on disk of header + data. Excludes checksum. Header field 6,
181    * OR calculated from {@link #onDiskSizeWithoutHeader} when using HDFS checksum.
182    */
183   private final int onDiskDataSizeWithHeader;
184 
185   /** The in-memory representation of the hfile block */
186   private ByteBuff buf;
187 
188   /** Meta data that holds meta information on the hfileblock */
189   private HFileContext fileContext;
190 
191   /**
192    * The offset of this block in the file. Populated by the reader for
193    * convenience of access. This offset is not part of the block header.
194    */
195   private long offset = -1;
196 
197   /**
198    * The on-disk size of the next block, including the header, obtained by
199    * peeking into the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the next block's
200    * header, or -1 if unknown.
201    */
202   private int nextBlockOnDiskSizeWithHeader = -1;
203 
204   private MemoryType memType = MemoryType.EXCLUSIVE;
205 
206   /**
207    * Creates a new {@link HFile} block from the given fields. This constructor
208    * is mostly used when the block data has already been read and uncompressed,
209    * and is sitting in a byte buffer.
210    *
211    * @param blockType the type of this block, see {@link BlockType}
212    * @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
213    * @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
214    * @param prevBlockOffset see {@link #prevBlockOffset}
215    * @param buf block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
216    *          uncompressed data
217    * @param fillHeader when true, overwrite the header bytes at the start of {@code buf} with the field values given above (see {@link #overwriteHeader()})
218    * @param offset the file offset the block was read from
219    * @param onDiskDataSizeWithHeader see {@link #onDiskDataSizeWithHeader}
220    * @param fileContext HFile meta data
221    */
222   HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader,
223       long prevBlockOffset, ByteBuff buf, boolean fillHeader, long offset,
224       int onDiskDataSizeWithHeader, HFileContext fileContext) {
225     this.blockType = blockType;
226     this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
227     this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
228     this.prevBlockOffset = prevBlockOffset;
229     this.buf = buf;
230     this.offset = offset;
231     this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
232     this.fileContext = fileContext;
233     if (fillHeader)
234       overwriteHeader();
235     this.buf.rewind();
236   }
237 
238   HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader,
239       long prevBlockOffset, ByteBuffer buf, boolean fillHeader, long offset,
240       int onDiskDataSizeWithHeader, HFileContext fileContext) {
241     this(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset,
242         new SingleByteBuff(buf), fillHeader, offset, onDiskDataSizeWithHeader, fileContext);
243   }
244 
245   /**
246    * Copy constructor. Creates a shallow copy of {@code that}'s buffer.
247    */
248   HFileBlock(HFileBlock that) {
249     this.blockType = that.blockType;
250     this.onDiskSizeWithoutHeader = that.onDiskSizeWithoutHeader;
251     this.uncompressedSizeWithoutHeader = that.uncompressedSizeWithoutHeader;
252     this.prevBlockOffset = that.prevBlockOffset;
253     this.buf = that.buf.duplicate();
254     this.offset = that.offset;
255     this.onDiskDataSizeWithHeader = that.onDiskDataSizeWithHeader;
256     this.fileContext = that.fileContext;
257     this.nextBlockOnDiskSizeWithHeader = that.nextBlockOnDiskSizeWithHeader;
258   }
259 
260   HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException {
261     this(new SingleByteBuff(b), usesHBaseChecksum);
262   }
263 
264   /**
265    * Creates a block from an existing buffer starting with a header. Rewinds
266    * and takes ownership of the buffer. By definition of rewind, ignores the
267    * buffer position, but if you slice the buffer beforehand, it will rewind
268    * to that point.
269    */
270   HFileBlock(ByteBuff b, boolean usesHBaseChecksum) throws IOException {
271     this(b, usesHBaseChecksum, MemoryType.EXCLUSIVE);
272   }
273 
274   /**
275    * Creates a block from an existing buffer starting with a header. Rewinds
276    * and takes ownership of the buffer. By definition of rewind, ignores the
277    * buffer position, but if you slice the buffer beforehand, it will rewind
278    * to that point.
279    */
280   HFileBlock(ByteBuff b, boolean usesHBaseChecksum, MemoryType memType) throws IOException {
281     b.rewind();
282     blockType = BlockType.read(b);
283     onDiskSizeWithoutHeader = b.getInt();
284     uncompressedSizeWithoutHeader = b.getInt();
285     prevBlockOffset = b.getLong();
286     HFileContextBuilder contextBuilder = new HFileContextBuilder();
287     contextBuilder.withHBaseCheckSum(usesHBaseChecksum);
288     if (usesHBaseChecksum) {
289       contextBuilder.withChecksumType(ChecksumType.codeToType(b.get()));
290       contextBuilder.withBytesPerCheckSum(b.getInt());
291       this.onDiskDataSizeWithHeader = b.getInt();
292     } else {
293       contextBuilder.withChecksumType(ChecksumType.NULL);
294       contextBuilder.withBytesPerCheckSum(0);
295       this.onDiskDataSizeWithHeader = onDiskSizeWithoutHeader +
296                                        HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
297     }
298     this.fileContext = contextBuilder.build();
299     this.memType = memType;
300     buf = b;
301     buf.rewind();
302   }
303 
304   public BlockType getBlockType() {
305     return blockType;
306   }
307 
308   /** @return the data block encoding id that was used to encode this block */
309   public short getDataBlockEncodingId() {
310     if (blockType != BlockType.ENCODED_DATA) {
311       throw new IllegalArgumentException("Querying encoder ID of a block " +
312           "of type other than " + BlockType.ENCODED_DATA + ": " + blockType);
313     }
314     return buf.getShort(headerSize());
315   }
316 
317   /**
318    * @return the on-disk size of header + data part + checksum.
319    */
320   public int getOnDiskSizeWithHeader() {
321     return onDiskSizeWithoutHeader + headerSize();
322   }
323 
324   /**
325    * @return the on-disk size of the data part + checksum (header excluded).
326    */
327   public int getOnDiskSizeWithoutHeader() {
328     return onDiskSizeWithoutHeader;
329   }
330 
331   /**
332    * @return the uncompressed size of data part (header and checksum excluded).
333    */
334    public int getUncompressedSizeWithoutHeader() {
335     return uncompressedSizeWithoutHeader;
336   }
337 
338   /**
339    * @return the offset of the previous block of the same type in the file, or
340    *         -1 if unknown
341    */
342   public long getPrevBlockOffset() {
343     return prevBlockOffset;
344   }
345 
346   /**
347    * Rewinds {@code buf} and writes first 4 header fields. {@code buf} position
348    * is modified as side-effect.
349    */
350   private void overwriteHeader() {
351     buf.rewind();
352     blockType.write(buf);
353     buf.putInt(onDiskSizeWithoutHeader);
354     buf.putInt(uncompressedSizeWithoutHeader);
355     buf.putLong(prevBlockOffset);
356     if (this.fileContext.isUseHBaseChecksum()) {
357       buf.put(fileContext.getChecksumType().getCode());
358       buf.putInt(fileContext.getBytesPerChecksum());
359       buf.putInt(onDiskDataSizeWithHeader);
360     }
361   }
362 
363   /**
364    * Returns a buffer that does not include the header or checksum.
365    *
366    * @return the buffer with header skipped and checksum omitted.
367    */
368   public ByteBuff getBufferWithoutHeader() {
369     ByteBuff dup = this.buf.duplicate();
370     dup.position(headerSize());
371     dup.limit(buf.limit() - totalChecksumBytes());
372     return dup.slice();
373   }
374 
375   /**
376    * Returns the buffer this block stores internally. The clients must not
377    * modify the buffer object. This method has to be public because it is used
378    * in {@link CompoundBloomFilter} to avoid object creation on every Bloom
379    * filter lookup, but has to be used with caution. Checksum data is not
380    * included in the returned buffer but header data is.
381    *
382    * @return the buffer of this block for read-only operations
383    */
384   public ByteBuff getBufferReadOnly() {
385     ByteBuff dup = this.buf.duplicate();
386     dup.limit(buf.limit() - totalChecksumBytes());
387     return dup.slice();
388   }
389 
390   /**
391    * Returns the buffer of this block, including header data. The clients must
392    * not modify the buffer object. This method has to be public because it is
393    * used in {@link org.apache.hadoop.hbase.io.hfile.bucket.BucketCache} to avoid buffer copy.
394    *
395    * @return the buffer with header and checksum included for read-only operations
396    */
397   public ByteBuff getBufferReadOnlyWithHeader() {
398     ByteBuff dup = this.buf.duplicate();
399     return dup.slice();
400   }
401 
402   /**
403    * Returns a byte buffer of this block, including header data and checksum, positioned at
404    * the beginning of header. The underlying data array is not copied.
405    *
406    * @return the byte buffer with header and checksum included
407    */
408   ByteBuff getBufferWithHeader() {
409     ByteBuff dupBuf = buf.duplicate();
410     dupBuf.rewind();
411     return dupBuf;
412   }
413 
414   private void sanityCheckAssertion(long valueFromBuf, long valueFromField,
415       String fieldName) throws IOException {
416     if (valueFromBuf != valueFromField) {
417       throw new AssertionError(fieldName + " in the buffer (" + valueFromBuf
418           + ") is different from that in the field (" + valueFromField + ")");
419     }
420   }
421 
422   private void sanityCheckAssertion(BlockType valueFromBuf, BlockType valueFromField)
423       throws IOException {
424     if (valueFromBuf != valueFromField) {
425       throw new IOException("Block type stored in the buffer: " +
426         valueFromBuf + ", block type field: " + valueFromField);
427     }
428   }
429 
430   /**
431    * Checks if the block is internally consistent, i.e. the first
432    * {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the buffer contain a
433    * valid header consistent with the fields. Assumes a packed block structure.
434    * This function is primarily for testing and debugging, and is not
435    * thread-safe, because it alters the internal buffer pointer.
436    */
437   void sanityCheck() throws IOException {
438     buf.rewind();
439 
440     sanityCheckAssertion(BlockType.read(buf), blockType);
441 
442     sanityCheckAssertion(buf.getInt(), onDiskSizeWithoutHeader,
443         "onDiskSizeWithoutHeader");
444 
445     sanityCheckAssertion(buf.getInt(), uncompressedSizeWithoutHeader,
446         "uncompressedSizeWithoutHeader");
447 
448       sanityCheckAssertion(buf.getLong(), prevBlockOffset, "prevBlockOffset");
449     if (this.fileContext.isUseHBaseChecksum()) {
450       sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), "checksumType");
451       sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(),
452           "bytesPerChecksum");
453       sanityCheckAssertion(buf.getInt(), onDiskDataSizeWithHeader, "onDiskDataSizeWithHeader");
454     }
455 
456     int cksumBytes = totalChecksumBytes();
457     int expectedBufLimit = onDiskDataSizeWithHeader + cksumBytes;
458     if (buf.limit() != expectedBufLimit) {
459       throw new AssertionError("Expected buffer limit " + expectedBufLimit
460           + ", got " + buf.limit());
461     }
462 
463     // We might optionally allocate HFILEBLOCK_HEADER_SIZE more bytes to read the next
464     // block's header, so there are two sensible values for buffer capacity.
465     int hdrSize = headerSize();
466     if (buf.capacity() != expectedBufLimit &&
467         buf.capacity() != expectedBufLimit + hdrSize) {
468       throw new AssertionError("Invalid buffer capacity: " + buf.capacity() +
469           ", expected " + expectedBufLimit + " or " + (expectedBufLimit + hdrSize));
470     }
471   }
472 
473   @Override
474   public String toString() {
475     StringBuilder sb = new StringBuilder()
476       .append("HFileBlock [")
477       .append(" fileOffset=").append(offset)
478       .append(" headerSize()=").append(headerSize())
479       .append(" blockType=").append(blockType)
480       .append(" onDiskSizeWithoutHeader=").append(onDiskSizeWithoutHeader)
481       .append(" uncompressedSizeWithoutHeader=").append(uncompressedSizeWithoutHeader)
482       .append(" prevBlockOffset=").append(prevBlockOffset)
483       .append(" isUseHBaseChecksum()=").append(fileContext.isUseHBaseChecksum());
484     if (fileContext.isUseHBaseChecksum()) {
485       sb.append(" checksumType=").append(ChecksumType.codeToType(this.buf.get(24)))
486         .append(" bytesPerChecksum=").append(this.buf.getInt(24 + 1))
487         .append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader);
488     } else {
489       sb.append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader)
490         .append("(").append(onDiskSizeWithoutHeader)
491         .append("+").append(HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM).append(")");
492     }
493     String dataBegin = null;
494     if (buf.hasArray()) {
495       dataBegin = Bytes.toStringBinary(buf.array(), buf.arrayOffset() + headerSize(),
496           Math.min(32, buf.limit() - buf.arrayOffset() - headerSize()));
497     } else {
498       ByteBuff bufWithoutHeader = getBufferWithoutHeader();
499       byte[] dataBeginBytes = new byte[Math.min(32,
500           bufWithoutHeader.limit() - bufWithoutHeader.position())];
501       bufWithoutHeader.get(dataBeginBytes);
502       dataBegin = Bytes.toStringBinary(dataBeginBytes);
503     }
504     sb.append(" getOnDiskSizeWithHeader()=").append(getOnDiskSizeWithHeader())
505       .append(" totalChecksumBytes()=").append(totalChecksumBytes())
506       .append(" isUnpacked()=").append(isUnpacked())
507       .append(" buf=[ ").append(buf).append(" ]")
508       .append(" dataBeginsWith=").append(dataBegin)
509       .append(" fileContext=").append(fileContext)
510       .append(" ]");
511     return sb.toString();
512   }
513 
514   /**
515    * Called after reading a block with provided onDiskSizeWithHeader.
516    */
517   private void validateOnDiskSizeWithoutHeader(int expectedOnDiskSizeWithoutHeader)
518   throws IOException {
519     if (onDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
520       String dataBegin = null;
521       if (buf.hasArray()) {
522         dataBegin = Bytes.toStringBinary(buf.array(), buf.arrayOffset(), Math.min(32, buf.limit()));
523       } else {
524         ByteBuff bufDup = getBufferReadOnly();
525         byte[] dataBeginBytes = new byte[Math.min(32, bufDup.limit() - bufDup.position())];
526         bufDup.get(dataBeginBytes);
527         dataBegin = Bytes.toStringBinary(dataBeginBytes);
528       }
529       String blockInfoMsg =
530         "Block offset: " + offset + ", data starts with: " + dataBegin;
531       throw new IOException("On-disk size without header provided is "
532           + expectedOnDiskSizeWithoutHeader + ", but block "
533           + "header contains " + onDiskSizeWithoutHeader + ". " +
534           blockInfoMsg);
535     }
536   }
537 
538   /**
539    * Retrieves the decompressed/decrypted view of this block. An encoded block remains in its
540    * encoded structure. Internal structures are shared between instances where applicable.
541    */
542   HFileBlock unpack(HFileContext fileContext, FSReader reader) throws IOException {
543     if (!fileContext.isCompressedOrEncrypted()) {
544       // TODO: cannot use our own fileContext here because HFileBlock(ByteBuffer, boolean),
545       // which is used for block serialization to L2 cache, does not preserve encoding and
546       // encryption details.
547       return this;
548     }
549 
550     HFileBlock unpacked = new HFileBlock(this);
551     unpacked.allocateBuffer(); // allocates space for the decompressed block
552 
553     HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA ?
554       reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
555 
556     ByteBuff dup = this.buf.duplicate();
557     dup.position(this.headerSize());
558     dup = dup.slice();
559     ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
560       unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(),
561       dup);
562 
563     // Preserve the next block's header bytes in the new block if we have them.
564     if (unpacked.hasNextBlockHeader()) {
565       // Both the buffers are limited till checksum bytes and avoid the next block's header.
566       // Below call to copyFromBufferToBuffer() will try positional read/write from/to buffers when
567       // any of the buffer is DBB. So we change the limit on a dup buffer. No copying just create
568       // new BB objects
569       ByteBuff inDup = this.buf.duplicate();
570       inDup.limit(inDup.limit() + headerSize());
571       ByteBuff outDup = unpacked.buf.duplicate();
572       outDup.limit(outDup.limit() + unpacked.headerSize());
573       outDup.put(
574           unpacked.headerSize() + unpacked.uncompressedSizeWithoutHeader
575               + unpacked.totalChecksumBytes(), inDup, this.onDiskDataSizeWithHeader,
576           unpacked.headerSize());
577     }
578     return unpacked;
579   }
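      /*
       * Hedged usage sketch (variable names are illustrative): a caller holding a block in its
       * on-disk (packed) form would typically do
       *
       *   HFileBlock readable = block.unpack(fileContext, fsReader);
       *
       * and work with the returned instance; when the file uses neither compression nor
       * encryption, unpack() returns the same block unchanged.
       */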
580 
581   /**
582    * Return true when this buffer includes next block's header.
583    */
584   private boolean hasNextBlockHeader() {
585     return nextBlockOnDiskSizeWithHeader > 0;
586   }
587 
588   /**
589    * Always allocates a new buffer of the correct size. Copies header bytes
590    * from the existing buffer. Does not change header fields.
591    * Reserve room to keep checksum bytes too.
592    */
593   private void allocateBuffer() {
594     int cksumBytes = totalChecksumBytes();
595     int headerSize = headerSize();
596     int capacityNeeded = headerSize + uncompressedSizeWithoutHeader +
597         cksumBytes + (hasNextBlockHeader() ? headerSize : 0);
598 
599     // TODO: should we consider allocating this buffer off-heap?
600     ByteBuffer newBuf = ByteBuffer.allocate(capacityNeeded);
601 
602     // Copy header bytes into newBuf.
603     // newBuf is HBB so no issue in calling array()
604     buf.position(0);
605     buf.get(newBuf.array(), newBuf.arrayOffset(), headerSize);
606 
607     buf = new SingleByteBuff(newBuf);
608     // set limit to exclude next block's header
609     buf.limit(headerSize + uncompressedSizeWithoutHeader + cksumBytes);
610   }
611 
612   /**
613    * Return true when this block's buffer has been unpacked, false otherwise. Note this is a
614    * calculated heuristic, not a tracked attribute of the block.
615    */
616   public boolean isUnpacked() {
617     final int cksumBytes = totalChecksumBytes();
618     final int headerSize = headerSize();
619     final int expectedCapacity = headerSize + uncompressedSizeWithoutHeader + cksumBytes;
620     final int bufCapacity = buf.capacity();
621     return bufCapacity == expectedCapacity || bufCapacity == expectedCapacity + headerSize;
622   }
623 
624   /** An additional sanity-check in case no compression or encryption is being used. */
625   public void assumeUncompressed() throws IOException {
626     if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader +
627         totalChecksumBytes()) {
628       throw new IOException("Using no compression but "
629           + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", "
630           + "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader
631           + ", numChecksumBytes=" + totalChecksumBytes());
632     }
633   }
634 
635   /**
636    * @param expectedType the expected type of this block
637    * @throws IOException if this block's type is different than expected
638    */
639   public void expectType(BlockType expectedType) throws IOException {
640     if (blockType != expectedType) {
641       throw new IOException("Invalid block type: expected=" + expectedType
642           + ", actual=" + blockType);
643     }
644   }
645 
646   /** @return the offset of this block in the file it was read from */
647   public long getOffset() {
648     if (offset < 0) {
649       throw new IllegalStateException(
650           "HFile block offset not initialized properly");
651     }
652     return offset;
653   }
654 
655   /**
656    * @return a byte stream reading the data + checksum of this block
657    */
658   public DataInputStream getByteStream() {
659     ByteBuff dup = this.buf.duplicate();
660     dup.position(this.headerSize());
661     return new DataInputStream(new ByteBuffInputStream(dup));
662   }
663 
664   @Override
665   public long heapSize() {
666     long size = ClassSize.align(
667         ClassSize.OBJECT +
668         // Block type, multi byte buffer, MemoryType and meta references
669         4 * ClassSize.REFERENCE +
670         // On-disk size, uncompressed size, next block's on-disk size,
671         // and on-disk data size with header: four ints in total
672         4 * Bytes.SIZEOF_INT +
673         // This and previous block offset
674         2 * Bytes.SIZEOF_LONG +
675         // Heap size of the meta object. meta will be always not null.
676         fileContext.heapSize()
677     );
678 
679     if (buf != null) {
680       // Deep overhead of the byte buffer. Needs to be aligned separately.
681       size += ClassSize.align(buf.capacity() + MULTI_BYTE_BUFFER_HEAP_SIZE);
682     }
683 
684     return ClassSize.align(size);
685   }
686 
687   /**
688    * Read from an input stream. Analogous to
689    * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but specifies a
690    * number of "extra" bytes that would be desirable but not absolutely
691    * necessary to read.
692    *
693    * @param in the input stream to read from
694    * @param buf the buffer to read into
695    * @param bufOffset the destination offset in the buffer
696    * @param necessaryLen the number of bytes that are absolutely necessary to
697    *          read
698    * @param extraLen the number of extra bytes that would be nice to read
699    * @return true if succeeded reading the extra bytes
700    * @throws IOException if failed to read the necessary bytes
701    */
702   public static boolean readWithExtra(InputStream in, byte[] buf,
703       int bufOffset, int necessaryLen, int extraLen) throws IOException {
704     int bytesRemaining = necessaryLen + extraLen;
705     while (bytesRemaining > 0) {
706       int ret = in.read(buf, bufOffset, bytesRemaining);
707       if (ret == -1 && bytesRemaining <= extraLen) {
708         // We could not read the "extra data", but that is OK.
709         break;
710       }
711 
712       if (ret < 0) {
713         throw new IOException("Premature EOF from inputStream (read "
714             + "returned " + ret + ", was trying to read " + necessaryLen
715             + " necessary bytes and " + extraLen + " extra bytes, "
716             + "successfully read "
717             + (necessaryLen + extraLen - bytesRemaining));
718       }
719       bufOffset += ret;
720       bytesRemaining -= ret;
721     }
722     return bytesRemaining <= 0;
723   }
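      /*
       * Illustrative call (assumed variable names, not from the original source): read one block
       * and, opportunistically, the next block's header in a single IO. The hdrSize extra bytes
       * are merely "nice to have"; a short read that still covers the block itself succeeds.
       *
       *   byte[] dest = new byte[onDiskSizeWithHeader + hdrSize];
       *   boolean gotNextHeader = readWithExtra(is, dest, 0, onDiskSizeWithHeader, hdrSize);
       */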
724 
725   /**
726    * Read from an input stream. Analogous to
727    * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but uses
728    * positional read and specifies a number of "extra" bytes that would be
729    * desirable but not absolutely necessary to read.
730    *
731    * @param in the input stream to read from
732    * @param position the position within the stream from which to start reading
733    * @param buf the buffer to read into
734    * @param bufOffset the destination offset in the buffer
735    * @param necessaryLen the number of bytes that are absolutely necessary to
736    *     read
737    * @param extraLen the number of extra bytes that would be nice to read
738    * @return true if and only if extraLen is &gt; 0 and reading those extra bytes
739    *     was successful
740    * @throws IOException if failed to read the necessary bytes
741    */
742   @VisibleForTesting
743   static boolean positionalReadWithExtra(FSDataInputStream in,
744       long position, byte[] buf, int bufOffset, int necessaryLen, int extraLen)
745       throws IOException {
746     int bytesRemaining = necessaryLen + extraLen;
747     int bytesRead = 0;
748     while (bytesRead < necessaryLen) {
749       int ret = in.read(position, buf, bufOffset, bytesRemaining);
750       if (ret < 0) {
751         throw new IOException("Premature EOF from inputStream (positional read "
752             + "returned " + ret + ", was trying to read " + necessaryLen
753             + " necessary bytes and " + extraLen + " extra bytes, "
754             + "successfully read " + bytesRead);
755       }
756       position += ret;
757       bufOffset += ret;
758       bytesRemaining -= ret;
759       bytesRead += ret;
760     }
761     return bytesRead != necessaryLen && bytesRemaining <= 0;
762   }
763 
764   /**
765    * @return the on-disk size of the next block (including the header size)
766    *         that was read by peeking into the next block's header
767    */
768   public int getNextBlockOnDiskSizeWithHeader() {
769     return nextBlockOnDiskSizeWithHeader;
770   }
771 
772   /**
773    * Unified version 2 {@link HFile} block writer. The intended usage pattern
774    * is as follows:
775    * <ol>
776    * <li>Construct an {@link HFileBlock.Writer}, providing a compression algorithm.
777    * <li>Call {@link Writer#startWriting} and get a data stream to write to.
778    * <li>Write your data into the stream.
779    * <li>Call {@link Writer#writeHeaderAndData(FSDataOutputStream)} as many times as you need to
780    * store the serialized block into an external stream.
781    * <li>Repeat to write more blocks.
782    * </ol>
783    * <p>An illustrative sketch of this flow appears in the comment at the top of the class body.
784    */
785   public static class Writer {
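        /*
         * Illustrative sketch of the usage pattern from the class comment above (assumed
         * variable names; a META block is used so raw bytes can be written directly):
         *
         *   HFileBlock.Writer hbw = new HFileBlock.Writer(NoOpDataBlockEncoder.INSTANCE, fileContext);
         *   DataOutputStream dos = hbw.startWriting(BlockType.META);
         *   dos.write(myMetaBytes);        // write the payload
         *   hbw.writeHeaderAndData(out);   // flush header + data + checksums to the stream
         *   // repeat startWriting()/writeHeaderAndData() for further blocks
         */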
786 
787     private enum State {
788       INIT,
789       WRITING,
790       BLOCK_READY
791     };
792 
793     /** Writer state. Used to ensure the correct usage protocol. */
794     private State state = State.INIT;
795 
796     /** Data block encoder used for data blocks */
797     private final HFileDataBlockEncoder dataBlockEncoder;
798 
799     private HFileBlockEncodingContext dataBlockEncodingCtx;
800 
801     /** block encoding context for non-data blocks */
802     private HFileBlockDefaultEncodingContext defaultBlockEncodingCtx;
803 
804     /**
805      * The stream we use to accumulate data in uncompressed format for each
806      * block. We reset this stream at the end of each block and reuse it. The
807      * header is written as the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes into this
808      * stream.
809      */
810     private ByteArrayOutputStream baosInMemory;
811 
812     /**
813      * Current block type. Set in {@link #startWriting(BlockType)}. Could be
814      * changed in {@link #finishBlock()} from {@link BlockType#DATA}
815      * to {@link BlockType#ENCODED_DATA}.
816      */
817     private BlockType blockType;
818 
819     /**
820      * A stream that we write uncompressed bytes to, which compresses them and
821      * writes them to {@link #baosInMemory}.
822      */
823     private DataOutputStream userDataStream;
824 
825     // Size of actual data being written. Not considering the block encoding/compression. This
826     // includes the header size also.
827     private int unencodedDataSizeWritten;
828 
829     /**
830      * Bytes to be written to the file system, including the header. Compressed
831      * if compression is turned on. It also includes the checksum data that
832      * immediately follows the block data. (header + data + checksums)
833      */
834     private byte[] onDiskBytesWithHeader;
835 
836     /**
837      * The size of the checksum data on disk. It is used only if data is
838      * not compressed. If data is compressed, then the checksums are already
839      * part of onDiskBytesWithHeader. If data is uncompressed, then this
840      * variable stores the checksum data for this block.
841      */
842     private byte[] onDiskChecksum;
843 
844     /**
845      * Valid in the READY state. Contains the header and the uncompressed (but
846      * potentially encoded, if this is a data block) bytes, so the length is
847      * {@link #uncompressedSizeWithoutHeader} +
848      * {@link org.apache.hadoop.hbase.HConstants#HFILEBLOCK_HEADER_SIZE}.
849      * Does not store checksums.
850      */
851     private byte[] uncompressedBytesWithHeader;
852 
853     /**
854      * Current block's start offset in the {@link HFile}. Set in
855      * {@link #writeHeaderAndData(FSDataOutputStream)}.
856      */
857     private long startOffset;
858 
859     /**
860      * Offset of previous block by block type. Updated when the next block is
861      * started.
862      */
863     private long[] prevOffsetByType;
864 
865     /** The offset of the previous block of the same type */
866     private long prevOffset;
867     /** Meta data that holds information about the hfileblock. */
868     private HFileContext fileContext;
869 
870     /**
871      * @param dataBlockEncoder data block encoding algorithm to use
872      */
873     public Writer(HFileDataBlockEncoder dataBlockEncoder, HFileContext fileContext) {
874       this.dataBlockEncoder = dataBlockEncoder != null
875           ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
876       defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null,
877           HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
878       dataBlockEncodingCtx = this.dataBlockEncoder
879           .newDataBlockEncodingContext(HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
880 
881       if (fileContext.getBytesPerChecksum() < HConstants.HFILEBLOCK_HEADER_SIZE) {
882         throw new RuntimeException("Unsupported value of bytesPerChecksum. " +
883             " Minimum is " + HConstants.HFILEBLOCK_HEADER_SIZE + " but the configured value is " +
884             fileContext.getBytesPerChecksum());
885       }
886 
887       baosInMemory = new ByteArrayOutputStream();
888 
889       prevOffsetByType = new long[BlockType.values().length];
890       for (int i = 0; i < prevOffsetByType.length; ++i)
891         prevOffsetByType[i] = -1;
892 
893       this.fileContext = fileContext;
894     }
895 
896     /**
897      * Starts writing into the block. The previous block's data is discarded.
898      *
899      * @return the stream the user can write their data into
900      * @throws IOException
901      */
902     public DataOutputStream startWriting(BlockType newBlockType)
903         throws IOException {
904       if (state == State.BLOCK_READY && startOffset != -1) {
905         // We had a previous block that was written to a stream at a specific
906         // offset. Save that offset as the last offset of a block of that type.
907         prevOffsetByType[blockType.getId()] = startOffset;
908       }
909 
910       startOffset = -1;
911       blockType = newBlockType;
912 
913       baosInMemory.reset();
914       baosInMemory.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
915 
916       state = State.WRITING;
917 
918       // We will compress it later in finishBlock()
919       userDataStream = new ByteBufferSupportDataOutputStream(baosInMemory);
920       if (newBlockType == BlockType.DATA) {
921         this.dataBlockEncoder.startBlockEncoding(dataBlockEncodingCtx, userDataStream);
922       }
923       this.unencodedDataSizeWritten = 0;
924       return userDataStream;
925     }
926 
927     /**
928      * Writes the Cell to this block
929      * @param cell
930      * @throws IOException
931      */
932     public void write(Cell cell) throws IOException{
933       expectState(State.WRITING);
934       this.unencodedDataSizeWritten += this.dataBlockEncoder.encode(cell, dataBlockEncodingCtx,
935           this.userDataStream);
936     }
937 
938     /**
939      * Returns the stream for the user to write to. The block writer takes care
940      * of handling compression and buffering for caching on write. Can only be
941      * called in the "writing" state.
942      *
943      * @return the data output stream for the user to write to
944      */
945     DataOutputStream getUserDataStream() {
946       expectState(State.WRITING);
947       return userDataStream;
948     }
949 
950     /**
951      * Transitions the block writer from the "writing" state to the "block
952      * ready" state.  Does nothing if a block is already finished.
953      */
954     void ensureBlockReady() throws IOException {
955       Preconditions.checkState(state != State.INIT,
956           "Unexpected state: " + state);
957 
958       if (state == State.BLOCK_READY)
959         return;
960 
961       // This will set state to BLOCK_READY.
962       finishBlock();
963     }
964 
965     /**
966      * An internal method that flushes the compressing stream (if using
967      * compression), serializes the header, and takes care of the separate
968      * uncompressed stream for caching on write, if applicable. Sets block
969      * write state to "block ready".
970      */
971     private void finishBlock() throws IOException {
972       if (blockType == BlockType.DATA) {
973         this.dataBlockEncoder.endBlockEncoding(dataBlockEncodingCtx, userDataStream,
974             baosInMemory.getBuffer(), blockType);
975         blockType = dataBlockEncodingCtx.getBlockType();
976       }
977       userDataStream.flush();
978       // This does an array copy, so it is safe to cache this byte array.
979       uncompressedBytesWithHeader = baosInMemory.toByteArray();
980       prevOffset = prevOffsetByType[blockType.getId()];
981 
982       // We need to set state before we can package the block up for
983       // cache-on-write. In a way, the block is ready, but not yet encoded or
984       // compressed.
985       state = State.BLOCK_READY;
986       if (blockType == BlockType.DATA || blockType == BlockType.ENCODED_DATA) {
987         onDiskBytesWithHeader = dataBlockEncodingCtx
988             .compressAndEncrypt(uncompressedBytesWithHeader);
989       } else {
990         onDiskBytesWithHeader = defaultBlockEncodingCtx
991             .compressAndEncrypt(uncompressedBytesWithHeader);
992       }
993       int numBytes = (int) ChecksumUtil.numBytes(
994           onDiskBytesWithHeader.length,
995           fileContext.getBytesPerChecksum());
996 
997       // put the header for on disk bytes
998       putHeader(onDiskBytesWithHeader, 0,
999           onDiskBytesWithHeader.length + numBytes,
1000           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
1001       // set the header for the uncompressed bytes (for cache-on-write)
1002       putHeader(uncompressedBytesWithHeader, 0,
1003           onDiskBytesWithHeader.length + numBytes,
1004           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
1005 
1006       onDiskChecksum = new byte[numBytes];
1007       ChecksumUtil.generateChecksums(
1008           onDiskBytesWithHeader, 0, onDiskBytesWithHeader.length,
1009           onDiskChecksum, 0, fileContext.getChecksumType(), fileContext.getBytesPerChecksum());
1010     }
1011 
1012     /**
1013      * Put the header into the given byte array at the given offset.
1014      * @param onDiskSize size of the block on disk (header + data + checksum)
1015      * @param uncompressedSize size of the block after decompression (but
1016      *          before optional data block decoding) including header
1017      * @param onDiskDataSize size of the block on disk with header
1018      *        and data but not including the checksums
1019      */
1020     private void putHeader(byte[] dest, int offset, int onDiskSize,
1021         int uncompressedSize, int onDiskDataSize) {
1022       offset = blockType.put(dest, offset);
1023       offset = Bytes.putInt(dest, offset, onDiskSize - HConstants.HFILEBLOCK_HEADER_SIZE);
1024       offset = Bytes.putInt(dest, offset, uncompressedSize - HConstants.HFILEBLOCK_HEADER_SIZE);
1025       offset = Bytes.putLong(dest, offset, prevOffset);
1026       offset = Bytes.putByte(dest, offset, fileContext.getChecksumType().getCode());
1027       offset = Bytes.putInt(dest, offset, fileContext.getBytesPerChecksum());
1028       Bytes.putInt(dest, offset, onDiskDataSize);
1029     }
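        /*
         * Worked example with assumed numbers, for illustration only: an uncompressed 64 KB data
         * block with bytesPerChecksum = 16384 carries ceil(65569 / 16384) * 4 = 20 checksum
         * bytes, so putHeader() writes, in order:
         *   magic record, onDiskSizeWithoutHeader = 65536 + 20 = 65556,
         *   uncompressedSizeWithoutHeader = 65536, prevOffset,
         *   checksum type code, bytesPerChecksum = 16384,
         *   onDiskDataSizeWithHeader = 65536 + 33 = 65569.
         */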
1030 
1031     /**
1032      * Similar to {@link #writeHeaderAndData(FSDataOutputStream)}, but records
1033      * the offset of this block so that it can be referenced in the next block
1034      * of the same type.
1035      *
1036      * @param out
1037      * @throws IOException
1038      */
1039     public void writeHeaderAndData(FSDataOutputStream out) throws IOException {
1040       long offset = out.getPos();
1041       if (startOffset != -1 && offset != startOffset) {
1042         throw new IOException("A " + blockType + " block written to a "
1043             + "stream twice, first at offset " + startOffset + ", then at "
1044             + offset);
1045       }
1046       startOffset = offset;
1047 
1048       finishBlockAndWriteHeaderAndData((DataOutputStream) out);
1049     }
1050 
1051     /**
1052      * Writes the header and the compressed data of this block (or uncompressed
1053      * data when not using compression) into the given stream. Can be called in
1054      * the "writing" state or in the "block ready" state. If called in the
1055      * "writing" state, transitions the writer to the "block ready" state.
1056      *
1057      * @param out the output stream to write the block to
1058      * @throws IOException
1059      */
1060     protected void finishBlockAndWriteHeaderAndData(DataOutputStream out)
1061       throws IOException {
1062       ensureBlockReady();
1063       out.write(onDiskBytesWithHeader);
1064       out.write(onDiskChecksum);
1065     }
1066 
1067     /**
1068      * Returns the header or the compressed data (or uncompressed data when not
1069      * using compression) as a byte array. Can be called in the "writing" state
1070      * or in the "block ready" state. If called in the "writing" state,
1071      * transitions the writer to the "block ready" state. This returns
1072      * the header + data + checksums stored on disk.
1073      *
1074      * @return header and data as they would be stored on disk in a byte array
1075      * @throws IOException
1076      */
1077     byte[] getHeaderAndDataForTest() throws IOException {
1078       ensureBlockReady();
1079       // This is not very optimal, because we are doing an extra copy.
1080       // But this method is used only by unit tests.
1081       byte[] output =
1082           new byte[onDiskBytesWithHeader.length
1083               + onDiskChecksum.length];
1084       System.arraycopy(onDiskBytesWithHeader, 0, output, 0,
1085           onDiskBytesWithHeader.length);
1086       System.arraycopy(onDiskChecksum, 0, output,
1087           onDiskBytesWithHeader.length, onDiskChecksum.length);
1088       return output;
1089     }
1090 
1091     /**
1092      * Releases resources used by this writer.
1093      */
1094     public void release() {
1095       if (dataBlockEncodingCtx != null) {
1096         dataBlockEncodingCtx.close();
1097         dataBlockEncodingCtx = null;
1098       }
1099       if (defaultBlockEncodingCtx != null) {
1100         defaultBlockEncodingCtx.close();
1101         defaultBlockEncodingCtx = null;
1102       }
1103     }
1104 
1105     /**
1106      * Returns the on-disk size of the data portion of the block. This is the
1107      * compressed size if compression is enabled. Can only be called in the
1108      * "block ready" state. Header is not compressed, and its size is not
1109      * included in the return value.
1110      *
1111      * @return the on-disk size of the block, not including the header.
1112      */
1113     int getOnDiskSizeWithoutHeader() {
1114       expectState(State.BLOCK_READY);
1115       return onDiskBytesWithHeader.length
1116           + onDiskChecksum.length
1117           - HConstants.HFILEBLOCK_HEADER_SIZE;
1118     }
1119 
1120     /**
1121      * Returns the on-disk size of the block. Can only be called in the
1122      * "block ready" state.
1123      *
1124      * @return the on-disk size of the block ready to be written, including the
1125      *         header size, the data and the checksum data.
1126      */
1127     int getOnDiskSizeWithHeader() {
1128       expectState(State.BLOCK_READY);
1129       return onDiskBytesWithHeader.length + onDiskChecksum.length;
1130     }
1131 
1132     /**
1133      * The uncompressed size of the block data. Does not include header size.
1134      */
1135     int getUncompressedSizeWithoutHeader() {
1136       expectState(State.BLOCK_READY);
1137       return uncompressedBytesWithHeader.length - HConstants.HFILEBLOCK_HEADER_SIZE;
1138     }
1139 
1140     /**
1141      * The uncompressed size of the block data, including header size.
1142      */
1143     int getUncompressedSizeWithHeader() {
1144       expectState(State.BLOCK_READY);
1145       return uncompressedBytesWithHeader.length;
1146     }
1147 
1148     /** @return true if a block is being written  */
1149     public boolean isWriting() {
1150       return state == State.WRITING;
1151     }
1152 
1153     /**
1154      * Returns the number of bytes written into the current block so far, or
1155      * zero if not writing the block at the moment. Note that this will return
1156      * zero in the "block ready" state as well.
1157      *
1158      * @return the number of bytes written
1159      */
1160     public int blockSizeWritten() {
1161       if (state != State.WRITING) return 0;
1162       return this.unencodedDataSizeWritten;
1163     }
1164 
1165     /**
1166      * Returns the header followed by the uncompressed data, even if using
1167      * compression. This is needed for storing uncompressed blocks in the block
1168      * cache. Can be called in the "writing" state or the "block ready" state.
1169      * Returns only the header and data, does not include checksum data.
1170      *
1171      * @return uncompressed block bytes for caching on write
1172      */
1173     ByteBuffer getUncompressedBufferWithHeader() {
1174       expectState(State.BLOCK_READY);
1175       return ByteBuffer.wrap(uncompressedBytesWithHeader);
1176     }
1177 
1178     /**
1179      * Returns the header followed by the on-disk (compressed/encoded/encrypted) data. This is
1180      * needed for storing packed blocks in the block cache. Expects calling semantics identical to
1181      * {@link #getUncompressedBufferWithHeader()}. Returns only the header and data,
1182      * Does not include checksum data.
1183      *
1184      * @return packed block bytes for caching on write
1185      */
1186     ByteBuffer getOnDiskBufferWithHeader() {
1187       expectState(State.BLOCK_READY);
1188       return ByteBuffer.wrap(onDiskBytesWithHeader);
1189     }
1190 
1191     private void expectState(State expectedState) {
1192       if (state != expectedState) {
1193         throw new IllegalStateException("Expected state: " + expectedState +
1194             ", actual state: " + state);
1195       }
1196     }
1197 
1198     /**
1199      * Takes the given {@link BlockWritable} instance, creates a new block of
1200      * its appropriate type, writes the writable into this block, and flushes
1201      * the block into the output stream. The writer is instructed not to buffer
1202      * uncompressed bytes for cache-on-write.
1203      *
1204      * @param bw the block-writable object to write as a block
1205      * @param out the file system output stream
1206      * @throws IOException
1207      */
1208     public void writeBlock(BlockWritable bw, FSDataOutputStream out)
1209         throws IOException {
1210       bw.writeToBlock(startWriting(bw.getBlockType()));
1211       writeHeaderAndData(out);
1212     }
1213 
1214     /**
1215      * Creates a new HFileBlock. Checksums have already been validated, so
1216      * the byte buffer passed into the constructor of this newly created
1217      * block does not have checksum data even though the header minor
1218      * version is MINOR_VERSION_WITH_CHECKSUM. This is indicated by setting a
1219      * 0 value in bytesPerChecksum.
1220      */
1221     public HFileBlock getBlockForCaching(CacheConfig cacheConf) {
1222       HFileContext newContext = new HFileContextBuilder()
1223                                 .withBlockSize(fileContext.getBlocksize())
1224                                 .withBytesPerCheckSum(0)
1225                                 .withChecksumType(ChecksumType.NULL) // no checksums in cached data
1226                                 .withCompression(fileContext.getCompression())
1227                                 .withDataBlockEncoding(fileContext.getDataBlockEncoding())
1228                                 .withHBaseCheckSum(fileContext.isUseHBaseChecksum())
1229                                 .withCompressTags(fileContext.isCompressTags())
1230                                 .withIncludesMvcc(fileContext.isIncludesMvcc())
1231                                 .withIncludesTags(fileContext.isIncludesTags())
1232                                 .build();
1233       return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
1234           getUncompressedSizeWithoutHeader(), prevOffset,
1235           cacheConf.shouldCacheCompressed(blockType.getCategory()) ?
1236             getOnDiskBufferWithHeader() :
1237             getUncompressedBufferWithHeader(),
1238           FILL_HEADER, startOffset,
1239           onDiskBytesWithHeader.length + onDiskChecksum.length, newContext);
1240     }
1241   }
1242 
1243   /** Something that can be written into a block. */
1244   public interface BlockWritable {
1245 
1246     /** The type of block this data should use. */
1247     BlockType getBlockType();
1248 
1249     /**
1250      * Writes the block to the provided stream. Must not write any magic
1251      * records.
1252      *
1253      * @param out a stream to write uncompressed data into
1254      */
1255     void writeToBlock(DataOutput out) throws IOException;
1256   }
1257 
1258   // Block readers and writers
1259 
1260   /** An interface allowing to iterate {@link HFileBlock}s. */
1261   public interface BlockIterator {
1262 
1263     /**
1264      * Get the next block, or null if there are no more blocks to iterate.
1265      */
1266     HFileBlock nextBlock() throws IOException;
1267 
1268     /**
1269      * Similar to {@link #nextBlock()} but checks block type, throws an
1270      * exception if incorrect, and returns the HFile block
1271      */
1272     HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
1273   }
1274 
1275   /** A full-fledged reader with iteration ability. */
1276   public interface FSReader {
1277 
1278     /**
1279      * Reads the block at the given offset in the file with the given on-disk
1280      * size and uncompressed size.
1281      *
1282      * @param offset
1283      * @param onDiskSize the on-disk size of the entire block, including all
1284      *          applicable headers, or -1 if unknown
1285      * @param uncompressedSize the uncompressed size of the compressed part of
1286      *          the block, or -1 if unknown
1287      * @return the newly read block
1288      */
1289     HFileBlock readBlockData(long offset, long onDiskSize,
1290         int uncompressedSize, boolean pread) throws IOException;
1291 
1292     /**
1293      * Creates a block iterator over the given portion of the {@link HFile}.
1294      * The iterator returns blocks whose offsets satisfy startOffset &lt;= offset &lt;
1295      * endOffset. Returned blocks are always unpacked.
1296      *
1297      * @param startOffset the offset of the block to start iteration with
1298      * @param endOffset the offset to end iteration at (exclusive)
1299      * @return an iterator of blocks between the two given offsets
1300      */
1301     BlockIterator blockRange(long startOffset, long endOffset);
1302 
1303     /** Closes the backing streams */
1304     void closeStreams() throws IOException;
1305 
1306     /** Get a decoder for {@link BlockType#ENCODED_DATA} blocks from this file. */
1307     HFileBlockDecodingContext getBlockDecodingContext();
1308 
1309     /** Get the default decoder for blocks from this file. */
1310     HFileBlockDecodingContext getDefaultBlockDecodingContext();
1311 
1312     void setIncludesMemstoreTS(boolean includesMemstoreTS);
1313     void setDataBlockEncoder(HFileDataBlockEncoder encoder);
1314   }
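  /*
   * Illustrative sketch, not part of the upstream source: typical FSReader usage.
   * The method name and all offset/size arguments are hypothetical placeholders.
   */
  static void exampleFsReaderUsage(FSReader fsReader, long blockOffset,
      long onDiskSizeWithHeader, long startOffset, long endOffset) throws IOException {
    // Random read: the on-disk size is known (e.g. from the block index), so a
    // positional read (pread=true) is used; uncompressedSize is always passed as -1.
    HFileBlock block = fsReader.readBlockData(blockOffset, onDiskSizeWithHeader, -1, true);

    // Sequential scan over [startOffset, endOffset); returned blocks are already unpacked.
    BlockIterator it = fsReader.blockRange(startOffset, endOffset);
    for (HFileBlock b = it.nextBlock(); b != null; b = it.nextBlock()) {
      // consume block and b here
    }
  }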
1315 
1316   /**
1317    * We always prefetch the header of the next block, so that we know its
1318    * on-disk size in advance and can read it in one operation.
1319    */
1320   private static class PrefetchedHeader {
1321     long offset = -1;
1322     byte[] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
1323     final ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE);
1324   }
1325 
1326   /** Reads version 2 blocks from the filesystem. */
1327   static class FSReaderImpl implements FSReader {
1328     /** The file system stream of the underlying {@link HFile} that
1329      * does or doesn't do checksum validations in the filesystem */
1330     protected FSDataInputStreamWrapper streamWrapper;
1331 
1332     private HFileBlockDecodingContext encodedBlockDecodingCtx;
1333 
1334     /** Default context used when BlockType != {@link BlockType#ENCODED_DATA}. */
1335     private final HFileBlockDefaultDecodingContext defaultDecodingCtx;
1336 
1337     private ThreadLocal<PrefetchedHeader> prefetchedHeaderForThread =
1338         new ThreadLocal<PrefetchedHeader>() {
1339       @Override
1340       public PrefetchedHeader initialValue() {
1341         return new PrefetchedHeader();
1342       }
1343     };
1344 
1346 
1347     /** The size of the file we are reading from, or -1 if unknown. */
1348     protected long fileSize;
1349 
1350     /** The size of the header */
1351     protected final int hdrSize;
1352 
1353     /** The filesystem used to access data */
1354     protected HFileSystem hfs;
1355 
1356     private final Lock streamLock = new ReentrantLock();
1357 
1358     /** The default buffer size for our buffered streams */
1359     public static final int DEFAULT_BUFFER_SIZE = 1 << 20;
1360 
1361     protected HFileContext fileContext;
1362     // Cache the fileName
1363     protected String pathName;
1364 
1365     public FSReaderImpl(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
1366         HFileContext fileContext) throws IOException {
1367       this.fileSize = fileSize;
1368       this.hfs = hfs;
1369       if (path != null) {
1370         this.pathName = path.toString();
1371       }
1372       this.fileContext = fileContext;
1373       this.hdrSize = headerSize(fileContext.isUseHBaseChecksum());
1374 
1375       this.streamWrapper = stream;
1376       // Older versions of HBase didn't support checksum.
1377       this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum());
1378       defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext);
1379       encodedBlockDecodingCtx = defaultDecodingCtx;
1380     }
1381 
1382     /**
1383      * A constructor that reads files with the latest minor version.
1384      * This is used by unit tests only.
1385      */
1386     FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext)
1387     throws IOException {
1388       this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
1389     }
1390 
1391     public BlockIterator blockRange(final long startOffset, final long endOffset) {
1392       final FSReader owner = this; // handle for inner class
1393       return new BlockIterator() {
1394         private long offset = startOffset;
1395 
1396         @Override
1397         public HFileBlock nextBlock() throws IOException {
1398           if (offset >= endOffset)
1399             return null;
1400           HFileBlock b = readBlockData(offset, -1, -1, false);
1401           offset += b.getOnDiskSizeWithHeader();
1402           return b.unpack(fileContext, owner);
1403         }
1404 
1405         @Override
1406         public HFileBlock nextBlockWithBlockType(BlockType blockType)
1407             throws IOException {
1408           HFileBlock blk = nextBlock();
1409           if (blk.getBlockType() != blockType) {
1410             throw new IOException("Expected block of type " + blockType
1411                 + " but found " + blk.getBlockType());
1412           }
1413           return blk;
1414         }
1415       };
1416     }
1417 
1418     /**
1419      * Does a positional read or a seek and read into the given buffer. Returns
1420      * the on-disk size of the next block, or -1 if it could not be determined.
1421      *
1422      * @param dest destination buffer
1423      * @param destOffset offset in the destination buffer
1424      * @param size size of the block to be read
1425      * @param peekIntoNextBlock whether to read the next block's on-disk size
1426      * @param fileOffset position in the stream to read at
1427      * @param pread whether we should do a positional read
1428      * @param istream The input source of data
1429      * @return the on-disk size of the next block with header size included, or
1430      *         -1 if it could not be determined
1431      * @throws IOException
1432      */
1433     protected int readAtOffset(FSDataInputStream istream,
1434         byte[] dest, int destOffset, int size,
1435         boolean peekIntoNextBlock, long fileOffset, boolean pread)
1436         throws IOException {
1437       if (peekIntoNextBlock &&
1438           destOffset + size + hdrSize > dest.length) {
1439         // We are asked to read the next block's header as well, but there is
1440         // not enough room in the array.
1441         throw new IOException("Attempted to read " + size + " bytes and " +
1442             hdrSize + " bytes of next header into a " + dest.length +
1443             "-byte array at offset " + destOffset);
1444       }
1445 
1446       if (!pread && streamLock.tryLock()) {
1447         // Seek + read. Better for scanning.
1448         try {
1449           istream.seek(fileOffset);
1450 
1451           long realOffset = istream.getPos();
1452           if (realOffset != fileOffset) {
1453             throw new IOException("Tried to seek to " + fileOffset + " to "
1454                 + "read " + size + " bytes, but pos=" + realOffset
1455                 + " after seek");
1456           }
1457 
1458           if (!peekIntoNextBlock) {
1459             IOUtils.readFully(istream, dest, destOffset, size);
1460             return -1;
1461           }
1462 
1463           // Try to read the next block header.
1464           if (!readWithExtra(istream, dest, destOffset, size, hdrSize))
1465             return -1;
1466         } finally {
1467           streamLock.unlock();
1468         }
1469       } else {
1470         // Positional read. Better for random reads; or when the streamLock is already locked.
1471         int extraSize = peekIntoNextBlock ? hdrSize : 0;
1472         if (!positionalReadWithExtra(istream, fileOffset, dest, destOffset,
1473             size, extraSize)) {
1474           return -1;
1475         }
1476       }
1477 
1478       assert peekIntoNextBlock;
1479       return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize;
1480     }
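    /*
     * Illustrative note, not part of the upstream source: when peekIntoNextBlock is
     * set, the next block's header is read into
     * dest[destOffset + size .. destOffset + size + hdrSize). Its first 8 bytes are
     * the magic record and the following 4 bytes are that block's on-disk size
     * excluding the header, so the return statement above adds hdrSize back:
     *
     *   int sizeFieldPos = destOffset + size + BlockType.MAGIC_LENGTH; // skip the magic
     *   int nextOnDiskSizeWithHeader = Bytes.toInt(dest, sizeFieldPos) + hdrSize;
     */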
1481 
1482     /**
1483      * Reads a version 2 block (version 1 blocks not supported and not expected). Tries to do as
1484      * little memory allocation as possible, using the provided on-disk size.
1485      *
1486      * @param offset the offset in the stream to read at
1487      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1488      *          the header, or -1 if unknown
1489      * @param uncompressedSize the uncompressed size of the block. Always
1490      *          expected to be -1. This parameter is only used in version 1.
1491      * @param pread whether to use a positional read
1492      */
1493     @Override
1494     public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL,
1495         int uncompressedSize, boolean pread)
1496     throws IOException {
1497 
1498       // get a copy of the current state of whether to validate
1499       // hbase checksums or not for this read call. This is not
1500       // thread-safe but the one constraint is that if we decide
1501       // to skip hbase checksum verification then we are
1502       // guaranteed to use hdfs checksum verification.
1503       boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum();
1504       FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum);
1505 
1506       HFileBlock blk = readBlockDataInternal(is, offset,
1507                          onDiskSizeWithHeaderL,
1508                          uncompressedSize, pread,
1509                          doVerificationThruHBaseChecksum);
1510       if (blk == null) {
1511         HFile.LOG.warn("HBase checksum verification failed for file " +
1512                        pathName + " at offset " +
1513                        offset + " filesize " + fileSize +
1514                        ". Retrying read with HDFS checksums turned on...");
1515 
1516         if (!doVerificationThruHBaseChecksum) {
1517           String msg = "HBase checksum verification failed for file " +
1518                        pathName + " at offset " +
1519                        offset + " filesize " + fileSize +
1520                        " but this cannot happen because doVerify is " +
1521                        doVerificationThruHBaseChecksum;
1522           HFile.LOG.warn(msg);
1523           throw new IOException(msg); // cannot happen case here
1524         }
1525         HFile.checksumFailures.incrementAndGet(); // update metrics
1526 
1527         // If we have a checksum failure, we fall back into a mode where
1528         // the next few reads use HDFS level checksums. We aim to make the
1529         // next CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD reads avoid
1530         // hbase checksum verification, but since this value is set without
1531         // holding any locks, it can so happen that we might actually do
1532         // a few more than precisely this number.
1533         is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD);
1534         doVerificationThruHBaseChecksum = false;
1535         blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL,
1536                                     uncompressedSize, pread,
1537                                     doVerificationThruHBaseChecksum);
1538         if (blk != null) {
1539           HFile.LOG.warn("HDFS checksum verification suceeded for file " +
1540                          pathName + " at offset " +
1541                          offset + " filesize " + fileSize);
1542         }
1543       }
1544       if (blk == null && !doVerificationThruHBaseChecksum) {
1545         String msg = "readBlockData failed, possibly due to " +
1546                      "checksum verification failed for file " + pathName +
1547                      " at offset " + offset + " filesize " + fileSize;
1548         HFile.LOG.warn(msg);
1549         throw new IOException(msg);
1550       }
1551 
1552       // If there is a checksum mismatch earlier, then retry with
1553       // HBase checksums switched off and use HDFS checksum verification.
1554       // This triggers HDFS to detect and fix corrupt replicas. The
1555       // next checksumOffCount read requests will use HDFS checksums.
1556       // The decrementing of this.checksumOffCount is not thread-safe,
1557       // but it is harmless because eventually checksumOffCount will be
1558       // a negative number.
1559       streamWrapper.checksumOk();
1560       return blk;
1561     }
1562 
1563     /**
1564      * Reads a version 2 block.
1565      *
1566      * @param offset the offset in the stream to read at
1567      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1568      *          the header, or -1 if unknown
1569      * @param uncompressedSize the uncompressed size of the block. Always
1570      *          expected to be -1. This parameter is only used in version 1.
1571      * @param pread whether to use a positional read
1572      * @param verifyChecksum Whether to use HBase checksums.
1573      *        If HBase checksum is switched off, then use HDFS checksum.
1574      * @return the HFileBlock or null if there is a HBase checksum mismatch
1575      */
1576     private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
1577         long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread,
1578         boolean verifyChecksum)
1579     throws IOException {
1580       if (offset < 0) {
1581         throw new IOException("Invalid offset=" + offset + " trying to read "
1582             + "block (onDiskSize=" + onDiskSizeWithHeaderL
1583             + ", uncompressedSize=" + uncompressedSize + ")");
1584       }
1585 
1586       if (uncompressedSize != -1) {
1587         throw new IOException("Version 2 block reader API does not need " +
1588             "the uncompressed size parameter");
1589       }
1590 
1591       if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1)
1592           || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) {
1593         throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL
1594             + ": expected to be at least " + hdrSize
1595             + " and at most " + Integer.MAX_VALUE + ", or -1 (offset="
1596             + offset + ", uncompressedSize=" + uncompressedSize + ")");
1597       }
1598 
1599       int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL;
1600       // See if we can avoid reading the header. This is desirable, because
1601       // we will not incur a backward seek operation if we have already
1602       // read this block's header as part of the previous read's look-ahead.
1603       // And we also want to skip reading the header again if it has already
1604       // been read.
1605       // TODO: How often does this optimization fire? Has to be same thread so the thread local
1606       // is pertinent and we have to be reading next block as in a big scan.
1607       PrefetchedHeader prefetchedHeader = prefetchedHeaderForThread.get();
1608       ByteBuffer headerBuf = prefetchedHeader.offset == offset? prefetchedHeader.buf: null;
1609 
1610       // Allocate enough space to fit the next block's header too.
1611       int nextBlockOnDiskSize = 0;
1612       byte[] onDiskBlock = null;
1613 
1614       HFileBlock b = null;
1615       if (onDiskSizeWithHeader > 0) {
1616         // We know the total on-disk size. Read the entire block into memory,
1617         // then parse the header. This code path is used when
1618         // doing a random read operation relying on the block index, as well as
1619         // when the client knows the on-disk size from peeking into the next
1620         // block's header (e.g. this block's header) when reading the previous
1621         // block. This is the faster and more preferable case.
1622 
1623         // Size that we have to skip in case we have already read the header.
1624         int preReadHeaderSize = headerBuf == null ? 0 : hdrSize;
1625         onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize]; // room for this block plus the
1626                                                                 // next block's header
1627         nextBlockOnDiskSize = readAtOffset(is, onDiskBlock,
1628             preReadHeaderSize, onDiskSizeWithHeader - preReadHeaderSize,
1629             true, offset + preReadHeaderSize, pread);
1630         if (headerBuf != null) {
1631           // the header has been read when reading the previous block, copy
1632           // to this block's header
1633           // headerBuf is HBB
1634           assert headerBuf.hasArray();
1635           System.arraycopy(headerBuf.array(),
1636               headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1637         } else {
1638           headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize);
1639         }
1640         // We know the total on-disk size but not the uncompressed size. Parse the header.
1641         try {
1642           // TODO: FIX!!! Expensive parse just to get a length
1643           b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1644         } catch (IOException ex) {
1645           // Seen in load testing. Provide comprehensive debug info.
1646           throw new IOException("Failed to read compressed block at "
1647               + offset
1648               + ", onDiskSizeWithoutHeader="
1649               + onDiskSizeWithHeader
1650               + ", preReadHeaderSize="
1651               + hdrSize
1652               + ", header.length="
1653               + prefetchedHeader.header.length
1654               + ", header bytes: "
1655               + Bytes.toStringBinary(prefetchedHeader.header, 0,
1656                   hdrSize), ex);
1657         }
1658         // if the caller specifies a onDiskSizeWithHeader, validate it.
1659         int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
1660         assert onDiskSizeWithoutHeader >= 0;
1661         b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader);
1662       } else {
1663         // Check headerBuf to see if we have read this block's header as part of
1664         // reading the previous block. This is an optimization of peeking into
1665         // the next block's header (e.g. this block's header) when reading the
1666         // previous block. This is the faster and more preferable case. If the
1667         // header is already there, don't read the header again.
1668 
1669         // Unfortunately, we still have to do a separate read operation to
1670         // read the header.
1671         if (headerBuf == null) {
1672           // From the header, determine the on-disk size of the given hfile
1673           // block, and read the remaining data, thereby incurring two read
1674           // operations. This might happen when we are doing the first read
1675           // in a series of reads or a random read, and we don't have access
1676           // to the block index. This is costly and should happen very rarely.
1677           headerBuf = ByteBuffer.allocate(hdrSize);
1678           // headerBuf is HBB
1679           readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(),
1680               hdrSize, false, offset, pread);
1681         }
1682         // TODO: FIX!!! Expensive parse just to get a length
1683         b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1684         onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize];
1685         // headerBuf is HBB
1686         System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1687         nextBlockOnDiskSize =
1688           readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader()
1689               - hdrSize, true, offset + hdrSize, pread);
1690         onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize;
1691       }
1692 
1693       if (!fileContext.isCompressedOrEncrypted()) {
1694         b.assumeUncompressed();
1695       }
1696 
1697       if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
1698         return null;             // checksum mismatch
1699       }
1700 
1701       // The onDiskBlock will become the headerAndDataBuffer for this block.
1702       // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
1703       // contains the header of next block, so no need to set next
1704       // block's header in it.
1705       b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader),
1706         this.fileContext.isUseHBaseChecksum());
1707 
1708       b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize;
1709 
1710       // Set prefetched header
1711       if (b.hasNextBlockHeader()) {
1712         prefetchedHeader.offset = offset + b.getOnDiskSizeWithHeader();
1713         System.arraycopy(onDiskBlock, onDiskSizeWithHeader, prefetchedHeader.header, 0, hdrSize);
1714       }
1715 
1716       b.offset = offset;
1717       b.fileContext.setIncludesTags(this.fileContext.isIncludesTags());
1718       b.fileContext.setIncludesMvcc(this.fileContext.isIncludesMvcc());
1719       return b;
1720     }
1721 
1722     public void setIncludesMemstoreTS(boolean includesMemstoreTS) {
1723       this.fileContext.setIncludesMvcc(includesMemstoreTS);
1724     }
1725 
1726     public void setDataBlockEncoder(HFileDataBlockEncoder encoder) {
1727       encodedBlockDecodingCtx = encoder.newDataBlockDecodingContext(this.fileContext);
1728     }
1729 
1730     @Override
1731     public HFileBlockDecodingContext getBlockDecodingContext() {
1732       return this.encodedBlockDecodingCtx;
1733     }
1734 
1735     @Override
1736     public HFileBlockDecodingContext getDefaultBlockDecodingContext() {
1737       return this.defaultDecodingCtx;
1738     }
1739 
1740     /**
1741      * Generates the checksum for the header as well as the data and
1742      * then validates that it matches the value stored in the header.
1743      * If there is a checksum mismatch, then return false. Otherwise
1744      * return true.
1745      */
1746     protected boolean validateBlockChecksum(HFileBlock block,  byte[] data, int hdrSize)
1747         throws IOException {
1748       return ChecksumUtil.validateBlockChecksum(pathName, block, data, hdrSize);
1749     }
1750 
1751     @Override
1752     public void closeStreams() throws IOException {
1753       streamWrapper.close();
1754     }
1755 
1756     @Override
1757     public String toString() {
1758       return "hfs=" + hfs + ", path=" + pathName + ", fileContext=" + fileContext;
1759     }
1760   }
1761 
1762   @Override
1763   public int getSerializedLength() {
1764     if (buf != null) {
1765       // include extra bytes for the next header when it's available.
1766       int extraSpace = hasNextBlockHeader() ? headerSize() : 0;
1767       return this.buf.limit() + extraSpace + HFileBlock.EXTRA_SERIALIZATION_SPACE;
1768     }
1769     return 0;
1770   }
1771 
1772   @Override
1773   public void serialize(ByteBuffer destination) {
1774     this.buf.get(destination, 0, getSerializedLength()
1775         - EXTRA_SERIALIZATION_SPACE);
1776     serializeExtraInfo(destination);
1777   }
1778 
1779   public void serializeExtraInfo(ByteBuffer destination) {
1780     destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0);
1781     destination.putLong(this.offset);
1782     destination.putInt(this.nextBlockOnDiskSizeWithHeader);
1783     destination.rewind();
1784   }
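  /*
   * Illustrative note, not part of the upstream source: serializeExtraInfo() appends
   * one byte (the HBase-checksum flag) + eight bytes (offset) + four bytes
   * (nextBlockOnDiskSizeWithHeader), which is the space getSerializedLength()
   * reserves beyond the block buffer itself as EXTRA_SERIALIZATION_SPACE
   * (1 + 8 + 4 = 13 bytes).
   */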
1785 
1786   @Override
1787   public CacheableDeserializer<Cacheable> getDeserializer() {
1788     return HFileBlock.blockDeserializer;
1789   }
1790 
1791   @Override
1792   public int hashCode() {
1793     int result = 1;
1794     result = result * 31 + blockType.hashCode();
1795     result = result * 31 + nextBlockOnDiskSizeWithHeader;
1796     result = result * 31 + (int) (offset ^ (offset >>> 32));
1797     result = result * 31 + onDiskSizeWithoutHeader;
1798     result = result * 31 + (int) (prevBlockOffset ^ (prevBlockOffset >>> 32));
1799     result = result * 31 + uncompressedSizeWithoutHeader;
1800     result = result * 31 + buf.hashCode();
1801     return result;
1802   }
1803 
1804   @Override
1805   public boolean equals(Object comparison) {
1806     if (this == comparison) {
1807       return true;
1808     }
1809     if (comparison == null) {
1810       return false;
1811     }
1812     if (comparison.getClass() != this.getClass()) {
1813       return false;
1814     }
1815 
1816     HFileBlock castedComparison = (HFileBlock) comparison;
1817 
1818     if (castedComparison.blockType != this.blockType) {
1819       return false;
1820     }
1821     if (castedComparison.nextBlockOnDiskSizeWithHeader != this.nextBlockOnDiskSizeWithHeader) {
1822       return false;
1823     }
1824     if (castedComparison.offset != this.offset) {
1825       return false;
1826     }
1827     if (castedComparison.onDiskSizeWithoutHeader != this.onDiskSizeWithoutHeader) {
1828       return false;
1829     }
1830     if (castedComparison.prevBlockOffset != this.prevBlockOffset) {
1831       return false;
1832     }
1833     if (castedComparison.uncompressedSizeWithoutHeader != this.uncompressedSizeWithoutHeader) {
1834       return false;
1835     }
1836     if (ByteBuff.compareTo(this.buf, 0, this.buf.limit(), castedComparison.buf, 0,
1837         castedComparison.buf.limit()) != 0) {
1838       return false;
1839     }
1840     return true;
1841   }
1842 
1843   public DataBlockEncoding getDataBlockEncoding() {
1844     if (blockType == BlockType.ENCODED_DATA) {
1845       return DataBlockEncoding.getEncodingById(getDataBlockEncodingId());
1846     }
1847     return DataBlockEncoding.NONE;
1848   }
1849 
1850   byte getChecksumType() {
1851     return this.fileContext.getChecksumType().getCode();
1852   }
1853 
1854   int getBytesPerChecksum() {
1855     return this.fileContext.getBytesPerChecksum();
1856   }
1857 
1858   /** @return the size of data on disk + header. Excludes checksum. */
1859   int getOnDiskDataSizeWithHeader() {
1860     return this.onDiskDataSizeWithHeader;
1861   }
1862 
1863   /**
1864    * Calculate the number of bytes required to store all the checksums
1865    * for this block. Each checksum value is a 4 byte integer.
1866    */
1867   int totalChecksumBytes() {
1868     // If the hfile block has minorVersion 0, then there are no checksum
1869     // data to validate. Similarly, a zero value in this.bytesPerChecksum
1870     // indicates that cached blocks do not have checksum data because
1871     // checksums were already validated when the block was read from disk.
1872     if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) {
1873       return 0;
1874     }
1875     return (int) ChecksumUtil.numBytes(onDiskDataSizeWithHeader,
1876         this.fileContext.getBytesPerChecksum());
1877   }
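  /*
   * Illustrative note, not part of the upstream source: assuming ChecksumUtil.numBytes
   * rounds the covered size up to whole bytesPerChecksum chunks, a block with
   * onDiskDataSizeWithHeader = 33000 and bytesPerChecksum = 16384 needs
   * ceil(33000 / 16384) = 3 chunks, i.e. 3 * 4 = 12 checksum bytes.
   */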
1878 
1879   /**
1880    * Returns the size of this block header.
1881    */
1882   public int headerSize() {
1883     return headerSize(this.fileContext.isUseHBaseChecksum());
1884   }
1885 
1886   /**
1887    * Maps a minor version to the size of the header.
1888    */
1889   public static int headerSize(boolean usesHBaseChecksum) {
1890     if (usesHBaseChecksum) {
1891       return HConstants.HFILEBLOCK_HEADER_SIZE;
1892     }
1893     return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
1894   }
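  /*
   * Illustrative note, not part of the upstream source: the no-checksum header is
   * magic (8) + onDiskSizeWithoutHeader (4) + uncompressedSizeWithoutHeader (4) +
   * prevBlockOffset (8) = 24 bytes; with HBase checksums the header additionally
   * carries checksumType (1) + bytesPerChecksum (4) + onDiskDataSizeWithHeader (4),
   * for 33 bytes in total.
   */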
1895 
1896   /**
1897    * Return the appropriate DUMMY_HEADER for the minor version
1898    */
1899   public byte[] getDummyHeaderForVersion() {
1900     return getDummyHeaderForVersion(this.fileContext.isUseHBaseChecksum());
1901   }
1902 
1903   /**
1904    * Return the appropriate DUMMY_HEADER for the minor version
1905    */
1906   static private byte[] getDummyHeaderForVersion(boolean usesHBaseChecksum) {
1907     if (usesHBaseChecksum) {
1908       return HConstants.HFILEBLOCK_DUMMY_HEADER;
1909     }
1910     return DUMMY_HEADER_NO_CHECKSUM;
1911   }
1912 
1913   /**
1914    * @return the HFileContext used to create this HFileBlock. Not necessarily the
1915    * fileContext for the file from which this block's data was originally read.
1916    */
1917   public HFileContext getHFileContext() {
1918     return this.fileContext;
1919   }
1920 
1921   @Override
1922   public MemoryType getMemoryType() {
1923     return this.memType;
1924   }
1925 
1926   /**
1927    * @return true if this block is backed by a shared memory area (such as that of a BucketCache).
1928    */
1929   public boolean usesSharedMemory() {
1930     return this.memType == MemoryType.SHARED;
1931   }
1932 
1933   /**
1934    * Convert the contents of the block header into a human readable string.
1935    * This is mostly helpful for debugging. This assumes that the block
1936    * has minor version &gt; 0.
1937    */
1938   static String toStringHeader(ByteBuff buf) throws IOException {
1939     byte[] magicBuf = new byte[Math.min(buf.limit() - buf.position(), BlockType.MAGIC_LENGTH)];
1940     buf.get(magicBuf);
1941     BlockType bt = BlockType.parse(magicBuf, 0, BlockType.MAGIC_LENGTH);
1942     int compressedBlockSizeNoHeader = buf.getInt();
1943     int uncompressedBlockSizeNoHeader = buf.getInt();
1944     long prevBlockOffset = buf.getLong();
1945     byte cksumtype = buf.get();
1946     long bytesPerChecksum = buf.getInt();
1947     long onDiskDataSizeWithHeader = buf.getInt();
1948     return " Header dump: magic: " + Bytes.toString(magicBuf) +
1949                    " blockType " + bt +
1950                    " compressedBlockSizeNoHeader " +
1951                    compressedBlockSizeNoHeader +
1952                    " uncompressedBlockSizeNoHeader " +
1953                    uncompressedBlockSizeNoHeader +
1954                    " prevBlockOffset " + prevBlockOffset +
1955                    " checksumType " + ChecksumType.codeToType(cksumtype) +
1956                    " bytesPerChecksum " + bytesPerChecksum +
1957                    " onDiskDataSizeWithHeader " + onDiskDataSizeWithHeader;
1958   }
1959 }