1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.DataInputStream;
22  import java.io.DataOutput;
23  import java.io.DataOutputStream;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.nio.ByteBuffer;
27  import java.util.concurrent.locks.Lock;
28  import java.util.concurrent.locks.ReentrantLock;
29  
30  import org.apache.hadoop.fs.FSDataInputStream;
31  import org.apache.hadoop.fs.FSDataOutputStream;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.Cell;
34  import org.apache.hadoop.hbase.HConstants;
35  import org.apache.hadoop.hbase.classification.InterfaceAudience;
36  import org.apache.hadoop.hbase.fs.HFileSystem;
37  import org.apache.hadoop.hbase.io.ByteBufferInputStream;
38  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
39  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
40  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
41  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
42  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
43  import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
44  import org.apache.hadoop.hbase.util.ByteBufferUtils;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.hbase.util.ChecksumType;
47  import org.apache.hadoop.hbase.util.ClassSize;
48  import org.apache.hadoop.io.IOUtils;
49  
50  import com.google.common.base.Preconditions;
51  
52  /**
53   * Reading {@link HFile} version 1 and 2 blocks, and writing version 2 blocks.
54   * <ul>
55   * <li>In version 1 all blocks are always compressed or uncompressed, as
56   * specified by the {@link HFile}'s compression algorithm, with a type-specific
57   * magic record stored in the beginning of the compressed data (i.e. one needs
58   * to uncompress the compressed block to determine the block type). There is
59   * only a single compression algorithm setting for all blocks. Offset and size
60   * information from the block index are required to read a block.
61   * <li>In version 2 a block is structured as follows:
62   * <ul>
63   * <li>header (see Writer#finishBlock())
64   * <ul>
65   * <li>Magic record identifying the block type (8 bytes)
66   * <li>Compressed block size, excluding header, including checksum (4 bytes)
67   * <li>Uncompressed block size, excluding header, excluding checksum (4 bytes)
68   * <li>The offset of the previous block of the same type (8 bytes). This is
69   * used to be able to navigate to the previous block without going to the block index.
70   * <li>For minorVersions &gt;=1, the ordinal describing checksum type (1 byte)
71   * <li>For minorVersions &gt;=1, the number of data bytes/checksum chunk (4 bytes)
72   * <li>For minorVersions &gt;=1, the size of data on disk, including header,
73   * excluding checksums (4 bytes)
74   * </ul>
75   * </li>
76   * <li>Raw/Compressed/Encrypted/Encoded data. The compression algorithm is the
77   * same for all the blocks in the {@link HFile}, similarly to what was done in
78   * version 1.
79   * <li>For minorVersions &gt;=1, a series of 4 byte checksums, one each for
80   * the number of bytes specified by bytesPerChecksum.
81   * </ul>
82   * </ul>
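 *
 * <p>As an informal sketch of the header fields listed above (added for illustration;
 * it is not an excerpt from the file format specification), a version 2 header written
 * with HBase checksums enabled can be pictured as:
 * <pre>
 *   byte[8] magic record (block type, e.g. "DATABLK*")
 *   int     onDiskSizeWithoutHeader       // compressed size, excl. header, incl. checksums
 *   int     uncompressedSizeWithoutHeader // excl. header and checksums
 *   long    prevBlockOffset
 *   byte    checksumType                  // minor version &gt;= 1 only
 *   int     bytesPerChecksum              // minor version &gt;= 1 only
 *   int     onDiskDataSizeWithHeader      // incl. header, excl. checksums; minor version &gt;= 1 only
 * </pre>
 * followed by the raw/compressed/encrypted/encoded data and, for minor versions &gt;= 1,
 * the trailing 4-byte checksums.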
83   */
84  @InterfaceAudience.Private
85  public class HFileBlock implements Cacheable {
86  
87    /**
88    * On a checksum failure in a Reader, this many subsequent read
89    * requests switch back to using HDFS checksums before automatically
90    * re-enabling HBase checksum verification.
91     */
92    static final int CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD = 3;
93  
94    public static final boolean FILL_HEADER = true;
95    public static final boolean DONT_FILL_HEADER = false;
96  
97    /**
98     * The size of block header when blockType is {@link BlockType#ENCODED_DATA}.
99    * This extends the normal header by adding the id of the encoder.
100    */
101   public static final int ENCODED_HEADER_SIZE = HConstants.HFILEBLOCK_HEADER_SIZE
102       + DataBlockEncoding.ID_SIZE;
103 
104   static final byte[] DUMMY_HEADER_NO_CHECKSUM =
105      new byte[HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM];
106 
107   public static final int BYTE_BUFFER_HEAP_SIZE = (int) ClassSize.estimateBase(
108       ByteBuffer.wrap(new byte[0], 0, 0).getClass(), false);
109 
110   // meta.usesHBaseChecksum+offset+nextBlockOnDiskSizeWithHeader
111   public static final int EXTRA_SERIALIZATION_SPACE = Bytes.SIZEOF_BYTE + Bytes.SIZEOF_INT
112       + Bytes.SIZEOF_LONG;
113 
114   /**
115    * Each checksum value is an integer that can be stored in 4 bytes.
116    */
117   static final int CHECKSUM_SIZE = Bytes.SIZEOF_INT;
118 
119   static final CacheableDeserializer<Cacheable> blockDeserializer =
120       new CacheableDeserializer<Cacheable>() {
121         public HFileBlock deserialize(ByteBuffer buf, boolean reuse) throws IOException{
122           buf.limit(buf.limit() - HFileBlock.EXTRA_SERIALIZATION_SPACE).rewind();
123           ByteBuffer newByteBuffer;
124           if (reuse) {
125             newByteBuffer = buf.slice();
126           } else {
127            newByteBuffer = ByteBuffer.allocate(buf.limit());
128            newByteBuffer.put(buf);
129           }
130           buf.position(buf.limit());
131           buf.limit(buf.limit() + HFileBlock.EXTRA_SERIALIZATION_SPACE);
132           boolean usesChecksum = buf.get() == (byte)1;
133           HFileBlock hFileBlock = new HFileBlock(newByteBuffer, usesChecksum);
134           hFileBlock.offset = buf.getLong();
135           hFileBlock.nextBlockOnDiskSizeWithHeader = buf.getInt();
136           if (hFileBlock.hasNextBlockHeader()) {
137             hFileBlock.buf.limit(hFileBlock.buf.limit() - hFileBlock.headerSize());
138           }
139           return hFileBlock;
140         }
141 
142         @Override
143         public int getDeserialiserIdentifier() {
144           return deserializerIdentifier;
145         }
146 
147         @Override
148         public HFileBlock deserialize(ByteBuffer b) throws IOException {
149           return deserialize(b, false);
150         }
151       };
152   private static final int deserializerIdentifier;
153   static {
154     deserializerIdentifier = CacheableDeserializerIdManager
155         .registerDeserializer(blockDeserializer);
156   }
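  // Note on the cached-block layout assumed by blockDeserializer above (stated here for
  // clarity, derived from the reads it performs): the serialized form is
  // [block bytes][usesHBaseChecksum: 1 byte][offset: 8 bytes][nextBlockOnDiskSizeWithHeader: 4 bytes],
  // i.e. the block followed by EXTRA_SERIALIZATION_SPACE trailing bytes.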
157 
158   /** Type of block. Header field 0. */
159   private BlockType blockType;
160 
161   /** Size on disk excluding header, including checksum. Header field 1. */
162   private int onDiskSizeWithoutHeader;
163 
164   /** Size of pure data. Does not include header or checksums. Header field 2. */
165   private final int uncompressedSizeWithoutHeader;
166 
167   /** The offset of the previous block on disk. Header field 3. */
168   private final long prevBlockOffset;
169 
170   /**
171    * Size on disk of header + data. Excludes checksum. Header field 6,
172    * OR calculated from {@link #onDiskSizeWithoutHeader} when using HDFS checksum.
173    */
174   private final int onDiskDataSizeWithHeader;
175 
176   /** The in-memory representation of the hfile block */
177   private ByteBuffer buf;
178 
179   /** Metadata describing this HFile block; see {@link HFileContext} */
180   private HFileContext fileContext;
181 
182   /**
183    * The offset of this block in the file. Populated by the reader for
184    * convenience of access. This offset is not part of the block header.
185    */
186   private long offset = -1;
187 
188   /**
189    * The on-disk size of the next block, including the header, obtained by
190    * peeking into the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the next block's
191    * header, or -1 if unknown.
192    */
193   private int nextBlockOnDiskSizeWithHeader = -1;
194 
195   /**
196    * Creates a new {@link HFile} block from the given fields. This constructor
197    * is mostly used when the block data has already been read and uncompressed,
198    * and is sitting in a byte buffer.
199    *
200    * @param blockType the type of this block, see {@link BlockType}
201    * @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
202    * @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
203    * @param prevBlockOffset see {@link #prevBlockOffset}
204    * @param buf block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
205   *          uncompressed data.
206   * @param fillHeader when true, write this block's header fields into the beginning of {@code buf}.
207    * @param offset the file offset the block was read from
208    * @param onDiskDataSizeWithHeader see {@link #onDiskDataSizeWithHeader}
209    * @param fileContext HFile meta data
210    */
211   HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader,
212       long prevBlockOffset, ByteBuffer buf, boolean fillHeader, long offset,
213       int onDiskDataSizeWithHeader, HFileContext fileContext) {
214     this.blockType = blockType;
215     this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
216     this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
217     this.prevBlockOffset = prevBlockOffset;
218     this.buf = buf;
219     this.offset = offset;
220     this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
221     this.fileContext = fileContext;
222     if (fillHeader)
223       overwriteHeader();
224     this.buf.rewind();
225   }
226 
227   /**
228    * Copy constructor. Creates a shallow copy of {@code that}'s buffer.
229    */
230   HFileBlock(HFileBlock that) {
231     this.blockType = that.blockType;
232     this.onDiskSizeWithoutHeader = that.onDiskSizeWithoutHeader;
233     this.uncompressedSizeWithoutHeader = that.uncompressedSizeWithoutHeader;
234     this.prevBlockOffset = that.prevBlockOffset;
235     this.buf = that.buf.duplicate();
236     this.offset = that.offset;
237     this.onDiskDataSizeWithHeader = that.onDiskDataSizeWithHeader;
238     this.fileContext = that.fileContext;
239     this.nextBlockOnDiskSizeWithHeader = that.nextBlockOnDiskSizeWithHeader;
240   }
241 
242   /**
243    * Creates a block from an existing buffer starting with a header. Rewinds
244    * and takes ownership of the buffer. By definition of rewind, ignores the
245    * buffer position, but if you slice the buffer beforehand, it will rewind
246    * to that point. The reason this has a minorNumber and not a majorNumber is
247   * because majorNumbers indicate the format of an HFile whereas minorNumbers
248   * indicate the format inside an HFileBlock.
249    */
250   HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException {
251     b.rewind();
252     blockType = BlockType.read(b);
253     onDiskSizeWithoutHeader = b.getInt();
254     uncompressedSizeWithoutHeader = b.getInt();
255     prevBlockOffset = b.getLong();
256     HFileContextBuilder contextBuilder = new HFileContextBuilder();
257     contextBuilder.withHBaseCheckSum(usesHBaseChecksum);
258     if (usesHBaseChecksum) {
259       contextBuilder.withChecksumType(ChecksumType.codeToType(b.get()));
260       contextBuilder.withBytesPerCheckSum(b.getInt());
261       this.onDiskDataSizeWithHeader = b.getInt();
262     } else {
263       contextBuilder.withChecksumType(ChecksumType.NULL);
264       contextBuilder.withBytesPerCheckSum(0);
265       this.onDiskDataSizeWithHeader = onDiskSizeWithoutHeader +
266                                        HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
267     }
268     this.fileContext = contextBuilder.build();
269     buf = b;
270     buf.rewind();
271   }
272 
273   public BlockType getBlockType() {
274     return blockType;
275   }
276 
277   /** @return the data block encoding id that was used to encode this block */
278   public short getDataBlockEncodingId() {
279     if (blockType != BlockType.ENCODED_DATA) {
280       throw new IllegalArgumentException("Querying encoder ID of a block " +
281           "of type other than " + BlockType.ENCODED_DATA + ": " + blockType);
282     }
283     return buf.getShort(headerSize());
284   }
285 
286   /**
287    * @return the on-disk size of header + data part + checksum.
288    */
289   public int getOnDiskSizeWithHeader() {
290     return onDiskSizeWithoutHeader + headerSize();
291   }
292 
293   /**
294    * @return the on-disk size of the data part + checksum (header excluded).
295    */
296   public int getOnDiskSizeWithoutHeader() {
297     return onDiskSizeWithoutHeader;
298   }
299 
300   /**
301    * @return the uncompressed size of data part (header and checksum excluded).
302    */
303    public int getUncompressedSizeWithoutHeader() {
304     return uncompressedSizeWithoutHeader;
305   }
306 
307   /**
308    * @return the offset of the previous block of the same type in the file, or
309    *         -1 if unknown
310    */
311   public long getPrevBlockOffset() {
312     return prevBlockOffset;
313   }
314 
315   /**
316    * Rewinds {@code buf} and writes first 4 header fields. {@code buf} position
317    * is modified as side-effect.
318    */
319   private void overwriteHeader() {
320     buf.rewind();
321     blockType.write(buf);
322     buf.putInt(onDiskSizeWithoutHeader);
323     buf.putInt(uncompressedSizeWithoutHeader);
324     buf.putLong(prevBlockOffset);
325     if (this.fileContext.isUseHBaseChecksum()) {
326       buf.put(fileContext.getChecksumType().getCode());
327       buf.putInt(fileContext.getBytesPerChecksum());
328       buf.putInt(onDiskDataSizeWithHeader);
329     }
330   }
331 
332   /**
333    * Returns a buffer that does not include the header or checksum.
334    *
335    * @return the buffer with header skipped and checksum omitted.
336    */
337   public ByteBuffer getBufferWithoutHeader() {
338     ByteBuffer dup = this.buf.duplicate();
339     dup.position(headerSize());
340     dup.limit(buf.limit() - totalChecksumBytes());
341     return dup.slice();
342   }
343 
344   /**
345    * Returns the buffer this block stores internally. The clients must not
346    * modify the buffer object. This method has to be public because it is
347    * used in {@link CompoundBloomFilter} to avoid object
348    * creation on every Bloom filter lookup, but has to be used with caution.
349    * Checksum data is not included in the returned buffer but header data is.
350    *
351    * @return the buffer of this block for read-only operations
352    */
353   public ByteBuffer getBufferReadOnly() {
354     ByteBuffer dup = this.buf.duplicate();
355     dup.limit(buf.limit() - totalChecksumBytes());
356     return dup.slice();
357   }
358 
359   /**
360    * Returns the buffer of this block, including header data. The clients must
361    * not modify the buffer object. This method has to be public because it is
362    * used in {@link org.apache.hadoop.hbase.io.hfile.bucket.BucketCache} to avoid buffer copy.
363    *
364    * @return the buffer with header and checksum included for read-only operations
365    */
366   public ByteBuffer getBufferReadOnlyWithHeader() {
367     ByteBuffer dup = this.buf.duplicate();
368     return dup.slice();
369   }
370 
371   /**
372    * Returns a byte buffer of this block, including header data and checksum, positioned at
373    * the beginning of header. The underlying data array is not copied.
374    *
375    * @return the byte buffer with header and checksum included
376    */
377   ByteBuffer getBufferWithHeader() {
378     ByteBuffer dupBuf = buf.duplicate();
379     dupBuf.rewind();
380     return dupBuf;
381   }
382 
383   private void sanityCheckAssertion(long valueFromBuf, long valueFromField,
384       String fieldName) throws IOException {
385     if (valueFromBuf != valueFromField) {
386       throw new AssertionError(fieldName + " in the buffer (" + valueFromBuf
387           + ") is different from that in the field (" + valueFromField + ")");
388     }
389   }
390 
391   private void sanityCheckAssertion(BlockType valueFromBuf, BlockType valueFromField)
392       throws IOException {
393     if (valueFromBuf != valueFromField) {
394       throw new IOException("Block type stored in the buffer: " +
395         valueFromBuf + ", block type field: " + valueFromField);
396     }
397   }
398 
399   /**
400    * Checks if the block is internally consistent, i.e. the first
401    * {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the buffer contain a
402    * valid header consistent with the fields. Assumes a packed block structure.
403   * This function is primarily for testing and debugging, and is not
404    * thread-safe, because it alters the internal buffer pointer.
405    */
406   void sanityCheck() throws IOException {
407     buf.rewind();
408 
409     sanityCheckAssertion(BlockType.read(buf), blockType);
410 
411     sanityCheckAssertion(buf.getInt(), onDiskSizeWithoutHeader,
412         "onDiskSizeWithoutHeader");
413 
414     sanityCheckAssertion(buf.getInt(), uncompressedSizeWithoutHeader,
415         "uncompressedSizeWithoutHeader");
416 
417     sanityCheckAssertion(buf.getLong(), prevBlockOffset, "prevBlockOffset");
418     if (this.fileContext.isUseHBaseChecksum()) {
419       sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), "checksumType");
420       sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(), "bytesPerChecksum");
421       sanityCheckAssertion(buf.getInt(), onDiskDataSizeWithHeader, "onDiskDataSizeWithHeader");
422     }
423 
424     int cksumBytes = totalChecksumBytes();
425     int expectedBufLimit = onDiskDataSizeWithHeader + cksumBytes;
426     if (buf.limit() != expectedBufLimit) {
427       throw new AssertionError("Expected buffer limit " + expectedBufLimit
428           + ", got " + buf.limit());
429     }
430 
431     // We might optionally allocate HFILEBLOCK_HEADER_SIZE more bytes to read the next
432     // block's header, so there are two sensible values for buffer capacity.
433     int hdrSize = headerSize();
434     if (buf.capacity() != expectedBufLimit &&
435         buf.capacity() != expectedBufLimit + hdrSize) {
436       throw new AssertionError("Invalid buffer capacity: " + buf.capacity() +
437           ", expected " + expectedBufLimit + " or " + (expectedBufLimit + hdrSize));
438     }
439   }
440 
441   @Override
442   public String toString() {
443     StringBuilder sb = new StringBuilder()
444       .append("HFileBlock [")
445       .append(" fileOffset=").append(offset)
446       .append(" headerSize()=").append(headerSize())
447       .append(" blockType=").append(blockType)
448       .append(" onDiskSizeWithoutHeader=").append(onDiskSizeWithoutHeader)
449       .append(" uncompressedSizeWithoutHeader=").append(uncompressedSizeWithoutHeader)
450       .append(" prevBlockOffset=").append(prevBlockOffset)
451       .append(" isUseHBaseChecksum()=").append(fileContext.isUseHBaseChecksum());
452     if (fileContext.isUseHBaseChecksum()) {
453       sb.append(" checksumType=").append(ChecksumType.codeToType(this.buf.get(24)))
454         .append(" bytesPerChecksum=").append(this.buf.getInt(24 + 1))
455         .append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader);
456     } else {
457       sb.append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader)
458         .append("(").append(onDiskSizeWithoutHeader)
459         .append("+").append(HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM).append(")");
460     }
461     String dataBegin = null;
462     if (buf.hasArray()) {
463       dataBegin = Bytes.toStringBinary(buf.array(), buf.arrayOffset() + headerSize(),
464           Math.min(32, buf.limit() - buf.arrayOffset() - headerSize()));
465     } else {
466       ByteBuffer bufWithoutHeader = getBufferWithoutHeader();
467       byte[] dataBeginBytes = new byte[Math.min(32,
468           bufWithoutHeader.limit() - bufWithoutHeader.position())];
469       bufWithoutHeader.get(dataBeginBytes);
470       dataBegin = Bytes.toStringBinary(dataBeginBytes);
471     }
472     sb.append(" getOnDiskSizeWithHeader()=").append(getOnDiskSizeWithHeader())
473       .append(" totalChecksumBytes()=").append(totalChecksumBytes())
474       .append(" isUnpacked()=").append(isUnpacked())
475       .append(" buf=[ ").append(buf).append(" ]")
476       .append(" dataBeginsWith=").append(dataBegin)
477       .append(" fileContext=").append(fileContext)
478       .append(" ]");
479     return sb.toString();
480   }
481 
482   /**
483   * Called after reading a block with a caller-provided on-disk size; validates it against the size in the block header.
484    */
485   private void validateOnDiskSizeWithoutHeader(int expectedOnDiskSizeWithoutHeader)
486   throws IOException {
487     if (onDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
488       String dataBegin = null;
489       if (buf.hasArray()) {
490         dataBegin = Bytes.toStringBinary(buf.array(), buf.arrayOffset(), Math.min(32, buf.limit()));
491       } else {
492         ByteBuffer bufDup = getBufferReadOnly();
493         byte[] dataBeginBytes = new byte[Math.min(32, bufDup.limit() - bufDup.position())];
494         bufDup.get(dataBeginBytes);
495         dataBegin = Bytes.toStringBinary(dataBeginBytes);
496       }
497       String blockInfoMsg =
498         "Block offset: " + offset + ", data starts with: " + dataBegin;
499       throw new IOException("On-disk size without header provided is "
500           + expectedOnDiskSizeWithoutHeader + ", but block "
501           + "header contains " + onDiskSizeWithoutHeader + ". " +
502           blockInfoMsg);
503     }
504   }
505 
506   /**
507    * Retrieves the decompressed/decrypted view of this block. An encoded block remains in its
508    * encoded structure. Internal structures are shared between instances where applicable.
509    */
510   HFileBlock unpack(HFileContext fileContext, FSReader reader) throws IOException {
511     if (!fileContext.isCompressedOrEncrypted()) {
512       // TODO: cannot use our own fileContext here because HFileBlock(ByteBuffer, boolean),
513       // which is used for block serialization to L2 cache, does not preserve encoding and
514       // encryption details.
515       return this;
516     }
517 
518     HFileBlock unpacked = new HFileBlock(this);
519     unpacked.allocateBuffer(); // allocates space for the decompressed block
520 
521     HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA ?
522       reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
523 
524     ByteBuffer dup = this.buf.duplicate();
525     dup.position(this.headerSize());
526     dup = dup.slice();
527     ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
528       unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(),
529       dup);
530 
531     // Preserve the next block's header bytes in the new block if we have them.
532     if (unpacked.hasNextBlockHeader()) {
533       // Both buffers are limited to the end of the checksum bytes, excluding the next
534       // block's header. The call to copyFromBufferToBuffer() below does positional
535       // read/write when either buffer is a DirectByteBuffer, so we change the limit on
536       // duplicate buffers; no data is copied, only new ByteBuffer views are created.
537       ByteBuffer inDup = this.buf.duplicate();
538       inDup.limit(inDup.limit() + headerSize());
539       ByteBuffer outDup = unpacked.buf.duplicate();
540       outDup.limit(outDup.limit() + unpacked.headerSize());
541       ByteBufferUtils.copyFromBufferToBuffer(
542           outDup,
543           inDup,
544           this.onDiskDataSizeWithHeader,
545           unpacked.headerSize() + unpacked.uncompressedSizeWithoutHeader
546               + unpacked.totalChecksumBytes(), unpacked.headerSize());
547     }
548     return unpacked;
549   }
550 
551   /**
552    * Return true when this buffer includes next block's header.
553    */
554   private boolean hasNextBlockHeader() {
555     return nextBlockOnDiskSizeWithHeader > 0;
556   }
557 
558   /**
559    * Always allocates a new buffer of the correct size. Copies header bytes
560    * from the existing buffer. Does not change header fields.
561    * Reserves room for the checksum bytes as well.
562    */
563   private void allocateBuffer() {
564     int cksumBytes = totalChecksumBytes();
565     int headerSize = headerSize();
566     int capacityNeeded = headerSize + uncompressedSizeWithoutHeader +
567         cksumBytes + (hasNextBlockHeader() ? headerSize : 0);
568 
569     // TODO: should we consider allocating this buffer off-heap?
570     ByteBuffer newBuf = ByteBuffer.allocate(capacityNeeded);
571 
572     // Copy header bytes into newBuf.
573     // newBuf is HBB so no issue in calling array()
574     ByteBuffer dup = buf.duplicate();
575     dup.position(0);
576     dup.get(newBuf.array(), newBuf.arrayOffset(), headerSize);
577 
578     buf = newBuf;
579     // set limit to exclude next block's header
580     buf.limit(headerSize + uncompressedSizeWithoutHeader + cksumBytes);
581   }
582 
583   /**
584    * Return true when this block's buffer has been unpacked, false otherwise. Note this is a
585    * calculated heuristic, not a tracked attribute of the block.
586    */
587   public boolean isUnpacked() {
588     final int cksumBytes = totalChecksumBytes();
589     final int headerSize = headerSize();
590     final int expectedCapacity = headerSize + uncompressedSizeWithoutHeader + cksumBytes;
591     final int bufCapacity = buf.capacity();
592     return bufCapacity == expectedCapacity || bufCapacity == expectedCapacity + headerSize;
593   }
594 
595   /** An additional sanity-check in case no compression or encryption is being used. */
596   public void assumeUncompressed() throws IOException {
597     if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader +
598         totalChecksumBytes()) {
599       throw new IOException("Using no compression but "
600           + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", "
601           + "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader
602           + ", numChecksumBytes=" + totalChecksumBytes());
603     }
604   }
605 
606   /**
607    * @param expectedType the expected type of this block
608    * @throws IOException if this block's type is different than expected
609    */
610   public void expectType(BlockType expectedType) throws IOException {
611     if (blockType != expectedType) {
612       throw new IOException("Invalid block type: expected=" + expectedType
613           + ", actual=" + blockType);
614     }
615   }
616 
617   /** @return the offset of this block in the file it was read from */
618   public long getOffset() {
619     if (offset < 0) {
620       throw new IllegalStateException(
621           "HFile block offset not initialized properly");
622     }
623     return offset;
624   }
625 
626   /**
627    * @return a byte stream reading the data + checksum of this block
628    */
629   public DataInputStream getByteStream() {
630     ByteBuffer dup = this.buf.duplicate();
631     dup.position(this.headerSize());
632     return new DataInputStream(new ByteBufferInputStream(dup));
633   }
634 
635   @Override
636   public long heapSize() {
637     long size = ClassSize.align(
638         ClassSize.OBJECT +
639         // Block type, byte buffer and meta references
640         3 * ClassSize.REFERENCE +
641         // onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader,
642         // onDiskDataSizeWithHeader and nextBlockOnDiskSizeWithHeader
643         4 * Bytes.SIZEOF_INT +
644         // This and previous block offset
645         2 * Bytes.SIZEOF_LONG +
646         // Heap size of the meta object. meta will be always not null.
647         fileContext.heapSize()
648     );
649 
650     if (buf != null) {
651       // Deep overhead of the byte buffer. Needs to be aligned separately.
652       size += ClassSize.align(buf.capacity() + BYTE_BUFFER_HEAP_SIZE);
653     }
654 
655     return ClassSize.align(size);
656   }
657 
658   /**
659    * Read from an input stream. Analogous to
660    * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but specifies a
661    * number of "extra" bytes that would be desirable but not absolutely
662    * necessary to read.
663    *
664    * @param in the input stream to read from
665    * @param buf the buffer to read into
666    * @param bufOffset the destination offset in the buffer
667    * @param necessaryLen the number of bytes that are absolutely necessary to
668    *          read
669    * @param extraLen the number of extra bytes that would be nice to read
670    * @return true if succeeded reading the extra bytes
671    * @throws IOException if failed to read the necessary bytes
672    */
673   public static boolean readWithExtra(InputStream in, byte[] buf,
674       int bufOffset, int necessaryLen, int extraLen) throws IOException {
675     int bytesRemaining = necessaryLen + extraLen;
676     while (bytesRemaining > 0) {
677       int ret = in.read(buf, bufOffset, bytesRemaining);
678       if (ret == -1 && bytesRemaining <= extraLen) {
679         // We could not read the "extra data", but that is OK.
680         break;
681       }
682 
683       if (ret < 0) {
684         throw new IOException("Premature EOF from inputStream (read "
685             + "returned " + ret + ", was trying to read " + necessaryLen
686             + " necessary bytes and " + extraLen + " extra bytes, "
687             + "successfully read "
688             + (necessaryLen + extraLen - bytesRemaining));
689       }
690       bufOffset += ret;
691       bytesRemaining -= ret;
692     }
693     return bytesRemaining <= 0;
694   }
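  // Illustrative call only (the variables below are assumptions for this sketch, not part
  // of this file): when reading a block it is "nice to have" the next block's header too,
  // so a caller can request it as extra bytes:
  //
  //   boolean gotNextHeader =
  //       readWithExtra(in, dest, 0, onDiskSizeWithHeader, hdrSize);
  //   // true  => dest also holds the next block's hdrSize header bytes
  //   // false => EOF was reached before the extra bytes could be read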
695 
696   /**
697    * @return the on-disk size of the next block (including the header size)
698    *         that was read by peeking into the next block's header
699    */
700   public int getNextBlockOnDiskSizeWithHeader() {
701     return nextBlockOnDiskSizeWithHeader;
702   }
703 
704   /**
705    * Unified version 2 {@link HFile} block writer. The intended usage pattern
706    * is as follows:
707    * <ol>
708    * <li>Construct an {@link HFileBlock.Writer}, providing a data block encoder (optional) and an {@link HFileContext}.
709    * <li>Call {@link Writer#startWriting} and get a data stream to write to.
710    * <li>Write your data into the stream.
711    * <li>Call {@link Writer#writeHeaderAndData(FSDataOutputStream)} as many times as you need to
712    * store the serialized block into an external stream.
713    * <li>Repeat to write more blocks.
714    * </ol>
715    * <p>
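   * A minimal sketch of that pattern (for illustration only; the context {@code ctx},
   * the stream {@code out} and the payload bytes are assumptions of this example, not
   * values defined in this file):
   * <pre>
   *   HFileBlock.Writer w = new HFileBlock.Writer(null, ctx); // null selects NoOpDataBlockEncoder
   *   DataOutputStream dos = w.startWriting(BlockType.META);
   *   dos.write(payload);          // uncompressed block payload
   *   w.writeHeaderAndData(out);   // compresses, checksums and writes the block to out
   * </pre>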
716    */
717   public static class Writer {
718 
719     private enum State {
720       INIT,
721       WRITING,
722       BLOCK_READY
723     };
724 
725     /** Writer state. Used to ensure the correct usage protocol. */
726     private State state = State.INIT;
727 
728     /** Data block encoder used for data blocks */
729     private final HFileDataBlockEncoder dataBlockEncoder;
730 
731     private HFileBlockEncodingContext dataBlockEncodingCtx;
732 
733     /** block encoding context for non-data blocks */
734     private HFileBlockDefaultEncodingContext defaultBlockEncodingCtx;
735 
736     /**
737      * The stream we use to accumulate data in uncompressed format for each
738      * block. We reset this stream at the end of each block and reuse it. The
739      * header is written as the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes into this
740      * stream.
741      */
742     private ByteArrayOutputStream baosInMemory;
743 
744     /**
745      * Current block type. Set in {@link #startWriting(BlockType)}. Could be
746      * changed in {@link #finishBlock()} from {@link BlockType#DATA}
747      * to {@link BlockType#ENCODED_DATA}.
748      */
749     private BlockType blockType;
750 
751     /**
752      * A stream that we write uncompressed bytes to, which compresses them and
753      * writes them to {@link #baosInMemory}.
754      */
755     private DataOutputStream userDataStream;
756 
757     // Size of actual data being written. Not considering the block encoding/compression. This
758     // includes the header size also.
759     private int unencodedDataSizeWritten;
760 
761     /**
762      * Bytes to be written to the file system, including the header. Compressed
763      * if compression is turned on. It also includes the checksum data that
764      * immediately follows the block data. (header + data + checksums)
765      */
766     private byte[] onDiskBytesWithHeader;
767 
768     /**
769      * The checksum bytes for this block. They are generated over
770      * {@link #onDiskBytesWithHeader} in {@link #finishBlock()} and are written to
771      * disk immediately after it; they are not part of
772      * {@link #onDiskBytesWithHeader} itself.
773      */
774     private byte[] onDiskChecksum;
775 
776     /**
777      * Valid in the READY state. Contains the header and the uncompressed (but
778      * potentially encoded, if this is a data block) bytes, so the length is
779      * {@link #uncompressedSizeWithoutHeader} +
780      * {@link org.apache.hadoop.hbase.HConstants#HFILEBLOCK_HEADER_SIZE}.
781      * Does not store checksums.
782      */
783     private byte[] uncompressedBytesWithHeader;
784 
785     /**
786      * Current block's start offset in the {@link HFile}. Set in
787      * {@link #writeHeaderAndData(FSDataOutputStream)}.
788      */
789     private long startOffset;
790 
791     /**
792      * Offset of previous block by block type. Updated when the next block is
793      * started.
794      */
795     private long[] prevOffsetByType;
796 
797     /** The offset of the previous block of the same type */
798     private long prevOffset;
799     /** Metadata that holds information about the HFile block */
800     private HFileContext fileContext;
801 
802     /**
803      * @param dataBlockEncoder data block encoding algorithm to use
804      */
805     public Writer(HFileDataBlockEncoder dataBlockEncoder, HFileContext fileContext) {
806       this.dataBlockEncoder = dataBlockEncoder != null
807           ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
808       defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null,
809           HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
810       dataBlockEncodingCtx = this.dataBlockEncoder
811           .newDataBlockEncodingContext(HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
812 
813       if (fileContext.getBytesPerChecksum() < HConstants.HFILEBLOCK_HEADER_SIZE) {
814         throw new RuntimeException("Unsupported value of bytesPerChecksum. " +
815             " Minimum is " + HConstants.HFILEBLOCK_HEADER_SIZE + " but the configured value is " +
816             fileContext.getBytesPerChecksum());
817       }
818 
819       baosInMemory = new ByteArrayOutputStream();
820 
821       prevOffsetByType = new long[BlockType.values().length];
822       for (int i = 0; i < prevOffsetByType.length; ++i)
823         prevOffsetByType[i] = -1;
824 
825       this.fileContext = fileContext;
826     }
827 
828     /**
829      * Starts writing into the block. The previous block's data is discarded.
830      *
831      * @return the stream the user can write their data into
832      * @throws IOException
833      */
834     public DataOutputStream startWriting(BlockType newBlockType)
835         throws IOException {
836       if (state == State.BLOCK_READY && startOffset != -1) {
837         // We had a previous block that was written to a stream at a specific
838         // offset. Save that offset as the last offset of a block of that type.
839         prevOffsetByType[blockType.getId()] = startOffset;
840       }
841 
842       startOffset = -1;
843       blockType = newBlockType;
844 
845       baosInMemory.reset();
846       baosInMemory.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
847 
848       state = State.WRITING;
849 
850       // We will compress it later in finishBlock()
851       userDataStream = new DataOutputStream(baosInMemory);
852       if (newBlockType == BlockType.DATA) {
853         this.dataBlockEncoder.startBlockEncoding(dataBlockEncodingCtx, userDataStream);
854       }
855       this.unencodedDataSizeWritten = 0;
856       return userDataStream;
857     }
858 
859     /**
860      * Writes the Cell to this block
861      * @param cell
862      * @throws IOException
863      */
864     public void write(Cell cell) throws IOException{
865       expectState(State.WRITING);
866       this.unencodedDataSizeWritten += this.dataBlockEncoder.encode(cell, dataBlockEncodingCtx,
867           this.userDataStream);
868     }
869 
870     /**
871      * Returns the stream for the user to write to. The block writer takes care
872      * of handling compression and buffering for caching on write. Can only be
873      * called in the "writing" state.
874      *
875      * @return the data output stream for the user to write to
876      */
877     DataOutputStream getUserDataStream() {
878       expectState(State.WRITING);
879       return userDataStream;
880     }
881 
882     /**
883      * Transitions the block writer from the "writing" state to the "block
884      * ready" state.  Does nothing if a block is already finished.
885      */
886     void ensureBlockReady() throws IOException {
887       Preconditions.checkState(state != State.INIT,
888           "Unexpected state: " + state);
889 
890       if (state == State.BLOCK_READY)
891         return;
892 
893       // This will set state to BLOCK_READY.
894       finishBlock();
895     }
896 
897     /**
898      * An internal method that flushes the compressing stream (if using
899      * compression), serializes the header, and takes care of the separate
900      * uncompressed stream for caching on write, if applicable. Sets block
901      * write state to "block ready".
902      */
903     private void finishBlock() throws IOException {
904       if (blockType == BlockType.DATA) {
905         BufferGrabbingByteArrayOutputStream baosInMemoryCopy =
906             new BufferGrabbingByteArrayOutputStream();
907         baosInMemory.writeTo(baosInMemoryCopy);
908         this.dataBlockEncoder.endBlockEncoding(dataBlockEncodingCtx, userDataStream,
909             baosInMemoryCopy.buf, blockType);
910         blockType = dataBlockEncodingCtx.getBlockType();
911       }
912       userDataStream.flush();
913       // This does an array copy, so it is safe to cache this byte array.
914       uncompressedBytesWithHeader = baosInMemory.toByteArray();
915       prevOffset = prevOffsetByType[blockType.getId()];
916 
917       // We need to set state before we can package the block up for
918       // cache-on-write. In a way, the block is ready, but not yet encoded or
919       // compressed.
920       state = State.BLOCK_READY;
921       if (blockType == BlockType.DATA || blockType == BlockType.ENCODED_DATA) {
922         onDiskBytesWithHeader = dataBlockEncodingCtx
923             .compressAndEncrypt(uncompressedBytesWithHeader);
924       } else {
925         onDiskBytesWithHeader = defaultBlockEncodingCtx
926             .compressAndEncrypt(uncompressedBytesWithHeader);
927       }
928       int numBytes = (int) ChecksumUtil.numBytes(
929           onDiskBytesWithHeader.length,
930           fileContext.getBytesPerChecksum());
931 
932       // put the header for on disk bytes
933       putHeader(onDiskBytesWithHeader, 0,
934           onDiskBytesWithHeader.length + numBytes,
935           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
936       // set the header for the uncompressed bytes (for cache-on-write)
937       putHeader(uncompressedBytesWithHeader, 0,
938           onDiskBytesWithHeader.length + numBytes,
939           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
940 
941       onDiskChecksum = new byte[numBytes];
942       ChecksumUtil.generateChecksums(
943           onDiskBytesWithHeader, 0, onDiskBytesWithHeader.length,
944           onDiskChecksum, 0, fileContext.getChecksumType(), fileContext.getBytesPerChecksum());
945     }
946 
947     public static class BufferGrabbingByteArrayOutputStream extends ByteArrayOutputStream {
948       private byte[] buf;
949 
950       @Override
951       public void write(byte[] b, int off, int len) {
952         this.buf = b;
953       }
954 
955       public byte[] getBuffer() {
956         return this.buf;
957       }
958     }
959 
960     /**
961      * Put the header into the given byte array at the given offset.
962      * @param onDiskSize size of the block on disk header + data + checksum
963      * @param uncompressedSize size of the block after decompression (but
964      *          before optional data block decoding) including header
965      * @param onDiskDataSize size of the block on disk with header
966      *        and data but not including the checksums
967      */
968     private void putHeader(byte[] dest, int offset, int onDiskSize,
969         int uncompressedSize, int onDiskDataSize) {
970       offset = blockType.put(dest, offset);
971       offset = Bytes.putInt(dest, offset, onDiskSize - HConstants.HFILEBLOCK_HEADER_SIZE);
972       offset = Bytes.putInt(dest, offset, uncompressedSize - HConstants.HFILEBLOCK_HEADER_SIZE);
973       offset = Bytes.putLong(dest, offset, prevOffset);
974       offset = Bytes.putByte(dest, offset, fileContext.getChecksumType().getCode());
975       offset = Bytes.putInt(dest, offset, fileContext.getBytesPerChecksum());
976       Bytes.putInt(dest, offset, onDiskDataSize);
977     }
978 
979     /**
980      * Similar to {@link #writeHeaderAndData(FSDataOutputStream)}, but records
981      * the offset of this block so that it can be referenced in the next block
982      * of the same type.
983      *
984      * @param out
985      * @throws IOException
986      */
987     public void writeHeaderAndData(FSDataOutputStream out) throws IOException {
988       long offset = out.getPos();
989       if (startOffset != -1 && offset != startOffset) {
990         throw new IOException("A " + blockType + " block written to a "
991             + "stream twice, first at offset " + startOffset + ", then at "
992             + offset);
993       }
994       startOffset = offset;
995 
996       finishBlockAndWriteHeaderAndData((DataOutputStream) out);
997     }
998 
999     /**
1000      * Writes the header and the compressed data of this block (or uncompressed
1001      * data when not using compression) into the given stream. Can be called in
1002      * the "writing" state or in the "block ready" state. If called in the
1003      * "writing" state, transitions the writer to the "block ready" state.
1004      *
1005      * @param out the output stream to write the block to
1006      * @throws IOException
1007      */
1008     protected void finishBlockAndWriteHeaderAndData(DataOutputStream out)
1009       throws IOException {
1010       ensureBlockReady();
1011       out.write(onDiskBytesWithHeader);
1012       out.write(onDiskChecksum);
1013     }
1014 
1015     /**
1016      * Returns the header and the compressed data (or uncompressed data when not
1017      * using compression) as a byte array. Can be called in the "writing" state
1018      * or in the "block ready" state. If called in the "writing" state,
1019      * transitions the writer to the "block ready" state. This returns
1020      * the header + data + checksums stored on disk.
1021      *
1022      * @return header and data as they would be stored on disk in a byte array
1023      * @throws IOException
1024      */
1025     byte[] getHeaderAndDataForTest() throws IOException {
1026       ensureBlockReady();
1027       // This is not very optimal, because we are doing an extra copy.
1028       // But this method is used only by unit tests.
1029       byte[] output =
1030           new byte[onDiskBytesWithHeader.length
1031               + onDiskChecksum.length];
1032       System.arraycopy(onDiskBytesWithHeader, 0, output, 0,
1033           onDiskBytesWithHeader.length);
1034       System.arraycopy(onDiskChecksum, 0, output,
1035           onDiskBytesWithHeader.length, onDiskChecksum.length);
1036       return output;
1037     }
1038 
1039     /**
1040      * Releases resources used by this writer.
1041      */
1042     public void release() {
1043       if (dataBlockEncodingCtx != null) {
1044         dataBlockEncodingCtx.close();
1045         dataBlockEncodingCtx = null;
1046       }
1047       if (defaultBlockEncodingCtx != null) {
1048         defaultBlockEncodingCtx.close();
1049         defaultBlockEncodingCtx = null;
1050       }
1051     }
1052 
1053     /**
1054      * Returns the on-disk size of the data portion of the block. This is the
1055      * compressed size if compression is enabled. Can only be called in the
1056      * "block ready" state. Header is not compressed, and its size is not
1057      * included in the return value.
1058      *
1059      * @return the on-disk size of the block, not including the header.
1060      */
1061     int getOnDiskSizeWithoutHeader() {
1062       expectState(State.BLOCK_READY);
1063       return onDiskBytesWithHeader.length
1064           + onDiskChecksum.length
1065           - HConstants.HFILEBLOCK_HEADER_SIZE;
1066     }
1067 
1068     /**
1069      * Returns the on-disk size of the block. Can only be called in the
1070      * "block ready" state.
1071      *
1072      * @return the on-disk size of the block ready to be written, including the
1073      *         header size, the data and the checksum data.
1074      */
1075     int getOnDiskSizeWithHeader() {
1076       expectState(State.BLOCK_READY);
1077       return onDiskBytesWithHeader.length + onDiskChecksum.length;
1078     }
1079 
1080     /**
1081      * The uncompressed size of the block data. Does not include header size.
1082      */
1083     int getUncompressedSizeWithoutHeader() {
1084       expectState(State.BLOCK_READY);
1085       return uncompressedBytesWithHeader.length - HConstants.HFILEBLOCK_HEADER_SIZE;
1086     }
1087 
1088     /**
1089      * The uncompressed size of the block data, including header size.
1090      */
1091     int getUncompressedSizeWithHeader() {
1092       expectState(State.BLOCK_READY);
1093       return uncompressedBytesWithHeader.length;
1094     }
1095 
1096     /** @return true if a block is being written  */
1097     public boolean isWriting() {
1098       return state == State.WRITING;
1099     }
1100 
1101     /**
1102      * Returns the number of bytes written into the current block so far, or
1103      * zero if not writing the block at the moment. Note that this will return
1104      * zero in the "block ready" state as well.
1105      *
1106      * @return the number of bytes written
1107      */
1108     public int blockSizeWritten() {
1109       if (state != State.WRITING) return 0;
1110       return this.unencodedDataSizeWritten;
1111     }
1112 
1113     /**
1114      * Returns the header followed by the uncompressed data, even if using
1115      * compression. This is needed for storing uncompressed blocks in the block
1116      * cache. Can be called in the "writing" state or the "block ready" state.
1117      * Returns only the header and data, does not include checksum data.
1118      *
1119      * @return uncompressed block bytes for caching on write
1120      */
1121     ByteBuffer getUncompressedBufferWithHeader() {
1122       expectState(State.BLOCK_READY);
1123       return ByteBuffer.wrap(uncompressedBytesWithHeader);
1124     }
1125 
1126     /**
1127      * Returns the header followed by the on-disk (compressed/encoded/encrypted) data. This is
1128      * needed for storing packed blocks in the block cache. Expects calling semantics identical to
1129      * {@link #getUncompressedBufferWithHeader()}. Returns only the header and data,
1130      * Does not include checksum data.
1131      *
1132      * @return packed block bytes for caching on write
1133      */
1134     ByteBuffer getOnDiskBufferWithHeader() {
1135       expectState(State.BLOCK_READY);
1136       return ByteBuffer.wrap(onDiskBytesWithHeader);
1137     }
1138 
1139     private void expectState(State expectedState) {
1140       if (state != expectedState) {
1141         throw new IllegalStateException("Expected state: " + expectedState +
1142             ", actual state: " + state);
1143       }
1144     }
1145 
1146     /**
1147      * Takes the given {@link BlockWritable} instance, creates a new block of
1148      * its appropriate type, writes the writable into this block, and flushes
1149      * the block into the output stream. The writer is instructed not to buffer
1150      * uncompressed bytes for cache-on-write.
1151      *
1152      * @param bw the block-writable object to write as a block
1153      * @param out the file system output stream
1154      * @throws IOException
1155      */
1156     public void writeBlock(BlockWritable bw, FSDataOutputStream out)
1157         throws IOException {
1158       bw.writeToBlock(startWriting(bw.getBlockType()));
1159       writeHeaderAndData(out);
1160     }
1161 
1162     /**
1163      * Creates a new HFileBlock. Checksums have already been validated, so
1164      * the byte buffer passed into the constructor of this newly created
1165      * block does not have checksum data even though the header minor
1166      * version is MINOR_VERSION_WITH_CHECKSUM. This is indicated by setting a
1167      * 0 value in bytesPerChecksum.
1168      */
1169     public HFileBlock getBlockForCaching(CacheConfig cacheConf) {
1170       HFileContext newContext = new HFileContextBuilder()
1171                                 .withBlockSize(fileContext.getBlocksize())
1172                                 .withBytesPerCheckSum(0)
1173                                 .withChecksumType(ChecksumType.NULL) // no checksums in cached data
1174                                 .withCompression(fileContext.getCompression())
1175                                 .withDataBlockEncoding(fileContext.getDataBlockEncoding())
1176                                 .withHBaseCheckSum(fileContext.isUseHBaseChecksum())
1177                                 .withCompressTags(fileContext.isCompressTags())
1178                                 .withIncludesMvcc(fileContext.isIncludesMvcc())
1179                                 .withIncludesTags(fileContext.isIncludesTags())
1180                                 .build();
1181       return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
1182           getUncompressedSizeWithoutHeader(), prevOffset,
1183           cacheConf.shouldCacheCompressed(blockType.getCategory()) ?
1184             getOnDiskBufferWithHeader() :
1185             getUncompressedBufferWithHeader(),
1186           FILL_HEADER, startOffset,
1187           onDiskBytesWithHeader.length + onDiskChecksum.length, newContext);
1188     }
1189   }
1190 
1191   /** Something that can be written into a block. */
1192   public interface BlockWritable {
1193 
1194     /** The type of block this data should use. */
1195     BlockType getBlockType();
1196 
1197     /**
1198      * Writes the block to the provided stream. Must not write any magic
1199      * records.
1200      *
1201      * @param out a stream to write uncompressed data into
1202      */
1203     void writeToBlock(DataOutput out) throws IOException;
1204   }
1205 
1206   // Block readers and writers
1207 
1208   /** An interface allowing to iterate {@link HFileBlock}s. */
1209   public interface BlockIterator {
1210 
1211     /**
1212      * Get the next block, or null if there are no more blocks to iterate.
1213      */
1214     HFileBlock nextBlock() throws IOException;
1215 
1216     /**
1217      * Similar to {@link #nextBlock()} but checks block type, throws an
1218      * exception if incorrect, and returns the HFile block
1219      */
1220     HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
1221   }
1222 
1223   /** A full-fledged reader with iteration ability. */
1224   public interface FSReader {
1225 
1226     /**
1227      * Reads the block at the given offset in the file with the given on-disk
1228      * size and uncompressed size.
1229      *
1230      * @param offset the offset in the file at which the block starts
1231      * @param onDiskSize the on-disk size of the entire block, including all
1232      *          applicable headers, or -1 if unknown
1233      * @param uncompressedSize the uncompressed size of the compressed part of
1234      *          the block, or -1 if unknown
1235      * @return the newly read block
1236      */
1237     HFileBlock readBlockData(long offset, long onDiskSize,
1238         int uncompressedSize, boolean pread) throws IOException;
1239 
1240     /**
1241      * Creates a block iterator over the given portion of the {@link HFile}.
1242      * The iterator returns blocks whose offsets satisfy startOffset &lt;=
1243      * offset &lt; endOffset. Returned blocks are always unpacked.
1244      *
1245      * @param startOffset the offset of the block to start iteration with
1246      * @param endOffset the offset to end iteration at (exclusive)
1247      * @return an iterator of blocks between the two given offsets
1248      */
1249     BlockIterator blockRange(long startOffset, long endOffset);
1250 
1251     /** Closes the backing streams */
1252     void closeStreams() throws IOException;
1253 
1254     /** Get a decoder for {@link BlockType#ENCODED_DATA} blocks from this file. */
1255     HFileBlockDecodingContext getBlockDecodingContext();
1256 
1257     /** Get the default decoder for blocks from this file. */
1258     HFileBlockDecodingContext getDefaultBlockDecodingContext();
1259 
1260     void setIncludesMemstoreTS(boolean includesMemstoreTS);
1261     void setDataBlockEncoder(HFileDataBlockEncoder encoder);
1262   }
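  // Illustrative use of the two interfaces above (the variables "reader", "start" and "end"
  // are assumptions for this sketch, not part of this file):
  //
  //   HFileBlock.BlockIterator it = reader.blockRange(start, end);
  //   for (HFileBlock b = it.nextBlock(); b != null; b = it.nextBlock()) {
  //     // blocks returned by blockRange() are already unpacked
  //   }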
1263 
1264   /**
1265    * We always prefetch the header of the next block, so that we know its
1266    * on-disk size in advance and can read it in one operation.
1267    */
1268   private static class PrefetchedHeader {
1269     long offset = -1;
1270     byte[] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
1271     final ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE);
1272   }
1273 
1274   /** Reads version 2 blocks from the filesystem. */
1275   static class FSReaderImpl implements FSReader {
1276     /** The file system stream of the underlying {@link HFile} that
1277      * does or doesn't do checksum validations in the filesystem */
1278     protected FSDataInputStreamWrapper streamWrapper;
1279 
1280     private HFileBlockDecodingContext encodedBlockDecodingCtx;
1281 
1282     /** Default context used when BlockType != {@link BlockType#ENCODED_DATA}. */
1283     private final HFileBlockDefaultDecodingContext defaultDecodingCtx;
1284 
1285     private ThreadLocal<PrefetchedHeader> prefetchedHeaderForThread =
1286         new ThreadLocal<PrefetchedHeader>() {
1287       @Override
1288       public PrefetchedHeader initialValue() {
1289         return new PrefetchedHeader();
1290       }
1291     };
1292 
1293     /** Compression algorithm used by the {@link HFile} */
1294 
1295     /** The size of the file we are reading from, or -1 if unknown. */
1296     protected long fileSize;
1297 
1298     /** The size of the header */
1299     protected final int hdrSize;
1300 
1301     /** The filesystem used to access data */
1302     protected HFileSystem hfs;
1303 
1304     /** The path (if any) where this data is coming from */
1305     protected Path path;
1306 
1307     private final Lock streamLock = new ReentrantLock();
1308 
1309     /** The default buffer size for our buffered streams */
1310     public static final int DEFAULT_BUFFER_SIZE = 1 << 20;
1311 
1312     protected HFileContext fileContext;
1313 
1314     public FSReaderImpl(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
1315         HFileContext fileContext) throws IOException {
1316       this.fileSize = fileSize;
1317       this.hfs = hfs;
1318       this.path = path;
1319       this.fileContext = fileContext;
1320       this.hdrSize = headerSize(fileContext.isUseHBaseChecksum());
1321 
1322       this.streamWrapper = stream;
1323       // Older versions of HBase didn't support checksum.
1324       this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum());
1325       defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext);
1326       encodedBlockDecodingCtx = defaultDecodingCtx;
1327     }
1328 
1329     /**
1330      * A constructor that reads files with the latest minor version.
1331      * This is used by unit tests only.
1332      */
1333     FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext)
1334     throws IOException {
1335       this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
1336     }
1337 
1338     public BlockIterator blockRange(final long startOffset, final long endOffset) {
1339       final FSReader owner = this; // handle for inner class
1340       return new BlockIterator() {
1341         private long offset = startOffset;
1342 
1343         @Override
1344         public HFileBlock nextBlock() throws IOException {
1345           if (offset >= endOffset)
1346             return null;
1347           HFileBlock b = readBlockData(offset, -1, -1, false);
1348           offset += b.getOnDiskSizeWithHeader();
1349           return b.unpack(fileContext, owner);
1350         }
1351 
1352         @Override
1353         public HFileBlock nextBlockWithBlockType(BlockType blockType)
1354             throws IOException {
1355           HFileBlock blk = nextBlock();
1356           if (blk.getBlockType() != blockType) {
1357             throw new IOException("Expected block of type " + blockType
1358                 + " but found " + blk.getBlockType());
1359           }
1360           return blk;
1361         }
1362       };
1363     }
1364 
1365     /**
1366      * Does a positional read or a seek and read into the given buffer. Returns
1367      * the on-disk size of the next block, or -1 if it could not be determined.
1368      *
1369      * @param dest destination buffer
1370      * @param destOffset offset in the destination buffer
1371      * @param size size of the block to be read
1372      * @param peekIntoNextBlock whether to read the next block's on-disk size
1373      * @param fileOffset position in the stream to read at
1374      * @param pread whether we should do a positional read
1375      * @param istream The input source of data
1376      * @return the on-disk size of the next block with header size included, or
1377      *         -1 if it could not be determined
1378      * @throws IOException
1379      */
1380     protected int readAtOffset(FSDataInputStream istream,
1381         byte[] dest, int destOffset, int size,
1382         boolean peekIntoNextBlock, long fileOffset, boolean pread)
1383         throws IOException {
1384       if (peekIntoNextBlock &&
1385           destOffset + size + hdrSize > dest.length) {
1386         // We are asked to read the next block's header as well, but there is
1387         // not enough room in the array.
1388         throw new IOException("Attempted to read " + size + " bytes and " +
1389             hdrSize + " bytes of next header into a " + dest.length +
1390             "-byte array at offset " + destOffset);
1391       }
1392 
1393       if (!pread && streamLock.tryLock()) {
1394         // Seek + read. Better for scanning.
1395         try {
1396           istream.seek(fileOffset);
1397 
1398           long realOffset = istream.getPos();
1399           if (realOffset != fileOffset) {
1400             throw new IOException("Tried to seek to " + fileOffset + " to "
1401                 + "read " + size + " bytes, but pos=" + realOffset
1402                 + " after seek");
1403           }
1404 
1405           if (!peekIntoNextBlock) {
1406             IOUtils.readFully(istream, dest, destOffset, size);
1407             return -1;
1408           }
1409 
1410           // Try to read the next block header.
1411           if (!readWithExtra(istream, dest, destOffset, size, hdrSize))
1412             return -1;
1413         } finally {
1414           streamLock.unlock();
1415         }
1416       } else {
1417         // Positional read. Better for random reads; or when the streamLock is already locked.
1418         int extraSize = peekIntoNextBlock ? hdrSize : 0;
1419         int ret = istream.read(fileOffset, dest, destOffset, size + extraSize);
1420         if (ret < size) {
1421           throw new IOException("Positional read of " + size + " bytes " +
1422               "failed at offset " + fileOffset + " (returned " + ret + ")");
1423         }
1424 
1425         if (ret == size || ret < size + extraSize) {
1426           // Could not read the next block's header, or did not try.
1427           return -1;
1428         }
1429       }
1430 
1431       assert peekIntoNextBlock;
1432       return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize;
1433     }
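    /*
     * A worked example of the peek-ahead arithmetic in readAtOffset() above,
     * with illustrative numbers: for destOffset = 0, size = 1000 and
     * hdrSize = 33, the destination array must hold at least 1033 bytes. If the
     * extra header bytes are read, the next block's on-disk size (excluding its
     * own header) sits in the 4-byte field just after the 8-byte magic, so the
     * method returns Bytes.toInt(dest, 0 + 1000 + 8) + 33, i.e. the next
     * block's on-disk size including its header.
     */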
1434 
1435     /**
1436      * Reads a version 2 block (version 1 blocks not supported and not expected). Tries to do as
1437      * little memory allocation as possible, using the provided on-disk size.
1438      *
1439      * @param offset the offset in the stream to read at
1440      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1441      *          the header, or -1 if unknown
1442      * @param uncompressedSize the uncompressed size of the block. Always
1443      *          expected to be -1. This parameter is only used in version 1.
1444      * @param pread whether to use a positional read
1445      */
1446     @Override
1447     public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL,
1448         int uncompressedSize, boolean pread)
1449     throws IOException {
1450 
1451       // get a copy of the current state of whether to validate
1452       // hbase checksums or not for this read call. This is not
1453       // thread-safe, but the one constraint is that if we decide
1454       // to skip hbase checksum verification then we are
1455       // guaranteed to use hdfs checksum verification.
1456       boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum();
1457       FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum);
1458 
1459       HFileBlock blk = readBlockDataInternal(is, offset,
1460                          onDiskSizeWithHeaderL,
1461                          uncompressedSize, pread,
1462                          doVerificationThruHBaseChecksum);
1463       if (blk == null) {
1464         HFile.LOG.warn("HBase checksum verification failed for file " +
1465                        path + " at offset " +
1466                        offset + " filesize " + fileSize +
1467                        ". Retrying read with HDFS checksums turned on...");
1468 
1469         if (!doVerificationThruHBaseChecksum) {
1470           String msg = "HBase checksum verification failed for file " +
1471                        path + " at offset " +
1472                        offset + " filesize " + fileSize +
1473                        " but this cannot happen because doVerify is " +
1474                        doVerificationThruHBaseChecksum;
1475           HFile.LOG.warn(msg);
1476           throw new IOException(msg); // cannot happen case here
1477         }
1478         HFile.checksumFailures.incrementAndGet(); // update metrics
1479 
1480         // If we have a checksum failure, we fall back into a mode where
1481         // the next few reads use HDFS level checksums. We aim to make the
1482         // next CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD reads avoid
1483         // hbase checksum verification, but since this value is set without
1484         // holding any locks, we might actually end up doing
1485         // a few more reads than precisely this number.
1486         is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD);
1487         doVerificationThruHBaseChecksum = false;
1488         blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL,
1489                                     uncompressedSize, pread,
1490                                     doVerificationThruHBaseChecksum);
1491         if (blk != null) {
1492           HFile.LOG.warn("HDFS checksum verification suceeded for file " +
1493                          path + " at offset " +
1494                          offset + " filesize " + fileSize);
1495         }
1496       }
1497       if (blk == null && !doVerificationThruHBaseChecksum) {
1498         String msg = "readBlockData failed, possibly due to " +
1499                      "checksum verification failed for file " + path +
1500                      " at offset " + offset + " filesize " + fileSize;
1501         HFile.LOG.warn(msg);
1502         throw new IOException(msg);
1503       }
1504 
1505       // If there was a checksum mismatch earlier, we retry with
1506       // HBase checksums switched off and use HDFS checksum verification.
1507       // This triggers HDFS to detect and fix corrupt replicas. The
1508       // next checksumOffCount read requests will use HDFS checksums.
1509       // The decrementing of this.checksumOffCount is not thread-safe,
1510       // but it is harmless because eventually checksumOffCount will be
1511       // a negative number.
1512       streamWrapper.checksumOk();
1513       return blk;
1514     }
1515 
1516     /**
1517      * Reads a version 2 block.
1518      *
1519      * @param offset the offset in the stream to read at
1520      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1521      *          the header, or -1 if unknown
1522      * @param uncompressedSize the uncompressed size of the block. Always
1523      *          expected to be -1. This parameter is only used in version 1.
1524      * @param pread whether to use a positional read
1525      * @param verifyChecksum Whether to use HBase checksums.
1526      *        If HBase checksum is switched off, then use HDFS checksum.
1527      * @return the HFileBlock or null if there is a HBase checksum mismatch
1528      */
1529     private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
1530         long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread,
1531         boolean verifyChecksum)
1532     throws IOException {
1533       if (offset < 0) {
1534         throw new IOException("Invalid offset=" + offset + " trying to read "
1535             + "block (onDiskSize=" + onDiskSizeWithHeaderL
1536             + ", uncompressedSize=" + uncompressedSize + ")");
1537       }
1538 
1539       if (uncompressedSize != -1) {
1540         throw new IOException("Version 2 block reader API does not need " +
1541             "the uncompressed size parameter");
1542       }
1543 
1544       if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1)
1545           || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) {
1546         throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL
1547             + ": expected to be at least " + hdrSize
1548             + " and at most " + Integer.MAX_VALUE + ", or -1 (offset="
1549             + offset + ", uncompressedSize=" + uncompressedSize + ")");
1550       }
1551 
1552       int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL;
1553       // See if we can avoid reading the header. This is desirable, because
1554       // we will not incur a backward seek operation if we have already
1555       // read this block's header as part of the previous read's look-ahead.
1556       // And we also want to skip reading the header again if it has already
1557       // been read.
1558       // TODO: How often does this optimization fire? Has to be same thread so the thread local
1559       // is pertinent and we have to be reading next block as in a big scan.
1560       PrefetchedHeader prefetchedHeader = prefetchedHeaderForThread.get();
1561       ByteBuffer headerBuf = prefetchedHeader.offset == offset? prefetchedHeader.buf: null;
1562 
1563       // Allocate enough space to fit the next block's header too.
1564       int nextBlockOnDiskSize = 0;
1565       byte[] onDiskBlock = null;
1566 
1567       HFileBlock b = null;
1568       if (onDiskSizeWithHeader > 0) {
1569         // We know the total on-disk size. Read the entire block into memory,
1570         // then parse the header. This code path is used when
1571         // doing a random read operation relying on the block index, as well as
1572         // when the client knows the on-disk size from peeking into the next
1573         // block's header (e.g. this block's header) when reading the previous
1574         // block. This is the faster and more preferable case.
1575 
1576         // Size that we have to skip in case we have already read the header.
1577         int preReadHeaderSize = headerBuf == null ? 0 : hdrSize;
1578         onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize]; // room for this block plus the
1579                                                                 // next block's header
1580         nextBlockOnDiskSize = readAtOffset(is, onDiskBlock,
1581             preReadHeaderSize, onDiskSizeWithHeader - preReadHeaderSize,
1582             true, offset + preReadHeaderSize, pread);
1583         if (headerBuf != null) {
1584           // the header has been read when reading the previous block, copy
1585           // to this block's header
1586           // headerBuf is HBB
1587           assert headerBuf.hasArray();
1588           System.arraycopy(headerBuf.array(),
1589               headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1590         } else {
1591           headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize);
1592         }
1593         // We know the total on-disk size but not the uncompressed size. Parse the header.
1594         try {
1595           // TODO: FIX!!! Expensive parse just to get a length
1596           b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1597         } catch (IOException ex) {
1598           // Seen in load testing. Provide comprehensive debug info.
1599           throw new IOException("Failed to read compressed block at "
1600               + offset
1601               + ", onDiskSizeWithoutHeader="
1602               + onDiskSizeWithHeader
1603               + ", preReadHeaderSize="
1604               + hdrSize
1605               + ", header.length="
1606               + prefetchedHeader.header.length
1607               + ", header bytes: "
1608               + Bytes.toStringBinary(prefetchedHeader.header, 0,
1609                   hdrSize), ex);
1610         }
1611         // If the caller specified an onDiskSizeWithHeader, validate it.
1612         int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
1613         assert onDiskSizeWithoutHeader >= 0;
1614         b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader);
1615       } else {
1616         // Check headerBuf to see if we have read this block's header as part of
1617         // reading the previous block. This is an optimization of peeking into
1618         // the next block's header (e.g. this block's header) when reading the
1619         // previous block. This is the faster and more preferable case. If the
1620         // header is already there, don't read the header again.
1621 
1622         // Unfortunately, we still have to do a separate read operation to
1623         // read the header.
1624         if (headerBuf == null) {
1625           // From the header, determine the on-disk size of the given hfile
1626           // block, and read the remaining data, thereby incurring two read
1627           // operations. This might happen when we are doing the first read
1628           // in a series of reads or a random read, and we don't have access
1629           // to the block index. This is costly and should happen very rarely.
1630           headerBuf = ByteBuffer.allocate(hdrSize);
1631           // headerBuf is HBB
1632           readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(),
1633               hdrSize, false, offset, pread);
1634         }
1635         // TODO: FIX!!! Expensive parse just to get a length
1636         b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1637         onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize];
1638         // headerBuf is HBB
1639         System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1640         nextBlockOnDiskSize =
1641           readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader()
1642               - hdrSize, true, offset + hdrSize, pread);
1643         onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize;
1644       }
1645 
1646       if (!fileContext.isCompressedOrEncrypted()) {
1647         b.assumeUncompressed();
1648       }
1649 
1650       if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
1651         return null;             // checksum mismatch
1652       }
1653 
1654       // The onDiskBlock will become the headerAndDataBuffer for this block.
1655       // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
1656       // contains the header of the next block, so there is no need to set the
1657       // next block's header in it.
1658       b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader),
1659         this.fileContext.isUseHBaseChecksum());
1660 
1661       b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize;
1662 
1663       // Set prefetched header
1664       if (b.hasNextBlockHeader()) {
1665         prefetchedHeader.offset = offset + b.getOnDiskSizeWithHeader();
1666         System.arraycopy(onDiskBlock, onDiskSizeWithHeader, prefetchedHeader.header, 0, hdrSize);
1667       }
1668 
1669       b.offset = offset;
1670       b.fileContext.setIncludesTags(this.fileContext.isIncludesTags());
1671       b.fileContext.setIncludesMvcc(this.fileContext.isIncludesMvcc());
1672       return b;
1673     }
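    /*
     * Shape of the buffer assembled by readBlockDataInternal() above, with
     * illustrative sizes (hdrSize = 33, onDiskSizeWithHeader = 65570):
     *
     *   onDiskBlock, 65570 + 33 bytes:
     *     [0, 33)         this block's header
     *     [33, 65570)     data plus trailing checksums (onDiskSizeWithoutHeader)
     *     [65570, 65603)  the next block's header, if the peek-ahead read found it
     *
     * When the next header is present, it is stashed in the thread-local
     * PrefetchedHeader keyed by offset + 65570, so a subsequent read of the
     * block at that offset can skip re-reading its header.
     */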
1674 
1675     public void setIncludesMemstoreTS(boolean includesMemstoreTS) {
1676       this.fileContext.setIncludesMvcc(includesMemstoreTS);
1677     }
1678 
1679     public void setDataBlockEncoder(HFileDataBlockEncoder encoder) {
1680       encodedBlockDecodingCtx = encoder.newDataBlockDecodingContext(this.fileContext);
1681     }
1682 
1683     @Override
1684     public HFileBlockDecodingContext getBlockDecodingContext() {
1685       return this.encodedBlockDecodingCtx;
1686     }
1687 
1688     @Override
1689     public HFileBlockDecodingContext getDefaultBlockDecodingContext() {
1690       return this.defaultDecodingCtx;
1691     }
1692 
1693     /**
1694      * Generates checksums for the header and data and then validates
1695      * that they match the checksum values stored with the block on disk.
1696      * If there is a checksum mismatch, then return false. Otherwise
1697      * return true.
1698      */
1699     protected boolean validateBlockChecksum(HFileBlock block,  byte[] data, int hdrSize)
1700         throws IOException {
1701       return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize);
1702     }
1703 
1704     @Override
1705     public void closeStreams() throws IOException {
1706       streamWrapper.close();
1707     }
1708 
1709     @Override
1710     public String toString() {
1711       return "hfs=" + hfs + ", path=" + path + ", fileContext=" + fileContext;
1712     }
1713   }
1714 
1715   @Override
1716   public int getSerializedLength() {
1717     if (buf != null) {
1718       // include extra bytes for the next header when it's available.
1719       int extraSpace = hasNextBlockHeader() ? headerSize() : 0;
1720       return this.buf.limit() + extraSpace + HFileBlock.EXTRA_SERIALIZATION_SPACE;
1721     }
1722     return 0;
1723   }
1724 
1725   @Override
1726   public void serialize(ByteBuffer destination) {
1727     ByteBufferUtils.copyFromBufferToBuffer(destination, this.buf, 0, getSerializedLength()
1728         - EXTRA_SERIALIZATION_SPACE);
1729     serializeExtraInfo(destination);
1730   }
1731 
1732   public void serializeExtraInfo(ByteBuffer destination) {
1733     destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0);
1734     destination.putLong(this.offset);
1735     destination.putInt(this.nextBlockOnDiskSizeWithHeader);
1736     destination.rewind();
1737   }
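  /*
   * Cache-serialization layout implied by serialize() and serializeExtraInfo()
   * above, assuming EXTRA_SERIALIZATION_SPACE (defined elsewhere in this class)
   * covers the 1 + 8 + 4 = 13 trailer bytes written by serializeExtraInfo():
   *
   *   [ block bytes: buf.limit(), plus the next block's header if prefetched ]
   *   [ usesHBaseChecksum flag        : 1 byte  ]
   *   [ offset                        : 8 bytes ]
   *   [ nextBlockOnDiskSizeWithHeader : 4 bytes ]
   *
   * getSerializedLength() is then the block bytes plus those 13 trailer bytes.
   */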
1738 
1739   @Override
1740   public CacheableDeserializer<Cacheable> getDeserializer() {
1741     return HFileBlock.blockDeserializer;
1742   }
1743 
1744   @Override
1745   public int hashCode() {
1746     int result = 1;
1747     result = result * 31 + blockType.hashCode();
1748     result = result * 31 + nextBlockOnDiskSizeWithHeader;
1749     result = result * 31 + (int) (offset ^ (offset >>> 32));
1750     result = result * 31 + onDiskSizeWithoutHeader;
1751     result = result * 31 + (int) (prevBlockOffset ^ (prevBlockOffset >>> 32));
1752     result = result * 31 + uncompressedSizeWithoutHeader;
1753     result = result * 31 + buf.hashCode();
1754     return result;
1755   }
1756 
1757   @Override
1758   public boolean equals(Object comparison) {
1759     if (this == comparison) {
1760       return true;
1761     }
1762     if (comparison == null) {
1763       return false;
1764     }
1765     if (comparison.getClass() != this.getClass()) {
1766       return false;
1767     }
1768 
1769     HFileBlock castedComparison = (HFileBlock) comparison;
1770 
1771     if (castedComparison.blockType != this.blockType) {
1772       return false;
1773     }
1774     if (castedComparison.nextBlockOnDiskSizeWithHeader != this.nextBlockOnDiskSizeWithHeader) {
1775       return false;
1776     }
1777     if (castedComparison.offset != this.offset) {
1778       return false;
1779     }
1780     if (castedComparison.onDiskSizeWithoutHeader != this.onDiskSizeWithoutHeader) {
1781       return false;
1782     }
1783     if (castedComparison.prevBlockOffset != this.prevBlockOffset) {
1784       return false;
1785     }
1786     if (castedComparison.uncompressedSizeWithoutHeader != this.uncompressedSizeWithoutHeader) {
1787       return false;
1788     }
1789     if (ByteBufferUtils.compareTo(this.buf, 0, this.buf.limit(), castedComparison.buf, 0,
1790         castedComparison.buf.limit()) != 0) {
1791       return false;
1792     }
1793     return true;
1794   }
1795 
1796   public DataBlockEncoding getDataBlockEncoding() {
1797     if (blockType == BlockType.ENCODED_DATA) {
1798       return DataBlockEncoding.getEncodingById(getDataBlockEncodingId());
1799     }
1800     return DataBlockEncoding.NONE;
1801   }
1802 
1803   byte getChecksumType() {
1804     return this.fileContext.getChecksumType().getCode();
1805   }
1806 
1807   int getBytesPerChecksum() {
1808     return this.fileContext.getBytesPerChecksum();
1809   }
1810 
1811   /** @return the size of data on disk + header. Excludes checksum. */
1812   int getOnDiskDataSizeWithHeader() {
1813     return this.onDiskDataSizeWithHeader;
1814   }
1815 
1816   /**
1817    * Calculate the number of bytes required to store all the checksums
1818    * for this block. Each checksum value is a 4 byte integer.
1819    */
1820   int totalChecksumBytes() {
1821     // If the hfile block has minorVersion 0, then there are no checksum
1822     // data to validate. Similarly, a zero value in this.bytesPerChecksum
1823     // indicates that cached blocks do not have checksum data because
1824     // checksums were already validated when the block was read from disk.
1825     if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) {
1826       return 0;
1827     }
1828     return (int) ChecksumUtil.numBytes(onDiskDataSizeWithHeader,
1829         this.fileContext.getBytesPerChecksum());
1830   }
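  /*
   * Worked example for totalChecksumBytes() above, assuming
   * ChecksumUtil.numBytes() allocates one 4-byte checksum per bytesPerChecksum
   * chunk and rounds the chunk count up: with onDiskDataSizeWithHeader = 65569
   * and bytesPerChecksum = 16384 there are ceil(65569 / 16384) = 5 chunks, so
   * the result would be 5 * 4 = 20 bytes. The figures are illustrative only.
   */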
1831 
1832   /**
1833    * Returns the size of this block header.
1834    */
1835   public int headerSize() {
1836     return headerSize(this.fileContext.isUseHBaseChecksum());
1837   }
1838 
1839   /**
1840    * Returns the size of the header, which depends on whether HBase checksums are in use.
1841    */
1842   public static int headerSize(boolean usesHBaseChecksum) {
1843     if (usesHBaseChecksum) {
1844       return HConstants.HFILEBLOCK_HEADER_SIZE;
1845     }
1846     return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
1847   }
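  /*
   * The two header sizes follow the field layout parsed in toStringHeader()
   * below: blockType magic (8) + onDiskSizeWithoutHeader (4)
   * + uncompressedSizeWithoutHeader (4) + prevBlockOffset (8) = 24 bytes without
   * HBase checksums, plus checksumType (1) + bytesPerChecksum (4)
   * + onDiskDataSizeWithHeader (4) = 33 bytes when HBase checksums are in use
   * (assuming those are the values behind the two HConstants constants).
   */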
1848 
1849   /**
1850    * Return the appropriate DUMMY_HEADER for the minor version
1851    */
1852   public byte[] getDummyHeaderForVersion() {
1853     return getDummyHeaderForVersion(this.fileContext.isUseHBaseChecksum());
1854   }
1855 
1856   /**
1857    * Return the appropriate DUMMY_HEADER for the minor version
1858    */
1859   static private byte[] getDummyHeaderForVersion(boolean usesHBaseChecksum) {
1860     if (usesHBaseChecksum) {
1861       return HConstants.HFILEBLOCK_DUMMY_HEADER;
1862     }
1863     return DUMMY_HEADER_NO_CHECKSUM;
1864   }
1865 
1866   /**
1867    * @return the HFileContext used to create this HFileBlock. Not necessarily the
1868    * fileContext for the file from which this block's data was originally read.
1869    */
1870   public HFileContext getHFileContext() {
1871     return this.fileContext;
1872   }
1873 
1874   /**
1875    * Convert the contents of the block header into a human readable string.
1876    * This is mostly helpful for debugging. This assumes that the block
1877    * has minor version > 0.
1878    */
1879   static String toStringHeader(ByteBuffer buf) throws IOException {
1880     byte[] magicBuf = new byte[Math.min(buf.limit() - buf.position(), BlockType.MAGIC_LENGTH)];
1881     buf.get(magicBuf);
1882     BlockType bt = BlockType.parse(magicBuf, 0, BlockType.MAGIC_LENGTH);
1883     int compressedBlockSizeNoHeader = buf.getInt();
1884     int uncompressedBlockSizeNoHeader = buf.getInt();
1885     long prevBlockOffset = buf.getLong();
1886     byte cksumtype = buf.get();
1887     long bytesPerChecksum = buf.getInt();
1888     long onDiskDataSizeWithHeader = buf.getInt();
1889     return " Header dump: magic: " + Bytes.toString(magicBuf) +
1890                    " blockType " + bt +
1891                    " compressedBlockSizeNoHeader " +
1892                    compressedBlockSizeNoHeader +
1893                    " uncompressedBlockSizeNoHeader " +
1894                    uncompressedBlockSizeNoHeader +
1895                    " prevBlockOffset " + prevBlockOffset +
1896                    " checksumType " + ChecksumType.codeToType(cksumtype) +
1897                    " bytesPerChecksum " + bytesPerChecksum +
1898                    " onDiskDataSizeWithHeader " + onDiskDataSizeWithHeader;
1899   }
1900 }