1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayInputStream;
21  import java.io.ByteArrayOutputStream;
22  import java.io.DataInputStream;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.nio.ByteBuffer;
28  import java.util.concurrent.locks.Lock;
29  import java.util.concurrent.locks.ReentrantLock;
30  
31  import org.apache.hadoop.fs.FSDataInputStream;
32  import org.apache.hadoop.fs.FSDataOutputStream;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.Cell;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.classification.InterfaceAudience;
37  import org.apache.hadoop.hbase.fs.HFileSystem;
38  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
39  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
40  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
41  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
42  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
43  import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
44  import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.hbase.util.ChecksumType;
47  import org.apache.hadoop.hbase.util.ClassSize;
48  import org.apache.hadoop.hbase.util.CompoundBloomFilter;
49  import org.apache.hadoop.io.IOUtils;
50  
51  import com.google.common.base.Preconditions;
52  
53  /**
54   * Reading {@link HFile} version 1 and 2 blocks, and writing version 2 blocks.
55   * <ul>
56   * <li>In version 1 all blocks are always compressed or uncompressed, as
57   * specified by the {@link HFile}'s compression algorithm, with a type-specific
58   * magic record stored in the beginning of the compressed data (i.e. one needs
59   * to uncompress the compressed block to determine the block type). There is
60   * only a single compression algorithm setting for all blocks. Offset and size
61   * information from the block index are required to read a block.
62   * <li>In version 2 a block is structured as follows:
63   * <ul>
64   * <li>header (see Writer#finishBlock())
65   * <ul>
66   * <li>Magic record identifying the block type (8 bytes)
67   * <li>Compressed block size, excluding header, including checksum (4 bytes)
68   * <li>Uncompressed block size, excluding header, excluding checksum (4 bytes)
69   * <li>The offset of the previous block of the same type (8 bytes). This is
70   * used to be able to navigate to the previous block without going to the block index
71   * <li>For minorVersions >=1, the ordinal describing checksum type (1 byte)
72   * <li>For minorVersions >=1, the number of data bytes/checksum chunk (4 bytes)
73   * <li>For minorVersions >=1, the size of data on disk, including header,
74   * excluding checksums (4 bytes)
75   * </ul>
76   * </li>
77   * <li>Raw/Compressed/Encrypted/Encoded data. The compression algorithm is the
78   * same for all the blocks in the {@link HFile}, similarly to what was done in
79   * version 1.
80   * <li>For minorVersions >=1, a series of 4 byte checksums, one each for
81   * the number of bytes specified by bytesPerChecksum.
82   * </ul>
83   * </ul>
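 * <p>
 * A hedged decoding sketch, for illustration only (the authoritative parsing lives in the
 * {@code HFileBlock(ByteBuffer, boolean)} constructor below). Assuming {@code headerBytes}
 * holds at least {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes copied from the start of a
 * version 2 block, the fixed header fields listed above can be read as:
 * <pre>{@code
 * ByteBuffer b = ByteBuffer.wrap(headerBytes);
 * byte[] magic = new byte[8];
 * b.get(magic);                                   // magic record identifying the block type
 * int onDiskSizeWithoutHeader = b.getInt();       // excludes header, includes checksums
 * int uncompressedSizeWithoutHeader = b.getInt(); // excludes header and checksums
 * long prevBlockOffset = b.getLong();
 * // Present only for minorVersions >= 1:
 * byte checksumType = b.get();
 * int bytesPerChecksum = b.getInt();
 * int onDiskDataSizeWithHeader = b.getInt();
 * }</pre>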
84   */
85  @InterfaceAudience.Private
86  public class HFileBlock implements Cacheable {
87  
88    /**
89     * On a checksum failure on a Reader, this many succeeding read
90     * requests switch back to using HDFS checksums before auto-reenabling
91     * HBase checksum verification.
92     */
93    static final int CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD = 3;
94  
95    public static final boolean FILL_HEADER = true;
96    public static final boolean DONT_FILL_HEADER = false;
97  
98    /**
99     * The size of the block header when blockType is {@link BlockType#ENCODED_DATA}.
100    * This extends the normal header by adding the id of the encoder.
101    */
102   public static final int ENCODED_HEADER_SIZE = HConstants.HFILEBLOCK_HEADER_SIZE
103       + DataBlockEncoding.ID_SIZE;
104 
105   static final byte[] DUMMY_HEADER_NO_CHECKSUM =
106      new byte[HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM];
107 
108   public static final int BYTE_BUFFER_HEAP_SIZE = (int) ClassSize.estimateBase(
109       ByteBuffer.wrap(new byte[0], 0, 0).getClass(), false);
110 
111   // meta.usesHBaseChecksum+offset+nextBlockOnDiskSizeWithHeader
112   public static final int EXTRA_SERIALIZATION_SPACE = Bytes.SIZEOF_BYTE + Bytes.SIZEOF_INT
113       + Bytes.SIZEOF_LONG;
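  // i.e. 1 byte (meta.usesHBaseChecksum) + 4 bytes (nextBlockOnDiskSizeWithHeader)
  // + 8 bytes (offset) = 13 extra bytes appended when a block is serialized into a cache.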
114 
115   /**
116    * Each checksum value is an integer that can be stored in 4 bytes.
117    */
118   static final int CHECKSUM_SIZE = Bytes.SIZEOF_INT;
119 
120   private static final CacheableDeserializer<Cacheable> blockDeserializer =
121       new CacheableDeserializer<Cacheable>() {
122         public HFileBlock deserialize(ByteBuffer buf, boolean reuse) throws IOException{
123           buf.limit(buf.limit() - HFileBlock.EXTRA_SERIALIZATION_SPACE).rewind();
124           ByteBuffer newByteBuffer;
125           if (reuse) {
126             newByteBuffer = buf.slice();
127           } else {
128            newByteBuffer = ByteBuffer.allocate(buf.limit());
129            newByteBuffer.put(buf);
130           }
131           buf.position(buf.limit());
132           buf.limit(buf.limit() + HFileBlock.EXTRA_SERIALIZATION_SPACE);
133           boolean usesChecksum = buf.get() == (byte)1;
134           HFileBlock ourBuffer = new HFileBlock(newByteBuffer, usesChecksum);
135           ourBuffer.offset = buf.getLong();
136           ourBuffer.nextBlockOnDiskSizeWithHeader = buf.getInt();
137           if (ourBuffer.hasNextBlockHeader()) {
138             ourBuffer.buf.limit(ourBuffer.buf.limit() - ourBuffer.headerSize());
139           }
140           return ourBuffer;
141         }
142         
143         @Override
144         public int getDeserialiserIdentifier() {
145           return deserializerIdentifier;
146         }
147 
148         @Override
149         public HFileBlock deserialize(ByteBuffer b) throws IOException {
150           return deserialize(b, false);
151         }
152       };
153   private static final int deserializerIdentifier;
154   static {
155     deserializerIdentifier = CacheableDeserializerIdManager
156         .registerDeserializer(blockDeserializer);
157   }
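  // Hedged sketch, for illustration only: a block cache holding the serialized form of a
  // block (block bytes followed by the EXTRA_SERIALIZATION_SPACE trailer; the serialization
  // path itself is not shown in this file) would restore it roughly like this, where
  // "serialized" is an assumed, already-populated buffer:
  //
  //   ByteBuffer serialized = ...;
  //   Cacheable restored = blockDeserializer.deserialize(serialized);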
158 
159   /** Type of block. Header field 0. */
160   private BlockType blockType;
161 
162   /** Size on disk excluding header, including checksum. Header field 1. */
163   private int onDiskSizeWithoutHeader;
164 
165   /** Size of pure data. Does not include header or checksums. Header field 2. */
166   private final int uncompressedSizeWithoutHeader;
167 
168   /** The offset of the previous block on disk. Header field 3. */
169   private final long prevBlockOffset;
170 
171   /**
172    * Size on disk of header + data. Excludes checksum. Header field 6,
173    * OR calculated from {@link #onDiskSizeWithoutHeader} when using HDFS checksum.
174    */
175   private final int onDiskDataSizeWithHeader;
176 
177   /** The in-memory representation of the hfile block */
178   private ByteBuffer buf;
179 
180   /** Meta data that holds meta information on the hfileblock */
181   private HFileContext fileContext;
182 
183   /**
184    * The offset of this block in the file. Populated by the reader for
185    * convenience of access. This offset is not part of the block header.
186    */
187   private long offset = -1;
188 
189   /**
190    * The on-disk size of the next block, including the header, obtained by
191    * peeking into the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the next block's
192    * header, or -1 if unknown.
193    */
194   private int nextBlockOnDiskSizeWithHeader = -1;
195 
196   /**
197    * Creates a new {@link HFile} block from the given fields. This constructor
198    * is mostly used when the block data has already been read and uncompressed,
199    * and is sitting in a byte buffer. 
200    *
201    * @param blockType the type of this block, see {@link BlockType}
202    * @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
203    * @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
204    * @param prevBlockOffset see {@link #prevBlockOffset}
205    * @param buf block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
206    *          uncompressed data
207    * @param fillHeader when true, overwrite the first 4 header fields of {@code buf} with the values passed in
208    * @param offset the file offset the block was read from
209    * @param onDiskDataSizeWithHeader see {@link #onDiskDataSizeWithHeader}
210    * @param fileContext HFile meta data
211    */
212   HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader,
213       long prevBlockOffset, ByteBuffer buf, boolean fillHeader, long offset,
214       int onDiskDataSizeWithHeader, HFileContext fileContext) {
215     this.blockType = blockType;
216     this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
217     this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
218     this.prevBlockOffset = prevBlockOffset;
219     this.buf = buf;
220     if (fillHeader)
221       overwriteHeader();
222     this.offset = offset;
223     this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
224     this.fileContext = fileContext;
225     this.buf.rewind();
226   }
227 
228   /**
229    * Copy constructor. Creates a shallow copy of {@code that}'s buffer.
230    */
231   HFileBlock(HFileBlock that) {
232     this.blockType = that.blockType;
233     this.onDiskSizeWithoutHeader = that.onDiskSizeWithoutHeader;
234     this.uncompressedSizeWithoutHeader = that.uncompressedSizeWithoutHeader;
235     this.prevBlockOffset = that.prevBlockOffset;
236     this.buf = that.buf.duplicate();
237     this.offset = that.offset;
238     this.onDiskDataSizeWithHeader = that.onDiskDataSizeWithHeader;
239     this.fileContext = that.fileContext;
240     this.nextBlockOnDiskSizeWithHeader = that.nextBlockOnDiskSizeWithHeader;
241   }
242 
243   /**
244    * Creates a block from an existing buffer starting with a header. Rewinds
245    * and takes ownership of the buffer. By definition of rewind, ignores the
246    * buffer position, but if you slice the buffer beforehand, it will rewind
247    * to that point. The {@code usesHBaseChecksum} flag reflects a minor-version feature:
248    * major versions indicate the format of an HFile whereas minor versions
249    * indicate the format inside an HFileBlock.
250    */
251   HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException {
252     b.rewind();
253     blockType = BlockType.read(b);
254     onDiskSizeWithoutHeader = b.getInt();
255     uncompressedSizeWithoutHeader = b.getInt();
256     prevBlockOffset = b.getLong();
257     HFileContextBuilder contextBuilder = new HFileContextBuilder();
258     contextBuilder.withHBaseCheckSum(usesHBaseChecksum);
259     if (usesHBaseChecksum) {
260       contextBuilder.withChecksumType(ChecksumType.codeToType(b.get()));
261       contextBuilder.withBytesPerCheckSum(b.getInt());
262       this.onDiskDataSizeWithHeader = b.getInt();
263     } else {
264       contextBuilder.withChecksumType(ChecksumType.NULL);
265       contextBuilder.withBytesPerCheckSum(0);
266       this.onDiskDataSizeWithHeader = onDiskSizeWithoutHeader +
267                                        HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
268     }
269     this.fileContext = contextBuilder.build();
270     buf = b;
271     buf.rewind();
272   }
273 
274   public BlockType getBlockType() {
275     return blockType;
276   }
277 
278   /** @return get data block encoding id that was used to encode this block */
279   public short getDataBlockEncodingId() {
280     if (blockType != BlockType.ENCODED_DATA) {
281       throw new IllegalArgumentException("Querying encoder ID of a block " +
282           "of type other than " + BlockType.ENCODED_DATA + ": " + blockType);
283     }
284     return buf.getShort(headerSize());
285   }
286 
287   /**
288    * @return the on-disk size of header + data part + checksum.
289    */
290   public int getOnDiskSizeWithHeader() {
291     return onDiskSizeWithoutHeader + headerSize();
292   }
293 
294   /**
295    * @return the on-disk size of the data part + checksum (header excluded).
296    */
297   public int getOnDiskSizeWithoutHeader() {
298     return onDiskSizeWithoutHeader;
299   }
300 
301   /**
302    * @return the uncompressed size of data part (header and checksum excluded).
303    */
304   public int getUncompressedSizeWithoutHeader() {
305     return uncompressedSizeWithoutHeader;
306   }
307 
308   /**
309    * @return the offset of the previous block of the same type in the file, or
310    *         -1 if unknown
311    */
312   public long getPrevBlockOffset() {
313     return prevBlockOffset;
314   }
315 
316   /**
317    * Rewinds {@code buf} and writes first 4 header fields. {@code buf} position
318    * is modified as side-effect.
319    */
320   private void overwriteHeader() {
321     buf.rewind();
322     blockType.write(buf);
323     buf.putInt(onDiskSizeWithoutHeader);
324     buf.putInt(uncompressedSizeWithoutHeader);
325     buf.putLong(prevBlockOffset);
326   }
327 
328   /**
329    * Returns a buffer that does not include the header or checksum.
330    *
331    * @return the buffer with header skipped and checksum omitted.
332    */
333   public ByteBuffer getBufferWithoutHeader() {
334     return ByteBuffer.wrap(buf.array(), buf.arrayOffset() + headerSize(),
335         buf.limit() - headerSize() - totalChecksumBytes()).slice();
336   }
337 
338   /**
339    * Returns the buffer this block stores internally. The clients must not
340    * modify the buffer object. This method has to be public because it is
341    * used in {@link CompoundBloomFilter} to avoid object creation on every
342    * Bloom filter lookup, but has to be used with caution. Checksum data
343    * is not included in the returned buffer but header data is.
344    *
345    * @return the buffer of this block for read-only operations
346    */
347   public ByteBuffer getBufferReadOnly() {
348     return ByteBuffer.wrap(buf.array(), buf.arrayOffset(),
349         buf.limit() - totalChecksumBytes()).slice();
350   }
351 
352   /**
353    * Returns the buffer of this block, including header data. The clients must
354    * not modify the buffer object. This method has to be public because it is
355    * used in {@link BucketCache} to avoid buffer copy.
356    * 
357    * @return the buffer with header and checksum included for read-only operations
358    */
359   public ByteBuffer getBufferReadOnlyWithHeader() {
360     return ByteBuffer.wrap(buf.array(), buf.arrayOffset(), buf.limit()).slice();
361   }
362 
363   /**
364    * Returns a byte buffer of this block, including header data and checksum, positioned at
365    * the beginning of header. The underlying data array is not copied.
366    *
367    * @return the byte buffer with header and checksum included
368    */
369   ByteBuffer getBufferWithHeader() {
370     ByteBuffer dupBuf = buf.duplicate();
371     dupBuf.rewind();
372     return dupBuf;
373   }
374 
375   private void sanityCheckAssertion(long valueFromBuf, long valueFromField,
376       String fieldName) throws IOException {
377     if (valueFromBuf != valueFromField) {
378       throw new AssertionError(fieldName + " in the buffer (" + valueFromBuf
379           + ") is different from that in the field (" + valueFromField + ")");
380     }
381   }
382 
383   private void sanityCheckAssertion(BlockType valueFromBuf, BlockType valueFromField)
384       throws IOException {
385     if (valueFromBuf != valueFromField) {
386       throw new IOException("Block type stored in the buffer: " +
387         valueFromBuf + ", block type field: " + valueFromField);
388     }
389   }
390 
391   /**
392    * Checks if the block is internally consistent, i.e. the first
393    * {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the buffer contain a
394    * valid header consistent with the fields. Assumes a packed block structure.
395    * This function is primarily for testing and debugging, and is not
396    * thread-safe, because it alters the internal buffer pointer.
397    */
398   void sanityCheck() throws IOException {
399     buf.rewind();
400 
401     sanityCheckAssertion(BlockType.read(buf), blockType);
402 
403     sanityCheckAssertion(buf.getInt(), onDiskSizeWithoutHeader,
404         "onDiskSizeWithoutHeader");
405 
406     sanityCheckAssertion(buf.getInt(), uncompressedSizeWithoutHeader,
407         "uncompressedSizeWithoutHeader");
408 
409     sanityCheckAssertion(buf.getLong(), prevBlockOffset, "prevBlockOffset");
410     if (this.fileContext.isUseHBaseChecksum()) {
411       sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), "checksumType");
412       sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(), "bytesPerChecksum");
413       sanityCheckAssertion(buf.getInt(), onDiskDataSizeWithHeader, "onDiskDataSizeWithHeader");
414     }
415 
416     int cksumBytes = totalChecksumBytes();
417     int expectedBufLimit = onDiskDataSizeWithHeader + cksumBytes;
418     if (buf.limit() != expectedBufLimit) {
419       throw new AssertionError("Expected buffer limit " + expectedBufLimit
420           + ", got " + buf.limit());
421     }
422 
423     // We might optionally allocate HFILEBLOCK_HEADER_SIZE more bytes to read the next
424     // block's header, so there are two sensible values for buffer capacity.
425     int hdrSize = headerSize();
426     if (buf.capacity() != expectedBufLimit &&
427         buf.capacity() != expectedBufLimit + hdrSize) {
428       throw new AssertionError("Invalid buffer capacity: " + buf.capacity() +
429           ", expected " + expectedBufLimit + " or " + (expectedBufLimit + hdrSize));
430     }
431   }
432 
433   @Override
434   public String toString() {
435     StringBuilder sb = new StringBuilder()
436       .append("HFileBlock [")
437       .append(" fileOffset=").append(offset)
438       .append(" headerSize()=").append(headerSize())
439       .append(" blockType=").append(blockType)
440       .append(" onDiskSizeWithoutHeader=").append(onDiskSizeWithoutHeader)
441       .append(" uncompressedSizeWithoutHeader=").append(uncompressedSizeWithoutHeader)
442       .append(" prevBlockOffset=").append(prevBlockOffset)
443       .append(" isUseHBaseChecksum()=").append(fileContext.isUseHBaseChecksum());
444     if (fileContext.isUseHBaseChecksum()) {
445       sb.append(" checksumType=").append(ChecksumType.codeToType(this.buf.get(24)))
446         .append(" bytesPerChecksum=").append(this.buf.getInt(24 + 1))
447         .append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader);
448     } else {
449       sb.append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader)
450         .append("(").append(onDiskSizeWithoutHeader)
451         .append("+").append(HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM).append(")");
452     }
453     sb.append(" getOnDiskSizeWithHeader()=").append(getOnDiskSizeWithHeader())
454       .append(" totalChecksumBytes()=").append(totalChecksumBytes())
455       .append(" isUnpacked()=").append(isUnpacked())
456       .append(" buf=[ ")
457         .append(buf)
458         .append(", array().length=").append(buf.array().length)
459         .append(", arrayOffset()=").append(buf.arrayOffset())
460       .append(" ]")
461       .append(" dataBeginsWith=")
462       .append(Bytes.toStringBinary(buf.array(), buf.arrayOffset() + headerSize(),
463         Math.min(32, buf.limit() - buf.arrayOffset() - headerSize())))
464       .append(" fileContext=").append(fileContext)
465       .append(" ]");
466     return sb.toString();
467   }
468 
469   /**
470    * Called after reading a block to validate the provided on-disk size against the size stored in the header.
471    */
472   private void validateOnDiskSizeWithoutHeader(
473       int expectedOnDiskSizeWithoutHeader) throws IOException {
474     if (onDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
475       String blockInfoMsg =
476         "Block offset: " + offset + ", data starts with: "
477           + Bytes.toStringBinary(buf.array(), buf.arrayOffset(),
478               buf.arrayOffset() + Math.min(32, buf.limit()));
479       throw new IOException("On-disk size without header provided is "
480           + expectedOnDiskSizeWithoutHeader + ", but block "
481           + "header contains " + onDiskSizeWithoutHeader + ". " +
482           blockInfoMsg);
483     }
484   }
485 
486   /**
487    * Retrieves the decompressed/decrypted view of this block. An encoded block remains in its
488    * encoded structure. Internal structures are shared between instances where applicable.
489    */
490   HFileBlock unpack(HFileContext fileContext, FSReader reader) throws IOException {
491     if (!fileContext.isCompressedOrEncrypted()) {
492       // TODO: cannot use our own fileContext here because HFileBlock(ByteBuffer, boolean),
493       // which is used for block serialization to L2 cache, does not preserve encoding and
494       // encryption details.
495       return this;
496     }
497 
498     HFileBlock unpacked = new HFileBlock(this);
499     unpacked.allocateBuffer(); // allocates space for the decompressed block
500 
501     HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA ?
502       reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
503     ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
504       unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(),
505       this.getBufferReadOnlyWithHeader().array(), this.headerSize());
506 
507     // Preserve the next block's header bytes in the new block if we have them.
508     if (unpacked.hasNextBlockHeader()) {
509       System.arraycopy(this.buf.array(), this.buf.arrayOffset() + this.onDiskDataSizeWithHeader,
510         unpacked.buf.array(), unpacked.buf.arrayOffset() + unpacked.headerSize() +
511           unpacked.uncompressedSizeWithoutHeader + unpacked.totalChecksumBytes(),
512         unpacked.headerSize());
513     }
514     return unpacked;
515   }
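  // Hedged sketch of the intended read path ("fsReader", "offset" and "fileContext" are
  // assumptions for illustration): blocks come off disk packed, and callers unpack them
  // before use, just as BlockIterator.nextBlock() does further down in this file.
  //
  //   HFileBlock packed = fsReader.readBlockData(offset, -1, -1, false);
  //   HFileBlock usable = packed.unpack(fileContext, fsReader);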
516 
517   /**
518    * Return true when this buffer includes next block's header.
519    */
520   private boolean hasNextBlockHeader() {
521     return nextBlockOnDiskSizeWithHeader > 0;
522   }
523 
524   /**
525    * Always allocates a new buffer of the correct size. Copies header bytes
526    * from the existing buffer. Does not change header fields.
527    * Reserves room for the checksum bytes as well.
528    */
529   private void allocateBuffer() {
530     int cksumBytes = totalChecksumBytes();
531     int headerSize = headerSize();
532     int capacityNeeded = headerSize + uncompressedSizeWithoutHeader +
533         cksumBytes + (hasNextBlockHeader() ? headerSize : 0);
534 
535     ByteBuffer newBuf = ByteBuffer.allocate(capacityNeeded);
536 
537     // Copy header bytes.
538     System.arraycopy(buf.array(), buf.arrayOffset(), newBuf.array(),
539         newBuf.arrayOffset(), headerSize);
540 
541     buf = newBuf;
542     // set limit to exclude next block's header
543     buf.limit(headerSize + uncompressedSizeWithoutHeader + cksumBytes);
544   }
545 
546   /**
547    * Return true when this block's buffer has been unpacked, false otherwise. Note this is a
548    * calculated heuristic, not a tracked attribute of the block.
549    */
550   public boolean isUnpacked() {
551     final int cksumBytes = totalChecksumBytes();
552     final int headerSize = headerSize();
553     final int expectedCapacity = headerSize + uncompressedSizeWithoutHeader + cksumBytes;
554     final int bufCapacity = buf.capacity();
555     return bufCapacity == expectedCapacity || bufCapacity == expectedCapacity + headerSize;
556   }
557 
558   /** An additional sanity-check in case no compression or encryption is being used. */
559   public void assumeUncompressed() throws IOException {
560     if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader +
561         totalChecksumBytes()) {
562       throw new IOException("Using no compression but "
563           + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", "
564           + "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader
565           + ", numChecksumBytes=" + totalChecksumBytes());
566     }
567   }
568 
569   /**
570    * @param expectedType the expected type of this block
571    * @throws IOException if this block's type is different than expected
572    */
573   public void expectType(BlockType expectedType) throws IOException {
574     if (blockType != expectedType) {
575       throw new IOException("Invalid block type: expected=" + expectedType
576           + ", actual=" + blockType);
577     }
578   }
579 
580   /** @return the offset of this block in the file it was read from */
581   public long getOffset() {
582     if (offset < 0) {
583       throw new IllegalStateException(
584           "HFile block offset not initialized properly");
585     }
586     return offset;
587   }
588 
589   /**
590    * @return a byte stream reading the data + checksum of this block
591    */
592   public DataInputStream getByteStream() {
593     return new DataInputStream(new ByteArrayInputStream(buf.array(),
594         buf.arrayOffset() + headerSize(), buf.limit() - headerSize()));
595   }
596 
597   @Override
598   public long heapSize() {
599     long size = ClassSize.align(
600         ClassSize.OBJECT +
601         // Block type, byte buffer and meta references
602         3 * ClassSize.REFERENCE +
603         // onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader,
604         // onDiskDataSizeWithHeader and nextBlockOnDiskSizeWithHeader
605         4 * Bytes.SIZEOF_INT +
606         // This and previous block offset
607         2 * Bytes.SIZEOF_LONG +
608         // Heap size of the meta object. meta will be always not null.
609         fileContext.heapSize()
610     );
611 
612     if (buf != null) {
613       // Deep overhead of the byte buffer. Needs to be aligned separately.
614       size += ClassSize.align(buf.capacity() + BYTE_BUFFER_HEAP_SIZE);
615     }
616 
617     return ClassSize.align(size);
618   }
619 
620   /**
621    * Read from an input stream. Analogous to
622    * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but specifies a
623    * number of "extra" bytes that would be desirable but not absolutely
624    * necessary to read.
625    *
626    * @param in the input stream to read from
627    * @param buf the buffer to read into
628    * @param bufOffset the destination offset in the buffer
629    * @param necessaryLen the number of bytes that are absolutely necessary to
630    *          read
631    * @param extraLen the number of extra bytes that would be nice to read
632    * @return true if succeeded reading the extra bytes
633    * @throws IOException if failed to read the necessary bytes
634    */
635   public static boolean readWithExtra(InputStream in, byte buf[],
636       int bufOffset, int necessaryLen, int extraLen) throws IOException {
637     int bytesRemaining = necessaryLen + extraLen;
638     while (bytesRemaining > 0) {
639       int ret = in.read(buf, bufOffset, bytesRemaining);
640       if (ret == -1 && bytesRemaining <= extraLen) {
641         // We could not read the "extra data", but that is OK.
642         break;
643       }
644 
645       if (ret < 0) {
646         throw new IOException("Premature EOF from inputStream (read "
647             + "returned " + ret + ", was trying to read " + necessaryLen
648             + " necessary bytes and " + extraLen + " extra bytes, "
649             + "successfully read "
650             + (necessaryLen + extraLen - bytesRemaining));
651       }
652       bufOffset += ret;
653       bytesRemaining -= ret;
654     }
655     return bytesRemaining <= 0;
656   }
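  // Hedged usage sketch ("in", "size" and "hdrSize" are assumptions for illustration):
  // read the "size" bytes we must have and, opportunistically, the next block's header,
  // mirroring how readAtOffset() below calls this method.
  //
  //   byte[] dest = new byte[size + hdrSize];
  //   boolean nextHeaderAlsoRead = readWithExtra(in, dest, 0, size, hdrSize);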
657 
658   /**
659    * @return the on-disk size of the next block (including the header size)
660    *         that was read by peeking into the next block's header
661    */
662   public int getNextBlockOnDiskSizeWithHeader() {
663     return nextBlockOnDiskSizeWithHeader;
664   }
665 
666   /**
667    * Unified version 2 {@link HFile} block writer. The intended usage pattern
668    * is as follows:
669    * <ol>
670    * <li>Construct an {@link HFileBlock.Writer}, providing a data block encoder and an {@link HFileContext}.
671    * <li>Call {@link Writer#startWriting} and get a data stream to write to.
672    * <li>Write your data into the stream.
673    * <li>Call {@link Writer#writeHeaderAndData(FSDataOutputStream)} as many times as you need
674    * to store the serialized block into an external stream.
675    * <li>Repeat to write more blocks.
676    * </ol>
677    * <p>
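   * A hedged sketch of that sequence ("fileContext", "out" and the use of a META block are
   * assumptions for illustration; DATA blocks normally go through {@link Writer#write(Cell)}
   * rather than the raw stream):
   * <pre>{@code
   * HFileBlock.Writer w = new HFileBlock.Writer(NoOpDataBlockEncoder.INSTANCE, fileContext);
   * DataOutputStream dos = w.startWriting(BlockType.META);
   * dos.writeUTF("my-meta-payload");   // user data for this block
   * w.writeHeaderAndData(out);         // finishes the block and flushes it to "out"
   * // repeat startWriting(...) / writeHeaderAndData(...) for further blocks
   * }</pre>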
678    */
679   public static class Writer {
680 
681     private enum State {
682       INIT,
683       WRITING,
684       BLOCK_READY
685     };
686 
687     /** Writer state. Used to ensure the correct usage protocol. */
688     private State state = State.INIT;
689 
690     /** Data block encoder used for data blocks */
691     private final HFileDataBlockEncoder dataBlockEncoder;
692 
693     private HFileBlockEncodingContext dataBlockEncodingCtx;
694 
695     /** block encoding context for non-data blocks */
696     private HFileBlockDefaultEncodingContext defaultBlockEncodingCtx;
697 
698     /**
699      * The stream we use to accumulate data in uncompressed format for each
700      * block. We reset this stream at the end of each block and reuse it. The
701      * header is written as the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes into this
702      * stream.
703      */
704     private ByteArrayOutputStream baosInMemory;
705 
706     /**
707      * Current block type. Set in {@link #startWriting(BlockType)}. Could be
708      * changed in {@link #finishBlock()} from {@link BlockType#DATA}
709      * to {@link BlockType#ENCODED_DATA}.
710      */
711     private BlockType blockType;
712 
713     /**
714      * A stream that we write uncompressed bytes to, which compresses them and
715      * writes them to {@link #baosInMemory}.
716      */
717     private DataOutputStream userDataStream;
718 
719     // Size of actual data being written. Not considering the block encoding/compression. This
720     // includes the header size also.
721     private int unencodedDataSizeWritten;
722 
723     /**
724      * Bytes to be written to the file system, including the header. Compressed
725      * if compression is turned on. It also includes the checksum data that
726      * immediately follows the block data. (header + data + checksums)
727      */
728     private byte[] onDiskBytesWithHeader;
729 
730     /**
731      * The size of the checksum data on disk. It is used only if data is
732      * not compressed. If data is compressed, then the checksums are already
733      * part of onDiskBytesWithHeader. If data is uncompressed, then this
734      * variable stores the checksum data for this block.
735      */
736     private byte[] onDiskChecksum;
737 
738     /**
739      * Valid in the BLOCK_READY state. Contains the header and the uncompressed (but
740      * potentially encoded, if this is a data block) bytes, so the length is
741      * {@link #uncompressedSizeWithoutHeader} + {@link org.apache.hadoop.hbase.HConstants#HFILEBLOCK_HEADER_SIZE}.
742      * Does not store checksums.
743      */
744     private byte[] uncompressedBytesWithHeader;
745 
746     /**
747      * Current block's start offset in the {@link HFile}. Set in
748      * {@link #writeHeaderAndData(FSDataOutputStream)}.
749      */
750     private long startOffset;
751 
752     /**
753      * Offset of previous block by block type. Updated when the next block is
754      * started.
755      */
756     private long[] prevOffsetByType;
757 
758     /** The offset of the previous block of the same type */
759     private long prevOffset;
760     /** Meta data that holds information about the hfileblock */
761     private HFileContext fileContext;
762 
763     /**
764      * @param dataBlockEncoder data block encoding algorithm to use
765      */
766     public Writer(HFileDataBlockEncoder dataBlockEncoder, HFileContext fileContext) {
767       this.dataBlockEncoder = dataBlockEncoder != null
768           ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
769       defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null,
770           HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
771       dataBlockEncodingCtx = this.dataBlockEncoder
772           .newDataBlockEncodingContext(HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
773 
774       if (fileContext.getBytesPerChecksum() < HConstants.HFILEBLOCK_HEADER_SIZE) {
775         throw new RuntimeException("Unsupported value of bytesPerChecksum. " +
776             " Minimum is " + HConstants.HFILEBLOCK_HEADER_SIZE + " but the configured value is " +
777             fileContext.getBytesPerChecksum());
778       }
779 
780       baosInMemory = new ByteArrayOutputStream();
781       
782       prevOffsetByType = new long[BlockType.values().length];
783       for (int i = 0; i < prevOffsetByType.length; ++i)
784         prevOffsetByType[i] = -1;
785 
786       this.fileContext = fileContext;
787     }
788 
789     /**
790      * Starts writing into the block. The previous block's data is discarded.
791      *
792      * @return the stream the user can write their data into
793      * @throws IOException
794      */
795     public DataOutputStream startWriting(BlockType newBlockType)
796         throws IOException {
797       if (state == State.BLOCK_READY && startOffset != -1) {
798         // We had a previous block that was written to a stream at a specific
799         // offset. Save that offset as the last offset of a block of that type.
800         prevOffsetByType[blockType.getId()] = startOffset;
801       }
802 
803       startOffset = -1;
804       blockType = newBlockType;
805 
806       baosInMemory.reset();
807       baosInMemory.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
808 
809       state = State.WRITING;
810 
811       // We will compress it later in finishBlock()
812       userDataStream = new DataOutputStream(baosInMemory);
813       if (newBlockType == BlockType.DATA) {
814         this.dataBlockEncoder.startBlockEncoding(dataBlockEncodingCtx, userDataStream);
815       }
816       this.unencodedDataSizeWritten = 0;
817       return userDataStream;
818     }
819 
820     /**
821      * Writes the Cell to the block currently being written.
822      * @param cell the cell to append
823      * @throws IOException if the cell cannot be encoded
824      */
825     public void write(Cell cell) throws IOException{
826       expectState(State.WRITING);
827       this.unencodedDataSizeWritten += this.dataBlockEncoder.encode(cell, dataBlockEncodingCtx,
828           this.userDataStream);
829     }
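    // Hedged sketch of the data-block path ("writer", "cells" and "fsOut" are assumptions
    // for illustration): DATA blocks are fed through write(Cell) rather than through the
    // raw stream returned by startWriting().
    //
    //   writer.startWriting(BlockType.DATA);
    //   for (Cell c : cells) {
    //     writer.write(c);
    //   }
    //   writer.writeHeaderAndData(fsOut);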
830 
831     /**
832      * Returns the stream for the user to write to. The block writer takes care
833      * of handling compression and buffering for caching on write. Can only be
834      * called in the "writing" state.
835      *
836      * @return the data output stream for the user to write to
837      */
838     DataOutputStream getUserDataStream() {
839       expectState(State.WRITING);
840       return userDataStream;
841     }
842 
843     /**
844      * Transitions the block writer from the "writing" state to the "block
845      * ready" state.  Does nothing if a block is already finished.
846      */
847     void ensureBlockReady() throws IOException {
848       Preconditions.checkState(state != State.INIT,
849           "Unexpected state: " + state);
850 
851       if (state == State.BLOCK_READY)
852         return;
853 
854       // This will set state to BLOCK_READY.
855       finishBlock();
856     }
857 
858     /**
859      * An internal method that flushes the compressing stream (if using
860      * compression), serializes the header, and takes care of the separate
861      * uncompressed stream for caching on write, if applicable. Sets block
862      * write state to "block ready".
863      */
864     private void finishBlock() throws IOException {
865       if (blockType == BlockType.DATA) {
866         BufferGrabbingByteArrayOutputStream baosInMemoryCopy = 
867             new BufferGrabbingByteArrayOutputStream();
868         baosInMemory.writeTo(baosInMemoryCopy);
869         this.dataBlockEncoder.endBlockEncoding(dataBlockEncodingCtx, userDataStream,
870             baosInMemoryCopy.buf, blockType);
871         blockType = dataBlockEncodingCtx.getBlockType();
872       }
873       userDataStream.flush();
874       // This does an array copy, so it is safe to cache this byte array.
875       uncompressedBytesWithHeader = baosInMemory.toByteArray();
876       prevOffset = prevOffsetByType[blockType.getId()];
877 
878       // We need to set state before we can package the block up for
879       // cache-on-write. In a way, the block is ready, but not yet encoded or
880       // compressed.
881       state = State.BLOCK_READY;
882       if (blockType == BlockType.DATA || blockType == BlockType.ENCODED_DATA) {
883         onDiskBytesWithHeader = dataBlockEncodingCtx
884             .compressAndEncrypt(uncompressedBytesWithHeader);
885       } else {
886         onDiskBytesWithHeader = defaultBlockEncodingCtx
887             .compressAndEncrypt(uncompressedBytesWithHeader);
888       }
889       int numBytes = (int) ChecksumUtil.numBytes(
890           onDiskBytesWithHeader.length,
891           fileContext.getBytesPerChecksum());
892 
893       // put the header for on disk bytes
894       putHeader(onDiskBytesWithHeader, 0,
895           onDiskBytesWithHeader.length + numBytes,
896           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
897       // set the header for the uncompressed bytes (for cache-on-write)
898       putHeader(uncompressedBytesWithHeader, 0,
899           onDiskBytesWithHeader.length + numBytes,
900           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
901 
902       onDiskChecksum = new byte[numBytes];
903       ChecksumUtil.generateChecksums(
904           onDiskBytesWithHeader, 0, onDiskBytesWithHeader.length,
905           onDiskChecksum, 0, fileContext.getChecksumType(), fileContext.getBytesPerChecksum());
906     }
907 
908     public static class BufferGrabbingByteArrayOutputStream extends ByteArrayOutputStream {
909       private byte[] buf;
910 
911       @Override
912       public void write(byte[] b, int off, int len) {
913         this.buf = b;
914       }
915 
916       public byte[] getBuffer() {
917         return this.buf;
918       }
919     }
920 
921     /**
922      * Put the header into the given byte array at the given offset.
923      * @param onDiskSize size of the block on disk header + data + checksum
924      * @param uncompressedSize size of the block after decompression (but
925      *          before optional data block decoding) including header
926      * @param onDiskDataSize size of the block on disk with header
927      *        and data but not including the checksums
928      */
929     private void putHeader(byte[] dest, int offset, int onDiskSize,
930         int uncompressedSize, int onDiskDataSize) {
931       offset = blockType.put(dest, offset);
932       offset = Bytes.putInt(dest, offset, onDiskSize - HConstants.HFILEBLOCK_HEADER_SIZE);
933       offset = Bytes.putInt(dest, offset, uncompressedSize - HConstants.HFILEBLOCK_HEADER_SIZE);
934       offset = Bytes.putLong(dest, offset, prevOffset);
935       offset = Bytes.putByte(dest, offset, fileContext.getChecksumType().getCode());
936       offset = Bytes.putInt(dest, offset, fileContext.getBytesPerChecksum());
937       Bytes.putInt(dest, offset, onDiskDataSize);
938     }
939 
940     /**
941      * Similar to {@link #writeHeaderAndData(FSDataOutputStream)}, but records
942      * the offset of this block so that it can be referenced in the next block
943      * of the same type.
944      *
945      * @param out the output stream to write to
946      * @throws IOException
947      */
948     public void writeHeaderAndData(FSDataOutputStream out) throws IOException {
949       long offset = out.getPos();
950       if (startOffset != -1 && offset != startOffset) {
951         throw new IOException("A " + blockType + " block written to a "
952             + "stream twice, first at offset " + startOffset + ", then at "
953             + offset);
954       }
955       startOffset = offset;
956 
957       finishBlockAndWriteHeaderAndData((DataOutputStream) out);
958     }
959 
960     /**
961      * Writes the header and the compressed data of this block (or uncompressed
962      * data when not using compression) into the given stream. Can be called in
963      * the "writing" state or in the "block ready" state. If called in the
964      * "writing" state, transitions the writer to the "block ready" state.
965      *
966      * @param out the output stream to write the block to
967      * @throws IOException
968      */
969     protected void finishBlockAndWriteHeaderAndData(DataOutputStream out)
970       throws IOException {
971       ensureBlockReady();
972       out.write(onDiskBytesWithHeader);
973       out.write(onDiskChecksum);
974     }
975 
976     /**
977      * Returns the header or the compressed data (or uncompressed data when not
978      * using compression) as a byte array. Can be called in the "writing" state
979      * or in the "block ready" state. If called in the "writing" state,
980      * transitions the writer to the "block ready" state. This returns
981      * the header + data + checksums stored on disk.
982      *
983      * @return header and data as they would be stored on disk in a byte array
984      * @throws IOException
985      */
986     byte[] getHeaderAndDataForTest() throws IOException {
987       ensureBlockReady();
988       // This is not very optimal, because we are doing an extra copy.
989       // But this method is used only by unit tests.
990       byte[] output =
991           new byte[onDiskBytesWithHeader.length
992               + onDiskChecksum.length];
993       System.arraycopy(onDiskBytesWithHeader, 0, output, 0,
994           onDiskBytesWithHeader.length);
995       System.arraycopy(onDiskChecksum, 0, output,
996           onDiskBytesWithHeader.length, onDiskChecksum.length);
997       return output;
998     }
999 
1000     /**
1001      * Releases resources used by this writer.
1002      */
1003     public void release() {
1004       if (dataBlockEncodingCtx != null) {
1005         dataBlockEncodingCtx.close();
1006         dataBlockEncodingCtx = null;
1007       }
1008       if (defaultBlockEncodingCtx != null) {
1009         defaultBlockEncodingCtx.close();
1010         defaultBlockEncodingCtx = null;
1011       }
1012     }
1013 
1014     /**
1015      * Returns the on-disk size of the data portion of the block. This is the
1016      * compressed size if compression is enabled. Can only be called in the
1017      * "block ready" state. Header is not compressed, and its size is not
1018      * included in the return value.
1019      *
1020      * @return the on-disk size of the block, not including the header.
1021      */
1022     int getOnDiskSizeWithoutHeader() {
1023       expectState(State.BLOCK_READY);
1024       return onDiskBytesWithHeader.length + onDiskChecksum.length - HConstants.HFILEBLOCK_HEADER_SIZE;
1025     }
1026 
1027     /**
1028      * Returns the on-disk size of the block. Can only be called in the
1029      * "block ready" state.
1030      *
1031      * @return the on-disk size of the block ready to be written, including the
1032      *         header size, the data and the checksum data.
1033      */
1034     int getOnDiskSizeWithHeader() {
1035       expectState(State.BLOCK_READY);
1036       return onDiskBytesWithHeader.length + onDiskChecksum.length;
1037     }
1038 
1039     /**
1040      * The uncompressed size of the block data. Does not include header size.
1041      */
1042     int getUncompressedSizeWithoutHeader() {
1043       expectState(State.BLOCK_READY);
1044       return uncompressedBytesWithHeader.length - HConstants.HFILEBLOCK_HEADER_SIZE;
1045     }
1046 
1047     /**
1048      * The uncompressed size of the block data, including header size.
1049      */
1050     int getUncompressedSizeWithHeader() {
1051       expectState(State.BLOCK_READY);
1052       return uncompressedBytesWithHeader.length;
1053     }
1054 
1055     /** @return true if a block is being written  */
1056     public boolean isWriting() {
1057       return state == State.WRITING;
1058     }
1059 
1060     /**
1061      * Returns the number of bytes written into the current block so far, or
1062      * zero if not writing the block at the moment. Note that this will return
1063      * zero in the "block ready" state as well.
1064      *
1065      * @return the number of bytes written
1066      */
1067     public int blockSizeWritten() {
1068       if (state != State.WRITING) return 0;
1069       return this.unencodedDataSizeWritten;
1070     }
1071 
1072     /**
1073      * Returns the header followed by the uncompressed data, even if using
1074      * compression. This is needed for storing uncompressed blocks in the block
1075      * cache. Can be called in the "writing" state or the "block ready" state.
1076      * Returns only the header and data, does not include checksum data.
1077      *
1078      * @return uncompressed block bytes for caching on write
1079      */
1080     ByteBuffer getUncompressedBufferWithHeader() {
1081       expectState(State.BLOCK_READY);
1082       return ByteBuffer.wrap(uncompressedBytesWithHeader);
1083     }
1084 
1085     /**
1086      * Returns the header followed by the on-disk (compressed/encoded/encrypted) data. This is
1087      * needed for storing packed blocks in the block cache. Expects calling semantics identical to
1088      * {@link #getUncompressedBufferWithHeader()}. Returns only the header and data;
1089      * does not include checksum data.
1090      *
1091      * @return packed block bytes for caching on write
1092      */
1093     ByteBuffer getOnDiskBufferWithHeader() {
1094       expectState(State.BLOCK_READY);
1095       return ByteBuffer.wrap(onDiskBytesWithHeader);
1096     }
1097 
1098     private void expectState(State expectedState) {
1099       if (state != expectedState) {
1100         throw new IllegalStateException("Expected state: " + expectedState +
1101             ", actual state: " + state);
1102       }
1103     }
1104 
1105     /**
1106      * Takes the given {@link BlockWritable} instance, creates a new block of
1107      * its appropriate type, writes the writable into this block, and flushes
1108      * the block into the output stream. The writer is instructed not to buffer
1109      * uncompressed bytes for cache-on-write.
1110      *
1111      * @param bw the block-writable object to write as a block
1112      * @param out the file system output stream
1113      * @throws IOException
1114      */
1115     public void writeBlock(BlockWritable bw, FSDataOutputStream out)
1116         throws IOException {
1117       bw.writeToBlock(startWriting(bw.getBlockType()));
1118       writeHeaderAndData(out);
1119     }
1120 
1121     /**
1122      * Creates a new HFileBlock. Checksums have already been validated, so
1123      * the byte buffer passed into the constructor of this newly created
1124      * block does not have checksum data even though the header minor 
1125      * version is MINOR_VERSION_WITH_CHECKSUM. This is indicated by setting a
1126      * 0 value in bytesPerChecksum.
1127      */
1128     public HFileBlock getBlockForCaching(CacheConfig cacheConf) {
1129       HFileContext newContext = new HFileContextBuilder()
1130                                 .withBlockSize(fileContext.getBlocksize())
1131                                 .withBytesPerCheckSum(0)
1132                                 .withChecksumType(ChecksumType.NULL) // no checksums in cached data
1133                                 .withCompression(fileContext.getCompression())
1134                                 .withDataBlockEncoding(fileContext.getDataBlockEncoding())
1135                                 .withHBaseCheckSum(fileContext.isUseHBaseChecksum())
1136                                 .withCompressTags(fileContext.isCompressTags())
1137                                 .withIncludesMvcc(fileContext.isIncludesMvcc())
1138                                 .withIncludesTags(fileContext.isIncludesTags())
1139                                 .build();
1140       return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
1141           getUncompressedSizeWithoutHeader(), prevOffset,
1142           cacheConf.shouldCacheCompressed(blockType.getCategory()) ?
1143             getOnDiskBufferWithHeader() :
1144             getUncompressedBufferWithHeader(),
1145           DONT_FILL_HEADER, startOffset,
1146           onDiskBytesWithHeader.length + onDiskChecksum.length, newContext);
1147     }
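    // Hedged cache-on-write sketch ("writer", "fsOut" and "cacheConf" are assumptions for
    // illustration): once a block has been flushed, a copy suitable for the block cache can
    // be obtained from the writer and handed to the cache.
    //
    //   writer.writeHeaderAndData(fsOut);
    //   HFileBlock toCache = writer.getBlockForCaching(cacheConf);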
1148   }
1149 
1150   /** Something that can be written into a block. */
1151   public interface BlockWritable {
1152 
1153     /** The type of block this data should use. */
1154     BlockType getBlockType();
1155 
1156     /**
1157      * Writes the block to the provided stream. Must not write any magic
1158      * records.
1159      *
1160      * @param out a stream to write uncompressed data into
1161      */
1162     void writeToBlock(DataOutput out) throws IOException;
1163   }
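  // Hedged sketch only ("hfileBlockWriter" and "fsOut" are assumptions for illustration):
  // Writer.writeBlock(BlockWritable, FSDataOutputStream) pairs naturally with a small
  // anonymous BlockWritable such as this.
  //
  //   BlockWritable bw = new BlockWritable() {
  //     @Override public BlockType getBlockType() { return BlockType.META; }
  //     @Override public void writeToBlock(DataOutput out) throws IOException {
  //       out.writeUTF("payload");
  //     }
  //   };
  //   hfileBlockWriter.writeBlock(bw, fsOut);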
1164 
1165   // Block readers and writers
1166 
1167   /** An interface for iterating over {@link HFileBlock}s. */
1168   public interface BlockIterator {
1169 
1170     /**
1171      * Get the next block, or null if there are no more blocks to iterate.
1172      */
1173     HFileBlock nextBlock() throws IOException;
1174 
1175     /**
1176      * Similar to {@link #nextBlock()} but checks block type, throws an
1177      * exception if incorrect, and returns the HFile block
1178      */
1179     HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
1180   }
1181 
1182   /** A full-fledged reader with iteration ability. */
1183   public interface FSReader {
1184 
1185     /**
1186      * Reads the block at the given offset in the file with the given on-disk
1187      * size and uncompressed size.
1188      *
1189      * @param offset the offset in the file to read from
1190      * @param onDiskSize the on-disk size of the entire block, including all
1191      *          applicable headers, or -1 if unknown
1192      * @param uncompressedSize the uncompressed size of the compressed part of
1193      *          the block, or -1 if unknown
1194      * @return the newly read block
1195      */
1196     HFileBlock readBlockData(long offset, long onDiskSize,
1197         int uncompressedSize, boolean pread) throws IOException;
1198 
1199     /**
1200      * Creates a block iterator over the given portion of the {@link HFile}.
1201      * The iterator returns blocks whose offsets satisfy startOffset <=
1202      * offset < endOffset. Returned blocks are always unpacked.
1203      *
1204      * @param startOffset the offset of the block to start iteration with
1205      * @param endOffset the offset to end iteration at (exclusive)
1206      * @return an iterator of blocks between the two given offsets
1207      */
1208     BlockIterator blockRange(long startOffset, long endOffset);
1209 
1210     /** Closes the backing streams */
1211     void closeStreams() throws IOException;
1212 
1213     /** Get a decoder for {@link BlockType#ENCODED_DATA} blocks from this file. */
1214     HFileBlockDecodingContext getBlockDecodingContext();
1215 
1216     /** Get the default decoder for blocks from this file. */
1217     HFileBlockDecodingContext getDefaultBlockDecodingContext();
1218   }
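  // Hedged sketch of iterating a portion of a file with the interfaces above ("fsReader",
  // "startOffset" and "endOffset" are assumptions for illustration):
  //
  //   BlockIterator it = fsReader.blockRange(startOffset, endOffset);
  //   for (HFileBlock b = it.nextBlock(); b != null; b = it.nextBlock()) {
  //     // blocks arrive unpacked; inspect b.getBlockType(), b.getBufferWithoutHeader(), ...
  //   }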
1219 
1220   /**
1221    * A common implementation of some methods of {@link FSReader} and some
1222    * tools for implementing HFile format version-specific block readers.
1223    */
1224   private abstract static class AbstractFSReader implements FSReader {
1225     /** Compression algorithm used by the {@link HFile} */
1226 
1227     /** The size of the file we are reading from, or -1 if unknown. */
1228     protected long fileSize;
1229 
1230     /** The size of the header */
1231     protected final int hdrSize;
1232 
1233     /** The filesystem used to access data */
1234     protected HFileSystem hfs;
1235 
1236     /** The path (if any) where this data is coming from */
1237     protected Path path;
1238 
1239     private final Lock streamLock = new ReentrantLock();
1240 
1241     /** The default buffer size for our buffered streams */
1242     public static final int DEFAULT_BUFFER_SIZE = 1 << 20;
1243 
1244     protected HFileContext fileContext;
1245 
1246     public AbstractFSReader(long fileSize, HFileSystem hfs, Path path, HFileContext fileContext)
1247         throws IOException {
1248       this.fileSize = fileSize;
1249       this.hfs = hfs;
1250       this.path = path;
1251       this.fileContext = fileContext;
1252       this.hdrSize = headerSize(fileContext.isUseHBaseChecksum());
1253     }
1254 
1255     @Override
1256     public BlockIterator blockRange(final long startOffset,
1257         final long endOffset) {
1258       final FSReader owner = this; // handle for inner class
1259       return new BlockIterator() {
1260         private long offset = startOffset;
1261 
1262         @Override
1263         public HFileBlock nextBlock() throws IOException {
1264           if (offset >= endOffset)
1265             return null;
1266           HFileBlock b = readBlockData(offset, -1, -1, false);
1267           offset += b.getOnDiskSizeWithHeader();
1268           return b.unpack(fileContext, owner);
1269         }
1270 
1271         @Override
1272         public HFileBlock nextBlockWithBlockType(BlockType blockType)
1273             throws IOException {
1274           HFileBlock blk = nextBlock();
1275           if (blk.getBlockType() != blockType) {
1276             throw new IOException("Expected block of type " + blockType
1277                 + " but found " + blk.getBlockType());
1278           }
1279           return blk;
1280         }
1281       };
1282     }
1283 
1284     /**
1285      * Does a positional read or a seek and read into the given buffer. Returns
1286      * the on-disk size of the next block, or -1 if it could not be determined.
1287      *
1288      * @param dest destination buffer
1289      * @param destOffset offset in the destination buffer
1290      * @param size size of the block to be read
1291      * @param peekIntoNextBlock whether to read the next block's on-disk size
1292      * @param fileOffset position in the stream to read at
1293      * @param pread whether we should do a positional read
1294      * @param istream The input source of data
1295      * @return the on-disk size of the next block with header size included, or
1296      *         -1 if it could not be determined
1297      * @throws IOException
1298      */
1299     protected int readAtOffset(FSDataInputStream istream,
1300         byte[] dest, int destOffset, int size,
1301         boolean peekIntoNextBlock, long fileOffset, boolean pread)
1302         throws IOException {
1303       if (peekIntoNextBlock &&
1304           destOffset + size + hdrSize > dest.length) {
1305         // We are asked to read the next block's header as well, but there is
1306         // not enough room in the array.
1307         throw new IOException("Attempted to read " + size + " bytes and " +
1308             hdrSize + " bytes of next header into a " + dest.length +
1309             "-byte array at offset " + destOffset);
1310       }
1311 
1312       if (!pread && streamLock.tryLock()) {
1313         // Seek + read. Better for scanning.
1314         try {
1315           istream.seek(fileOffset);
1316 
1317           long realOffset = istream.getPos();
1318           if (realOffset != fileOffset) {
1319             throw new IOException("Tried to seek to " + fileOffset + " to "
1320                 + "read " + size + " bytes, but pos=" + realOffset
1321                 + " after seek");
1322           }
1323 
1324           if (!peekIntoNextBlock) {
1325             IOUtils.readFully(istream, dest, destOffset, size);
1326             return -1;
1327           }
1328 
1329           // Try to read the next block header.
1330           if (!readWithExtra(istream, dest, destOffset, size, hdrSize))
1331             return -1;
1332         } finally {
1333           streamLock.unlock();
1334         }
1335       } else {
1336         // Positional read. Better for random reads; or when the streamLock is already locked.
1337         int extraSize = peekIntoNextBlock ? hdrSize : 0;
1338         int ret = istream.read(fileOffset, dest, destOffset, size + extraSize);
1339         if (ret < size) {
1340           throw new IOException("Positional read of " + size + " bytes " +
1341               "failed at offset " + fileOffset + " (returned " + ret + ")");
1342         }
1343 
1344         if (ret == size || ret < size + extraSize) {
1345           // Could not read the next block's header, or did not try.
1346           return -1;
1347         }
1348       }
1349 
1350       assert peekIntoNextBlock;
1351       return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize;
1352     }
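    // Editor's note (not part of the original source): when peekIntoNextBlock is
    // true, readAtOffset() expects the destination array to have room for this
    // block followed by the next block's header:
    //
    //   dest: [destOffset, destOffset + size)              this block's bytes
    //         [destOffset + size, destOffset + size + hdrSize)   next block's header
    //
    // The return value is taken from the peeked header: the 4-byte on-disk size
    // (excluding header) stored right after the 8-byte magic record, plus hdrSize.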
1353 
1354   }
1355 
1356   /**
1357    * We always prefetch the header of the next block, so that we know its
1358    * on-disk size in advance and can read it in one operation.
1359    */
1360   private static class PrefetchedHeader {
1361     long offset = -1;
1362     byte[] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
1363     ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE);
1364   }
1365 
1366   /** Reads version 2 blocks from the filesystem. */
1367   static class FSReaderV2 extends AbstractFSReader {
1368     /** The file system stream of the underlying {@link HFile}, which may or
1369      * may not perform checksum validations in the filesystem. */
1370     protected FSDataInputStreamWrapper streamWrapper;
1371 
1372     private HFileBlockDecodingContext encodedBlockDecodingCtx;
1373 
1374     /** Default context used when BlockType != {@link BlockType#ENCODED_DATA}. */
1375     private final HFileBlockDefaultDecodingContext defaultDecodingCtx;
1376 
1377     private ThreadLocal<PrefetchedHeader> prefetchedHeaderForThread =
1378         new ThreadLocal<PrefetchedHeader>() {
1379           @Override
1380           public PrefetchedHeader initialValue() {
1381             return new PrefetchedHeader();
1382           }
1383         };
1384 
1385     public FSReaderV2(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
1386         HFileContext fileContext) throws IOException {
1387       super(fileSize, hfs, path, fileContext);
1388       this.streamWrapper = stream;
1389       // Older versions of HBase didn't support checksum.
1390       this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum());
1391       defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext);
1392       encodedBlockDecodingCtx = defaultDecodingCtx;
1393     }
1394 
1395     /**
1396      * A constructor that reads files with the latest minor version.
1397      * This is used by unit tests only.
1398      */
1399     FSReaderV2(FSDataInputStream istream, long fileSize, HFileContext fileContext) throws IOException {
1400       this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
1401     }
1402 
1403     /**
1404      * Reads a version 2 block. Tries to do as little memory allocation as
1405      * possible, using the provided on-disk size.
1406      *
1407      * @param offset the offset in the stream to read at
1408      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1409      *          the header, or -1 if unknown
1410      * @param uncompressedSize the uncompressed size of the block. Always
1411      *          expected to be -1. This parameter is only used in version 1.
1412      * @param pread whether to use a positional read
1413      */
1414     @Override
1415     public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL,
1416         int uncompressedSize, boolean pread) throws IOException {
1417 
1418       // Get a copy of the current state of whether to validate
1419       // hbase checksums or not for this read call. This is not
1420       // thread-safe, but the one constraint is that if we decide
1421       // to skip hbase checksum verification then we are
1422       // guaranteed to use hdfs checksum verification.
1423       boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum();
1424       FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum);
1425 
1426       HFileBlock blk = readBlockDataInternal(is, offset, 
1427                          onDiskSizeWithHeaderL, 
1428                          uncompressedSize, pread,
1429                          doVerificationThruHBaseChecksum);
1430       if (blk == null) {
1431         HFile.LOG.warn("HBase checksum verification failed for file " +
1432                        path + " at offset " +
1433                        offset + " filesize " + fileSize +
1434                        ". Retrying read with HDFS checksums turned on...");
1435 
1436         if (!doVerificationThruHBaseChecksum) {
1437           String msg = "HBase checksum verification failed for file " +
1438                        path + " at offset " +
1439                        offset + " filesize " + fileSize + 
1440                        " but this cannot happen because doVerify is " +
1441                        doVerificationThruHBaseChecksum;
1442           HFile.LOG.warn(msg);
1443           throw new IOException(msg); // cannot-happen case
1444         }
1445         HFile.checksumFailures.incrementAndGet(); // update metrics
1446 
1447         // If we have a checksum failure, we fall back into a mode where
1448         // the next few reads use HDFS level checksums. We aim to make the
1449         // next CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD reads avoid
1450         // hbase checksum verification, but since this value is set without
1451         // holding any locks, it may happen that we actually do
1452         // a few more than precisely this number.
1453         is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD);
1454         doVerificationThruHBaseChecksum = false;
1455         blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL,
1456                                     uncompressedSize, pread,
1457                                     doVerificationThruHBaseChecksum);
1458         if (blk != null) {
1459           HFile.LOG.warn("HDFS checksum verification suceeded for file " +
1460                          path + " at offset " +
1461                          offset + " filesize " + fileSize);
1462         }
1463       } 
1464       if (blk == null && !doVerificationThruHBaseChecksum) {
1465         String msg = "readBlockData failed, possibly due to a " +
1466                      "checksum verification failure for file " + path +
1467                      " at offset " + offset + " filesize " + fileSize;
1468         HFile.LOG.warn(msg);
1469         throw new IOException(msg);
1470       }
1471 
1472       // If there was a checksum mismatch earlier, the read was retried with
1473       // HBase checksums switched off, using HDFS checksum verification instead.
1474       // This triggers HDFS to detect and fix corrupt replicas. The
1475       // next checksumOffCount read requests will use HDFS checksums.
1476       // The decrementing of this.checksumOffCount is not thread-safe,
1477       // but it is harmless because eventually checksumOffCount will be
1478       // a negative number.
1479       streamWrapper.checksumOk();
1480       return blk;
1481     }
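    // Editor's sketch (not part of the original source): a hypothetical caller of
    // readBlockData(). The reader, offset and onDiskSize variables are assumptions;
    // unpack(HFileContext, FSReader) is the call used by the blockRange() iterator
    // earlier in this file to produce a decompressed/decrypted block.
    //
    //   HFileBlock raw = reader.readBlockData(offset, onDiskSize, -1, /* pread */ true);
    //   HFileBlock usable = raw.unpack(fileContext, reader);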
1482 
1483     /**
1484      * Reads a version 2 block. 
1485      *
1486      * @param offset the offset in the stream to read at
1487      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1488      *          the header, or -1 if unknown
1489      * @param uncompressedSize the uncompressed size of the block. Always
1490      *          expected to be -1. This parameter is only used in version 1.
1491      * @param pread whether to use a positional read
1492      * @param verifyChecksum Whether to use HBase checksums. 
1493      *        If HBase checksum is switched off, then use HDFS checksum.
1494      * @return the HFileBlock or null if there is an HBase checksum mismatch
1495      */
1496     private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, 
1497         long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread,
1498         boolean verifyChecksum) throws IOException {
1499       if (offset < 0) {
1500         throw new IOException("Invalid offset=" + offset + " trying to read "
1501             + "block (onDiskSize=" + onDiskSizeWithHeaderL
1502             + ", uncompressedSize=" + uncompressedSize + ")");
1503       }
1504       if (uncompressedSize != -1) {
1505         throw new IOException("Version 2 block reader API does not need " +
1506             "the uncompressed size parameter");
1507       }
1508 
1509       if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1)
1510           || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) {
1511         throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL
1512             + ": expected to be at least " + hdrSize
1513             + " and at most " + Integer.MAX_VALUE + ", or -1 (offset="
1514             + offset + ", uncompressedSize=" + uncompressedSize + ")");
1515       }
1516 
1517       int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL;
1518       // See if we can avoid reading the header. This is desirable, because
1519       // we will not incur a backward seek operation if we have already
1520       // read this block's header as part of the previous read's look-ahead.
1521       // And we also want to skip reading the header again if it has already
1522       // been read.
1523       PrefetchedHeader prefetchedHeader = prefetchedHeaderForThread.get();
1524       ByteBuffer headerBuf = prefetchedHeader.offset == offset ?
1525           prefetchedHeader.buf : null;
1526 
1527       int nextBlockOnDiskSize = 0;
1528       // Allocate enough space to fit the next block's header too.
1529       byte[] onDiskBlock = null;
1530 
1531       HFileBlock b = null;
1532       if (onDiskSizeWithHeader > 0) {
1533         // We know the total on-disk size. Read the entire block into memory,
1534         // then parse the header. This code path is used when
1535         // doing a random read operation relying on the block index, as well as
1536         // when the client knows the on-disk size from peeking into the next
1537         // block's header (i.e. this block's header) when reading the previous
1538         // block. This is the faster and preferred case.
1539 
1540         // Size that we have to skip in case we have already read the header.
1541         int preReadHeaderSize = headerBuf == null ? 0 : hdrSize;
1542         onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize]; // room for this block plus the
1543                                                                 // next block's header
1544         nextBlockOnDiskSize = readAtOffset(is, onDiskBlock,
1545             preReadHeaderSize, onDiskSizeWithHeader - preReadHeaderSize,
1546             true, offset + preReadHeaderSize, pread);
1547         if (headerBuf != null) {
1548           // the header has been read when reading the previous block, copy
1549           // to this block's header
1550           System.arraycopy(headerBuf.array(),
1551               headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1552         } else {
1553           headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize);
1554         }
1555         // At this point the entire block (plus the next block's header) is in
1556         // memory, and this block's header is available in headerBuf; parse the
1557         // header to create the block.
1558         try {
1559           b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1560         } catch (IOException ex) {
1561           // Seen in load testing. Provide comprehensive debug info.
1562           throw new IOException("Failed to read compressed block at "
1563               + offset
1564               + ", onDiskSizeWithoutHeader="
1565               + onDiskSizeWithHeader
1566               + ", preReadHeaderSize="
1567               + hdrSize
1568               + ", header.length="
1569               + prefetchedHeader.header.length
1570               + ", header bytes: "
1571               + Bytes.toStringBinary(prefetchedHeader.header, 0,
1572                   hdrSize), ex);
1573         }
1574         // if the caller specifies an onDiskSizeWithHeader, validate it.
1575         int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
1576         assert onDiskSizeWithoutHeader >= 0;
1577         b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader);
1578       } else {
1579         // Check headerBuf to see if we have read this block's header as part of
1580         // reading the previous block. This is an optimization of peeking into
1581         // the next block's header (i.e. this block's header) when reading the
1582         // previous block. This is the faster and preferred case. If the
1583         // header is already there, don't read the header again.
1584 
1585         // Unfortunately, we still have to do a separate read operation to
1586         // read the header.
1587         if (headerBuf == null) {
1588           // From the header, determine the on-disk size of the given hfile
1589           // block, and read the remaining data, thereby incurring two read
1590           // operations. This might happen when we are doing the first read
1591           // in a series of reads or a random read, and we don't have access
1592           // to the block index. This is costly and should happen very rarely.
1593           headerBuf = ByteBuffer.allocate(hdrSize);
1594           readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(),
1595               hdrSize, false, offset, pread);
1596         }
1597         b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1598         onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize];
1599         System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1600         nextBlockOnDiskSize =
1601           readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader()
1602               - hdrSize, true, offset + hdrSize, pread);
1603         onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize;
1604       }
1605 
1606       if (!fileContext.isCompressedOrEncrypted()) {
1607         b.assumeUncompressed();
1608       }
1609 
1610       if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
1611         return null;             // checksum mismatch
1612       }
1613 
1614       // The onDiskBlock will become the headerAndDataBuffer for this block.
1615       // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
1616       // contains the header of the next block, so no need to set the next
1617       // block's header in it.
1618       b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader),
1619         this.fileContext.isUseHBaseChecksum());
1620 
1621       b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize;
1622 
1623       // Set prefetched header
1624       if (b.hasNextBlockHeader()) {
1625         prefetchedHeader.offset = offset + b.getOnDiskSizeWithHeader();
1626         System.arraycopy(onDiskBlock, onDiskSizeWithHeader,
1627             prefetchedHeader.header, 0, hdrSize);
1628       }
1629 
1630       b.offset = offset;
1631       b.fileContext.setIncludesTags(this.fileContext.isIncludesTags());
1632       b.fileContext.setIncludesMvcc(this.fileContext.isIncludesMvcc());
1633       return b;
1634     }
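    // Editor's recap (not part of the original source) of the two paths above:
    //  * onDiskSizeWithHeader > 0: a single read fetches header + data + the next
    //    block's header, reusing this thread's prefetched header when available.
    //  * onDiskSizeWithHeader == -1: the header is read first to learn the block's
    //    size, then data + the next block's header are read in a second operation.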
1635 
1636     void setIncludesMemstoreTS(boolean includesMemstoreTS) {
1637       this.fileContext.setIncludesMvcc(includesMemstoreTS);
1638     }
1639 
1640     void setDataBlockEncoder(HFileDataBlockEncoder encoder) {
1641       encodedBlockDecodingCtx = encoder.newDataBlockDecodingContext(this.fileContext);
1642     }
1643 
1644     @Override
1645     public HFileBlockDecodingContext getBlockDecodingContext() {
1646       return this.encodedBlockDecodingCtx;
1647     }
1648 
1649     @Override
1650     public HFileBlockDecodingContext getDefaultBlockDecodingContext() {
1651       return this.defaultDecodingCtx;
1652     }
1653 
1654     /**
1655      * Generates the checksum for the header as well as the data and
1656      * then validates that it matches the value stored in the header.
1657      * Returns false if there is a checksum mismatch, and true
1658      * otherwise.
1659      */
1660     protected boolean validateBlockChecksum(HFileBlock block, byte[] data, int hdrSize)
1661         throws IOException {
1662       return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize);
1663     }
1664 
1665     @Override
1666     public void closeStreams() throws IOException {
1667       streamWrapper.close();
1668     }
1669 
1670     @Override
1671     public String toString() {
1672       return "FSReaderV2 [ hfs=" + hfs + " path=" + path + " fileContext=" + fileContext + " ]";
1673     }
1674   }
1675 
1676   @Override
1677   public int getSerializedLength() {
1678     if (buf != null) {
1679       // include extra bytes for the next header when it's available.
1680       int extraSpace = hasNextBlockHeader() ? headerSize() : 0;
1681       return this.buf.limit() + extraSpace + HFileBlock.EXTRA_SERIALIZATION_SPACE;
1682     }
1683     return 0;
1684   }
1685 
1686   @Override
1687   public void serialize(ByteBuffer destination) {
1688     // assumes HeapByteBuffer
1689     destination.put(this.buf.array(), this.buf.arrayOffset(),
1690       getSerializedLength() - EXTRA_SERIALIZATION_SPACE);
1691     serializeExtraInfo(destination);
1692   }
1693 
1694   public void serializeExtraInfo(ByteBuffer destination) {
1695     destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0);
1696     destination.putLong(this.offset);
1697     destination.putInt(this.nextBlockOnDiskSizeWithHeader);
1698     destination.rewind();
1699   }
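  // Editor's note (not part of the original source): serialize() followed by
  // serializeExtraInfo() writes, in order:
  //   [block bytes : getSerializedLength() - EXTRA_SERIALIZATION_SPACE]
  //   [1 byte      : usesHBaseChecksum flag]
  //   [8 bytes     : offset of this block in the file]
  //   [4 bytes     : nextBlockOnDiskSizeWithHeader]
  // so EXTRA_SERIALIZATION_SPACE presumably accounts for these 13 trailing bytes.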
1700 
1701   @Override
1702   public CacheableDeserializer<Cacheable> getDeserializer() {
1703     return HFileBlock.blockDeserializer;
1704   }
1705 
1706   @Override
1707   public boolean equals(Object comparison) {
1708     if (this == comparison) {
1709       return true;
1710     }
1711     if (comparison == null) {
1712       return false;
1713     }
1714     if (comparison.getClass() != this.getClass()) {
1715       return false;
1716     }
1717 
1718     HFileBlock castedComparison = (HFileBlock) comparison;
1719 
1720     if (castedComparison.blockType != this.blockType) {
1721       return false;
1722     }
1723     if (castedComparison.nextBlockOnDiskSizeWithHeader != this.nextBlockOnDiskSizeWithHeader) {
1724       return false;
1725     }
1726     if (castedComparison.offset != this.offset) {
1727       return false;
1728     }
1729     if (castedComparison.onDiskSizeWithoutHeader != this.onDiskSizeWithoutHeader) {
1730       return false;
1731     }
1732     if (castedComparison.prevBlockOffset != this.prevBlockOffset) {
1733       return false;
1734     }
1735     if (castedComparison.uncompressedSizeWithoutHeader != this.uncompressedSizeWithoutHeader) {
1736       return false;
1737     }
1738     if (Bytes.compareTo(this.buf.array(), this.buf.arrayOffset(), this.buf.limit(),
1739       castedComparison.buf.array(), castedComparison.buf.arrayOffset(),
1740       castedComparison.buf.limit()) != 0) {
1741       return false;
1742     }
1743     return true;
1744   }
1745 
1746   public DataBlockEncoding getDataBlockEncoding() {
1747     if (blockType == BlockType.ENCODED_DATA) {
1748       return DataBlockEncoding.getEncodingById(getDataBlockEncodingId());
1749     }
1750     return DataBlockEncoding.NONE;
1751   }
1752 
1753   byte getChecksumType() {
1754     return this.fileContext.getChecksumType().getCode();
1755   }
1756 
1757   int getBytesPerChecksum() {
1758     return this.fileContext.getBytesPerChecksum();
1759   }
1760 
1761   /** @return the size of data on disk + header. Excludes checksum. */
1762   int getOnDiskDataSizeWithHeader() {
1763     return this.onDiskDataSizeWithHeader;
1764   }
1765 
1766   /** 
1767    * Calculate the number of bytes required to store all the checksums
1768    * for this block. Each checksum value is a 4-byte integer.
1769    */
1770   int totalChecksumBytes() {
1771     // If the hfile block has minorVersion 0, then there are no checksum
1772     // data to validate. Similarly, a zero value in this.bytesPerChecksum
1773     // indicates that cached blocks do not have checksum data because
1774     // checksums were already validated when the block was read from disk.
1775     if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) {
1776       return 0;
1777     }
1778     return (int)ChecksumUtil.numBytes(onDiskDataSizeWithHeader, this.fileContext.getBytesPerChecksum());
1779   }
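  // Editor's worked example (assumes ChecksumUtil.numBytes() allots one 4-byte
  // checksum per bytesPerChecksum-sized chunk, rounding the chunk count up):
  //   onDiskDataSizeWithHeader = 40000, bytesPerChecksum = 16384
  //   => ceil(40000 / 16384) = 3 chunks => 3 * 4 = 12 checksum bytes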
1780 
1781   /**
1782    * Returns the size of this block's header.
1783    */
1784   public int headerSize() {
1785     return headerSize(this.fileContext.isUseHBaseChecksum());
1786   }
1787 
1788   /**
1789    * Maps whether HBase checksums are in use (a proxy for the minor version) to the size of the header.
1790    */
1791   public static int headerSize(boolean usesHBaseChecksum) {
1792     if (usesHBaseChecksum) {
1793       return HConstants.HFILEBLOCK_HEADER_SIZE;
1794     }
1795     return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
1796   }
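  // Editor's note, derived from toStringHeader() below and the block format in the
  // class comment: without HBase checksums the header is
  //   magic(8) + onDiskSizeWithoutHeader(4) + uncompressedSizeWithoutHeader(4)
  //   + prevBlockOffset(8) = 24 bytes,
  // and with HBase checksums three more fields are appended:
  //   checksumType(1) + bytesPerChecksum(4) + onDiskDataSizeWithHeader(4) = 33 bytes.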
1797 
1798   /**
1799    * Return the appropriate DUMMY_HEADER for the minor version
1800    */
1801   public byte[] getDummyHeaderForVersion() {
1802     return getDummyHeaderForVersion(this.fileContext.isUseHBaseChecksum());
1803   }
1804 
1805   /**
1806    * Return the appropriate DUMMY_HEADER for the minor version
1807    */
1808   static private byte[] getDummyHeaderForVersion(boolean usesHBaseChecksum) {
1809     if (usesHBaseChecksum) {
1810       return HConstants.HFILEBLOCK_DUMMY_HEADER;
1811     }
1812     return DUMMY_HEADER_NO_CHECKSUM;
1813   }
1814 
1815   /**
1816    * @return the HFileContext used to create this HFileBlock. Not necessarily the
1817    * fileContext for the file from which this block's data was originally read.
1818    */
1819   public HFileContext getHFileContext() {
1820     return this.fileContext;
1821   }
1822 
1823   /**
1824    * Convert the contents of the block header into a human readable string.
1825    * This is mostly helpful for debugging. This assumes that the block
1826    * has minor version > 0.
1827    */
1828   static String toStringHeader(ByteBuffer buf) throws IOException {
1829     int offset = buf.arrayOffset();
1830     byte[] b = buf.array();
1831     long magic = Bytes.toLong(b, offset);
1832     BlockType bt = BlockType.read(buf);
1833     offset += Bytes.SIZEOF_LONG;
1834     int compressedBlockSizeNoHeader = Bytes.toInt(b, offset);
1835     offset += Bytes.SIZEOF_INT;
1836     int uncompressedBlockSizeNoHeader = Bytes.toInt(b, offset);
1837     offset += Bytes.SIZEOF_INT;
1838     long prevBlockOffset = Bytes.toLong(b, offset); 
1839     offset += Bytes.SIZEOF_LONG;
1840     byte cksumtype = b[offset];
1841     offset += Bytes.SIZEOF_BYTE;
1842     long bytesPerChecksum = Bytes.toInt(b, offset); 
1843     offset += Bytes.SIZEOF_INT;
1844     long onDiskDataSizeWithHeader = Bytes.toInt(b, offset); 
1845     offset += Bytes.SIZEOF_INT;
1846     return " Header dump: magic: " + magic +
1847                    " blockType " + bt +
1848                    " compressedBlockSizeNoHeader " + 
1849                    compressedBlockSizeNoHeader +
1850                    " uncompressedBlockSizeNoHeader " + 
1851                    uncompressedBlockSizeNoHeader +
1852                    " prevBlockOffset " + prevBlockOffset +
1853                    " checksumType " + ChecksumType.codeToType(cksumtype) +
1854                    " bytesPerChecksum " + bytesPerChecksum +
1855                    " onDiskDataSizeWithHeader " + onDiskDataSizeWithHeader;
1856   }
1857 }