1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayInputStream;
21  import java.io.ByteArrayOutputStream;
22  import java.io.DataInputStream;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.nio.ByteBuffer;
28  import java.util.concurrent.locks.Lock;
29  import java.util.concurrent.locks.ReentrantLock;
30  
31  import org.apache.hadoop.classification.InterfaceAudience;
32  import org.apache.hadoop.fs.FSDataInputStream;
33  import org.apache.hadoop.fs.FSDataOutputStream;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.Cell;
36  import org.apache.hadoop.hbase.HConstants;
37  import org.apache.hadoop.hbase.KeyValue;
38  import org.apache.hadoop.hbase.fs.HFileSystem;
39  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
40  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
41  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
42  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
43  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
44  import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
45  import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
46  import org.apache.hadoop.hbase.util.Bytes;
47  import org.apache.hadoop.hbase.util.ChecksumType;
48  import org.apache.hadoop.hbase.util.ClassSize;
49  import org.apache.hadoop.hbase.util.CompoundBloomFilter;
50  import org.apache.hadoop.io.IOUtils;
51  
52  import com.google.common.base.Preconditions;
53  
54  /**
55   * Reading {@link HFile} version 1 and 2 blocks, and writing version 2 blocks.
56   * <ul>
57   * <li>In version 1 all blocks are always compressed or uncompressed, as
58   * specified by the {@link HFile}'s compression algorithm, with a type-specific
59   * magic record stored in the beginning of the compressed data (i.e. one needs
60   * to uncompress the compressed block to determine the block type). There is
61   * only a single compression algorithm setting for all blocks. Offset and size
62   * information from the block index are required to read a block.
63   * <li>In version 2 a block is structured as follows:
64   * <ul>
65   * <li>header (see {@link Writer#finishBlock()})
66   * <ul>
67   * <li>Magic record identifying the block type (8 bytes)
68   * <li>Compressed block size, excluding header, including checksum (4 bytes)
69   * <li>Uncompressed block size, excluding header, excluding checksum (4 bytes)
70   * <li>The offset of the previous block of the same type (8 bytes). This is
71   * used to be able to navigate to the previous block without going to the block index.
72   * <li>For minorVersions >=1, the ordinal describing checksum type (1 byte)
73   * <li>For minorVersions >=1, the number of data bytes/checksum chunk (4 bytes)
74   * <li>For minorVersions >=1, the size of data on disk, including header,
75   * excluding checksums (4 bytes)
76   * </ul>
77   * </li>
78   * <li>Raw/Compressed/Encrypted/Encoded data. The compression algorithm is the
79   * same for all the blocks in the {@link HFile}, similarly to what was done in
80   * version 1.
81   * <li>For minorVersions >=1, a series of 4 byte checksums, one each for
82   * the number of bytes specified by bytesPerChecksum.
83   * </ul>
84   * </ul>
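     * <p>
     * A rough sketch, for illustration only, of how the version 2 header fields listed
     * above can be decoded from a {@link java.nio.ByteBuffer} positioned at the start of
     * a block (the buffer name {@code b} is assumed; the last three reads apply only to
     * minorVersions >= 1). This mirrors what the {@code HFileBlock(ByteBuffer, boolean)}
     * constructor below does:
     * <pre>
     * byte[] magic = new byte[8];                   // block type magic record
     * b.get(magic);
     * int onDiskSizeWithoutHeader = b.getInt();     // compressed size, no header, with checksum
     * int uncompressedSizeWithoutHeader = b.getInt();
     * long prevBlockOffset = b.getLong();
     * byte checksumType = b.get();                  // minorVersion >= 1 only
     * int bytesPerChecksum = b.getInt();            // minorVersion >= 1 only
     * int onDiskDataSizeWithHeader = b.getInt();    // minorVersion >= 1 only
     * </pre>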
85   */
86  @InterfaceAudience.Private
87  public class HFileBlock implements Cacheable {
88  
89    /**
90     * On a checksum failure on a Reader, this many succeeding read
91     * requests switch back to using HDFS checksums before auto-re-enabling
92     * HBase checksum verification.
93     */
94    static final int CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD = 3;
95  
96    public static final boolean FILL_HEADER = true;
97    public static final boolean DONT_FILL_HEADER = false;
98  
99    /**
100    * The size of the block header when blockType is {@link BlockType#ENCODED_DATA}.
101    * This extends the normal header by adding the id of the encoder.
102    */
103   public static final int ENCODED_HEADER_SIZE = HConstants.HFILEBLOCK_HEADER_SIZE
104       + DataBlockEncoding.ID_SIZE;
105 
106   static final byte[] DUMMY_HEADER_NO_CHECKSUM =
107      new byte[HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM];
108 
109   public static final int BYTE_BUFFER_HEAP_SIZE = (int) ClassSize.estimateBase(
110       ByteBuffer.wrap(new byte[0], 0, 0).getClass(), false);
111 
112   // meta.usesHBaseChecksum+offset+nextBlockOnDiskSizeWithHeader
113   public static final int EXTRA_SERIALIZATION_SPACE = Bytes.SIZEOF_BYTE + Bytes.SIZEOF_INT
114       + Bytes.SIZEOF_LONG;
115 
116   /**
117    * Each checksum value is an integer that can be stored in 4 bytes.
118    */
119   static final int CHECKSUM_SIZE = Bytes.SIZEOF_INT;
120 
121   private static final CacheableDeserializer<Cacheable> blockDeserializer =
122       new CacheableDeserializer<Cacheable>() {
123         public HFileBlock deserialize(ByteBuffer buf, boolean reuse) throws IOException{
124           buf.limit(buf.limit() - HFileBlock.EXTRA_SERIALIZATION_SPACE).rewind();
125           ByteBuffer newByteBuffer;
126           if (reuse) {
127             newByteBuffer = buf.slice();
128           } else {
129            newByteBuffer = ByteBuffer.allocate(buf.limit());
130            newByteBuffer.put(buf);
131           }
132           buf.position(buf.limit());
133           buf.limit(buf.limit() + HFileBlock.EXTRA_SERIALIZATION_SPACE);
134           boolean usesChecksum = buf.get() == (byte)1;
135           HFileBlock ourBuffer = new HFileBlock(newByteBuffer, usesChecksum);
136           ourBuffer.offset = buf.getLong();
137           ourBuffer.nextBlockOnDiskSizeWithHeader = buf.getInt();
138           if (ourBuffer.hasNextBlockHeader()) {
139             ourBuffer.buf.limit(ourBuffer.buf.limit() - ourBuffer.headerSize());
140           }
141           return ourBuffer;
142         }
143         
144         @Override
145         public int getDeserialiserIdentifier() {
146           return deserializerIdentifier;
147         }
148 
149         @Override
150         public HFileBlock deserialize(ByteBuffer b) throws IOException {
151           return deserialize(b, false);
152         }
153       };
154   private static final int deserializerIdentifier;
155   static {
156     deserializerIdentifier = CacheableDeserializerIdManager
157         .registerDeserializer(blockDeserializer);
158   }
159 
160   /** Type of block. Header field 0. */
161   private BlockType blockType;
162 
163   /** Size on disk excluding header, including checksum. Header field 1. */
164   private int onDiskSizeWithoutHeader;
165 
166   /** Size of pure data. Does not include header or checksums. Header field 2. */
167   private final int uncompressedSizeWithoutHeader;
168 
169   /** The offset of the previous block on disk. Header field 3. */
170   private final long prevBlockOffset;
171 
172   /**
173    * Size on disk of header + data. Excludes checksum. Header field 6,
174    * OR calculated from {@link #onDiskSizeWithoutHeader} when using HDFS checksum.
175    */
176   private final int onDiskDataSizeWithHeader;
177 
178   /** The in-memory representation of the hfile block */
179   private ByteBuffer buf;
180 
181   /** Metadata holding information about this HFile block */
182   private HFileContext fileContext;
183 
184   /**
185    * The offset of this block in the file. Populated by the reader for
186    * convenience of access. This offset is not part of the block header.
187    */
188   private long offset = -1;
189 
190   /**
191    * The on-disk size of the next block, including the header, obtained by
192    * peeking into the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the next block's
193    * header, or -1 if unknown.
194    */
195   private int nextBlockOnDiskSizeWithHeader = -1;
196 
197   /**
198    * Creates a new {@link HFile} block from the given fields. This constructor
199    * is mostly used when the block data has already been read and uncompressed,
200    * and is sitting in a byte buffer. 
201    *
202    * @param blockType the type of this block, see {@link BlockType}
203    * @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
204    * @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
205    * @param prevBlockOffset see {@link #prevBlockOffset}
206    * @param buf block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
207    *          uncompressed data
208    * @param fillHeader when true, overwrite the first 4 header fields of {@code buf} with the values passed to this constructor
209    * @param offset the file offset the block was read from
210    * @param onDiskDataSizeWithHeader see {@link #onDiskDataSizeWithHeader}
211    * @param fileContext HFile meta data
212    */
213   HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader,
214       long prevBlockOffset, ByteBuffer buf, boolean fillHeader, long offset,
215       int onDiskDataSizeWithHeader, HFileContext fileContext) {
216     this.blockType = blockType;
217     this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
218     this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
219     this.prevBlockOffset = prevBlockOffset;
220     this.buf = buf;
221     if (fillHeader)
222       overwriteHeader();
223     this.offset = offset;
224     this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
225     this.fileContext = fileContext;
226     this.buf.rewind();
227   }
228 
229   /**
230    * Copy constructor. Creates a shallow copy of {@code that}'s buffer.
231    */
232   HFileBlock(HFileBlock that) {
233     this.blockType = that.blockType;
234     this.onDiskSizeWithoutHeader = that.onDiskSizeWithoutHeader;
235     this.uncompressedSizeWithoutHeader = that.uncompressedSizeWithoutHeader;
236     this.prevBlockOffset = that.prevBlockOffset;
237     this.buf = that.buf.duplicate();
238     this.offset = that.offset;
239     this.onDiskDataSizeWithHeader = that.onDiskDataSizeWithHeader;
240     this.fileContext = that.fileContext;
241     this.nextBlockOnDiskSizeWithHeader = that.nextBlockOnDiskSizeWithHeader;
242   }
243 
244   /**
245    * Creates a block from an existing buffer starting with a header. Rewinds
246    * and takes ownership of the buffer. By definition of rewind, ignores the
247    * buffer position, but if you slice the buffer beforehand, it will rewind
248    * to that point. The reason this has a minorNumber and not a majorNumber is
249    * that majorNumbers indicate the format of an HFile whereas minorNumbers
250    * indicate the format inside an HFileBlock.
251    */
252   HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException {
253     b.rewind();
254     blockType = BlockType.read(b);
255     onDiskSizeWithoutHeader = b.getInt();
256     uncompressedSizeWithoutHeader = b.getInt();
257     prevBlockOffset = b.getLong();
258     HFileContextBuilder contextBuilder = new HFileContextBuilder();
259     contextBuilder.withHBaseCheckSum(usesHBaseChecksum);
260     if (usesHBaseChecksum) {
261       contextBuilder.withChecksumType(ChecksumType.codeToType(b.get()));
262       contextBuilder.withBytesPerCheckSum(b.getInt());
263       this.onDiskDataSizeWithHeader = b.getInt();
264     } else {
265       contextBuilder.withChecksumType(ChecksumType.NULL);
266       contextBuilder.withBytesPerCheckSum(0);
267       this.onDiskDataSizeWithHeader = onDiskSizeWithoutHeader +
268                                        HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
269     }
270     this.fileContext = contextBuilder.build();
271     buf = b;
272     buf.rewind();
273   }
274 
275   public BlockType getBlockType() {
276     return blockType;
277   }
278 
279   /** @return the data block encoding id that was used to encode this block */
280   public short getDataBlockEncodingId() {
281     if (blockType != BlockType.ENCODED_DATA) {
282       throw new IllegalArgumentException("Querying encoder ID of a block " +
283           "of type other than " + BlockType.ENCODED_DATA + ": " + blockType);
284     }
285     return buf.getShort(headerSize());
286   }
287 
288   /**
289    * @return the on-disk size of header + data part + checksum.
290    */
291   public int getOnDiskSizeWithHeader() {
292     return onDiskSizeWithoutHeader + headerSize();
293   }
294 
295   /**
296    * @return the on-disk size of the data part + checksum (header excluded).
297    */
298   public int getOnDiskSizeWithoutHeader() {
299     return onDiskSizeWithoutHeader;
300   }
301 
302   /**
303    * @return the uncompressed size of data part (header and checksum excluded).
304    */
305    public int getUncompressedSizeWithoutHeader() {
306     return uncompressedSizeWithoutHeader;
307   }
308 
309   /**
310    * @return the offset of the previous block of the same type in the file, or
311    *         -1 if unknown
312    */
313   public long getPrevBlockOffset() {
314     return prevBlockOffset;
315   }
316 
317   /**
318    * Rewinds {@code buf} and writes the first 4 header fields. The position of
319    * {@code buf} is modified as a side effect.
320    */
321   private void overwriteHeader() {
322     buf.rewind();
323     blockType.write(buf);
324     buf.putInt(onDiskSizeWithoutHeader);
325     buf.putInt(uncompressedSizeWithoutHeader);
326     buf.putLong(prevBlockOffset);
327   }
328 
329   /**
330    * Returns a buffer that does not include the header or checksum.
331    *
332    * @return the buffer with header skipped and checksum omitted.
333    */
334   public ByteBuffer getBufferWithoutHeader() {
335     return ByteBuffer.wrap(buf.array(), buf.arrayOffset() + headerSize(),
336         buf.limit() - headerSize() - totalChecksumBytes()).slice();
337   }
338 
339   /**
340    * Returns the buffer this block stores internally. The clients must not
341    * modify the buffer object. This method has to be public because it is
342    * used in {@link CompoundBloomFilter} to avoid object creation on every
343    * Bloom filter lookup, but has to be used with caution. Checksum data
344    * is not included in the returned buffer but header data is.
345    *
346    * @return the buffer of this block for read-only operations
347    */
348   public ByteBuffer getBufferReadOnly() {
349     return ByteBuffer.wrap(buf.array(), buf.arrayOffset(),
350         buf.limit() - totalChecksumBytes()).slice();
351   }
352 
353   /**
354    * Returns the buffer of this block, including header data. The clients must
355    * not modify the buffer object. This method has to be public because it is
356    * used in {@link BucketCache} to avoid buffer copy.
357    * 
358    * @return the buffer with header and checksum included for read-only operations
359    */
360   public ByteBuffer getBufferReadOnlyWithHeader() {
361     return ByteBuffer.wrap(buf.array(), buf.arrayOffset(), buf.limit()).slice();
362   }
363 
364   /**
365    * Returns a byte buffer of this block, including header data and checksum, positioned at
366    * the beginning of header. The underlying data array is not copied.
367    *
368    * @return the byte buffer with header and checksum included
369    */
370   ByteBuffer getBufferWithHeader() {
371     ByteBuffer dupBuf = buf.duplicate();
372     dupBuf.rewind();
373     return dupBuf;
374   }
375 
376   private void sanityCheckAssertion(long valueFromBuf, long valueFromField,
377       String fieldName) throws IOException {
378     if (valueFromBuf != valueFromField) {
379       throw new AssertionError(fieldName + " in the buffer (" + valueFromBuf
380           + ") is different from that in the field (" + valueFromField + ")");
381     }
382   }
383 
384   private void sanityCheckAssertion(BlockType valueFromBuf, BlockType valueFromField)
385       throws IOException {
386     if (valueFromBuf != valueFromField) {
387       throw new IOException("Block type stored in the buffer: " +
388         valueFromBuf + ", block type field: " + valueFromField);
389     }
390   }
391 
392   /**
393    * Checks if the block is internally consistent, i.e. the first
394    * {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the buffer contain a
395    * valid header consistent with the fields. Assumes a packed block structure.
396   * This function is primarily for testing and debugging, and is not
397    * thread-safe, because it alters the internal buffer pointer.
398    */
399   void sanityCheck() throws IOException {
400     buf.rewind();
401 
402     sanityCheckAssertion(BlockType.read(buf), blockType);
403 
404     sanityCheckAssertion(buf.getInt(), onDiskSizeWithoutHeader,
405         "onDiskSizeWithoutHeader");
406 
407     sanityCheckAssertion(buf.getInt(), uncompressedSizeWithoutHeader,
408         "uncompressedSizeWithoutHeader");
409 
410     sanityCheckAssertion(buf.getLong(), prevBlockOffset, "prevBlockOffset");
411     if (this.fileContext.isUseHBaseChecksum()) {
412       sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), "checksumType");
413       sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(), "bytesPerChecksum");
414       sanityCheckAssertion(buf.getInt(), onDiskDataSizeWithHeader, "onDiskDataSizeWithHeader");
415     }
416 
417     int cksumBytes = totalChecksumBytes();
418     int expectedBufLimit = onDiskDataSizeWithHeader + cksumBytes;
419     if (buf.limit() != expectedBufLimit) {
420       throw new AssertionError("Expected buffer limit " + expectedBufLimit
421           + ", got " + buf.limit());
422     }
423 
424     // We might optionally allocate HFILEBLOCK_HEADER_SIZE more bytes to read the next
425     // block's header, so there are two sensible values for buffer capacity.
426     int hdrSize = headerSize();
427     if (buf.capacity() != expectedBufLimit &&
428         buf.capacity() != expectedBufLimit + hdrSize) {
429       throw new AssertionError("Invalid buffer capacity: " + buf.capacity() +
430           ", expected " + expectedBufLimit + " or " + (expectedBufLimit + hdrSize));
431     }
432   }
433 
434   @Override
435   public String toString() {
436     StringBuilder sb = new StringBuilder()
437       .append("HFileBlock [")
438       .append(" fileOffset=").append(offset)
439       .append(" headerSize()=").append(headerSize())
440       .append(" blockType=").append(blockType)
441       .append(" onDiskSizeWithoutHeader=").append(onDiskSizeWithoutHeader)
442       .append(" uncompressedSizeWithoutHeader=").append(uncompressedSizeWithoutHeader)
443       .append(" prevBlockOffset=").append(prevBlockOffset)
444       .append(" isUseHBaseChecksum()=").append(fileContext.isUseHBaseChecksum());
445     if (fileContext.isUseHBaseChecksum()) {
446       sb.append(" checksumType=").append(ChecksumType.codeToType(this.buf.get(24)))
447         .append(" bytesPerChecksum=").append(this.buf.getInt(24 + 1))
448         .append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader);
449     } else {
450       sb.append(" onDiskDataSizeWithHeader=").append(onDiskDataSizeWithHeader)
451         .append("(").append(onDiskSizeWithoutHeader)
452         .append("+").append(HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM).append(")");
453     }
454     sb.append(" getOnDiskSizeWithHeader()=").append(getOnDiskSizeWithHeader())
455       .append(" totalChecksumBytes()=").append(totalChecksumBytes())
456       .append(" isUnpacked()=").append(isUnpacked())
457       .append(" buf=[ ")
458         .append(buf)
459         .append(", array().length=").append(buf.array().length)
460         .append(", arrayOffset()=").append(buf.arrayOffset())
461       .append(" ]")
462       .append(" dataBeginsWith=")
463       .append(Bytes.toStringBinary(buf.array(), buf.arrayOffset() + headerSize(),
464         Math.min(32, buf.limit() - buf.arrayOffset() - headerSize())))
465       .append(" fileContext=").append(fileContext)
466       .append(" ]");
467     return sb.toString();
468   }
469 
470   /**
471    * Called after reading a block with provided onDiskSizeWithHeader.
472    */
473   private void validateOnDiskSizeWithoutHeader(
474       int expectedOnDiskSizeWithoutHeader) throws IOException {
475     if (onDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
476       String blockInfoMsg =
477         "Block offset: " + offset + ", data starts with: "
478           + Bytes.toStringBinary(buf.array(), buf.arrayOffset(),
479               buf.arrayOffset() + Math.min(32, buf.limit()));
480       throw new IOException("On-disk size without header provided is "
481           + expectedOnDiskSizeWithoutHeader + ", but block "
482           + "header contains " + onDiskSizeWithoutHeader + ". " +
483           blockInfoMsg);
484     }
485   }
486 
487   /**
488    * Retrieves the decompressed/decrypted view of this block. An encoded block remains in its
489    * encoded structure. Internal structures are shared between instances where applicable.
490    */
491   HFileBlock unpack(HFileContext fileContext, FSReader reader) throws IOException {
492     if (!fileContext.isCompressedOrEncrypted()) {
493       // TODO: cannot use our own fileContext here because HFileBlock(ByteBuffer, boolean),
494       // which is used for block serialization to L2 cache, does not preserve encoding and
495       // encryption details.
496       return this;
497     }
498 
499     HFileBlock unpacked = new HFileBlock(this);
500     unpacked.allocateBuffer(); // allocates space for the decompressed block
501 
502     HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA ?
503       reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
504     ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
505       unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(),
506       this.getBufferReadOnlyWithHeader().array(), this.headerSize());
507 
508     // Preserve the next block's header bytes in the new block if we have them.
509     if (unpacked.hasNextBlockHeader()) {
510       System.arraycopy(this.buf.array(), this.buf.arrayOffset() + this.onDiskDataSizeWithHeader,
511         unpacked.buf.array(), unpacked.buf.arrayOffset() + unpacked.headerSize() +
512           unpacked.uncompressedSizeWithoutHeader + unpacked.totalChecksumBytes(),
513         unpacked.headerSize());
514     }
515     return unpacked;
516   }
517 
518   /**
519    * Return true when this buffer includes next block's header.
520    */
521   private boolean hasNextBlockHeader() {
522     return nextBlockOnDiskSizeWithHeader > 0;
523   }
524 
525   /**
526    * Always allocates a new buffer of the correct size. Copies header bytes
527    * from the existing buffer. Does not change header fields.
528    * Reserves room for checksum bytes as well.
529    */
530   private void allocateBuffer() {
531     int cksumBytes = totalChecksumBytes();
532     int headerSize = headerSize();
533     int capacityNeeded = headerSize + uncompressedSizeWithoutHeader +
534         cksumBytes + (hasNextBlockHeader() ? headerSize : 0);
535 
536     ByteBuffer newBuf = ByteBuffer.allocate(capacityNeeded);
537 
538     // Copy header bytes.
539     System.arraycopy(buf.array(), buf.arrayOffset(), newBuf.array(),
540         newBuf.arrayOffset(), headerSize);
541 
542     buf = newBuf;
543     // set limit to exclude next block's header
544     buf.limit(headerSize + uncompressedSizeWithoutHeader + cksumBytes);
545   }
546 
547   /**
548    * Return true when this block's buffer has been unpacked, false otherwise. Note this is a
549    * calculated heuristic, not a tracked attribute of the block.
550    */
551   public boolean isUnpacked() {
552     final int cksumBytes = totalChecksumBytes();
553     final int headerSize = headerSize();
554     final int expectedCapacity = headerSize + uncompressedSizeWithoutHeader + cksumBytes;
555     final int bufCapacity = buf.capacity();
556     return bufCapacity == expectedCapacity || bufCapacity == expectedCapacity + headerSize;
557   }
558 
559   /** An additional sanity-check in case no compression or encryption is being used. */
560   public void assumeUncompressed() throws IOException {
561     if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader +
562         totalChecksumBytes()) {
563       throw new IOException("Using no compression but "
564           + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", "
565           + "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader
566           + ", numChecksumBytes=" + totalChecksumBytes());
567     }
568   }
569 
570   /**
571    * @param expectedType the expected type of this block
572    * @throws IOException if this block's type is different than expected
573    */
574   public void expectType(BlockType expectedType) throws IOException {
575     if (blockType != expectedType) {
576       throw new IOException("Invalid block type: expected=" + expectedType
577           + ", actual=" + blockType);
578     }
579   }
580 
581   /** @return the offset of this block in the file it was read from */
582   public long getOffset() {
583     if (offset < 0) {
584       throw new IllegalStateException(
585           "HFile block offset not initialized properly");
586     }
587     return offset;
588   }
589 
590   /**
591    * @return a byte stream reading the data + checksum of this block
592    */
593   public DataInputStream getByteStream() {
594     return new DataInputStream(new ByteArrayInputStream(buf.array(),
595         buf.arrayOffset() + headerSize(), buf.limit() - headerSize()));
596   }
597 
598   @Override
599   public long heapSize() {
600     long size = ClassSize.align(
601         ClassSize.OBJECT +
602         // Block type, byte buffer and meta references
603         3 * ClassSize.REFERENCE +
604         // On-disk size, uncompressed size, and next block's on-disk size
605         // bytePerChecksum and onDiskDataSize
606         4 * Bytes.SIZEOF_INT +
607         // This and previous block offset
608         2 * Bytes.SIZEOF_LONG +
609         // Heap size of the meta object. meta will be always not null.
610         fileContext.heapSize()
611     );
612 
613     if (buf != null) {
614       // Deep overhead of the byte buffer. Needs to be aligned separately.
615       size += ClassSize.align(buf.capacity() + BYTE_BUFFER_HEAP_SIZE);
616     }
617 
618     return ClassSize.align(size);
619   }
620 
621   /**
622    * Read from an input stream. Analogous to
623    * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but specifies a
624    * number of "extra" bytes that would be desirable but not absolutely
625    * necessary to read.
626    *
627    * @param in the input stream to read from
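       * <p>
       * A minimal usage sketch (the names {@code in}, {@code dest}, {@code blockSize} and
       * {@code hdrSize} are assumed): read one block and, opportunistically, the next
       * block's header too:
       * <pre>
       * boolean gotNextHeader =
       *     HFileBlock.readWithExtra(in, dest, 0, blockSize, hdrSize);
       * // false: the extra bytes could not be fully read; the necessary bytes were
       * // (otherwise an IOException is thrown).
       * </pre>
       *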
628    * @param buf the buffer to read into
629    * @param bufOffset the destination offset in the buffer
630    * @param necessaryLen the number of bytes that are absolutely necessary to
631    *          read
632    * @param extraLen the number of extra bytes that would be nice to read
633    * @return true if succeeded reading the extra bytes
634    * @throws IOException if failed to read the necessary bytes
635    */
636   public static boolean readWithExtra(InputStream in, byte buf[],
637       int bufOffset, int necessaryLen, int extraLen) throws IOException {
638     int bytesRemaining = necessaryLen + extraLen;
639     while (bytesRemaining > 0) {
640       int ret = in.read(buf, bufOffset, bytesRemaining);
641       if (ret == -1 && bytesRemaining <= extraLen) {
642         // We could not read the "extra data", but that is OK.
643         break;
644       }
645 
646       if (ret < 0) {
647         throw new IOException("Premature EOF from inputStream (read "
648             + "returned " + ret + ", was trying to read " + necessaryLen
649             + " necessary bytes and " + extraLen + " extra bytes, "
650             + "successfully read "
651             + (necessaryLen + extraLen - bytesRemaining));
652       }
653       bufOffset += ret;
654       bytesRemaining -= ret;
655     }
656     return bytesRemaining <= 0;
657   }
658 
659   /**
660    * @return the on-disk size of the next block (including the header size)
661    *         that was read by peeking into the next block's header
662    */
663   public int getNextBlockOnDiskSizeWithHeader() {
664     return nextBlockOnDiskSizeWithHeader;
665   }
666 
667   /**
668    * Unified version 2 {@link HFile} block writer. The intended usage pattern
669    * is as follows:
670    * <ol>
671    * <li>Construct an {@link HFileBlock.Writer}, providing a compression algorithm.
672    * <li>Call {@link Writer#startWriting} and get a data stream to write to.
673    * <li>Write your data into the stream.
674    * <li>Call {@link Writer#writeHeaderAndData(FSDataOutputStream)} as many times as you need to
675    * store the serialized block into an external stream.
676    * <li>Repeat to write more blocks.
677    * </ol>
678    * <p>
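       * A rough sketch of this sequence, for illustration only (the output stream
       * {@code out} and the {@code cells} source are assumed; error handling omitted):
       * <pre>
       * HFileContext meta = new HFileContextBuilder().build();
       * HFileBlock.Writer writer = new HFileBlock.Writer(NoOpDataBlockEncoder.INSTANCE, meta);
       * DataOutputStream dos = writer.startWriting(BlockType.DATA);
       * for (Cell cell : cells) {
       *   writer.write(cell);            // or write raw bytes into dos for non-data blocks
       * }
       * writer.writeHeaderAndData(out);  // out is the HFile's FSDataOutputStream
       * </pre>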
679    */
680   public static class Writer {
681 
682     private enum State {
683       INIT,
684       WRITING,
685       BLOCK_READY
686     };
687 
688     /** Writer state. Used to ensure the correct usage protocol. */
689     private State state = State.INIT;
690 
691     /** Data block encoder used for data blocks */
692     private final HFileDataBlockEncoder dataBlockEncoder;
693 
694     private HFileBlockEncodingContext dataBlockEncodingCtx;
695 
696     /** block encoding context for non-data blocks */
697     private HFileBlockDefaultEncodingContext defaultBlockEncodingCtx;
698 
699     /**
700      * The stream we use to accumulate data in uncompressed format for each
701      * block. We reset this stream at the end of each block and reuse it. The
702      * header is written as the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes into this
703      * stream.
704      */
705     private ByteArrayOutputStream baosInMemory;
706 
707     /**
708      * Current block type. Set in {@link #startWriting(BlockType)}. Could be
709      * changed in {@link #finishBlock()} from {@link BlockType#DATA}
710      * to {@link BlockType#ENCODED_DATA}.
711      */
712     private BlockType blockType;
713 
714     /**
715      * A stream that we write uncompressed bytes to, which compresses them and
716      * writes them to {@link #baosInMemory}.
717      */
718     private DataOutputStream userDataStream;
719 
720     // Size of actual data being written. Not considering the block encoding/compression. This
721     // includes the header size also.
722     private int unencodedDataSizeWritten;
723 
724     /**
725      * Bytes to be written to the file system, including the header. Compressed
726      * if compression is turned on. It also includes the checksum data that
727      * immediately follows the block data. (header + data + checksums)
728      */
729     private byte[] onDiskBytesWithHeader;
730 
731     /**
732      * The size of the checksum data on disk. It is used only if data is
733      * not compressed. If data is compressed, then the checksums are already
734      * part of onDiskBytesWithHeader. If data is uncompressed, then this
735      * variable stores the checksum data for this block.
736      */
737     private byte[] onDiskChecksum;
738 
739     /**
740      * Valid in the READY state. Contains the header and the uncompressed (but
741      * potentially encoded, if this is a data block) bytes, so the length is
742      * {@link #uncompressedSizeWithoutHeader} + {@link org.apache.hadoop.hbase.HConstants#HFILEBLOCK_HEADER_SIZE}.
743      * Does not store checksums.
744      */
745     private byte[] uncompressedBytesWithHeader;
746 
747     /**
748      * Current block's start offset in the {@link HFile}. Set in
749      * {@link #writeHeaderAndData(FSDataOutputStream)}.
750      */
751     private long startOffset;
752 
753     /**
754      * Offset of previous block by block type. Updated when the next block is
755      * started.
756      */
757     private long[] prevOffsetByType;
758 
759     /** The offset of the previous block of the same type */
760     private long prevOffset;
761     /** Meta data that holds information about the hfileblock**/
762     private HFileContext fileContext;
763 
764     /**
765      * @param dataBlockEncoder data block encoding algorithm to use
766      */
767     public Writer(HFileDataBlockEncoder dataBlockEncoder, HFileContext fileContext) {
768       this.dataBlockEncoder = dataBlockEncoder != null
769           ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
770       defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null,
771           HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
772       dataBlockEncodingCtx = this.dataBlockEncoder
773           .newDataBlockEncodingContext(HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
774 
775       if (fileContext.getBytesPerChecksum() < HConstants.HFILEBLOCK_HEADER_SIZE) {
776         throw new RuntimeException("Unsupported value of bytesPerChecksum. " +
777             " Minimum is " + HConstants.HFILEBLOCK_HEADER_SIZE + " but the configured value is " +
778             fileContext.getBytesPerChecksum());
779       }
780 
781       baosInMemory = new ByteArrayOutputStream();
782       
783       prevOffsetByType = new long[BlockType.values().length];
784       for (int i = 0; i < prevOffsetByType.length; ++i)
785         prevOffsetByType[i] = -1;
786 
787       this.fileContext = fileContext;
788     }
789 
790     /**
791      * Starts writing into the block. The previous block's data is discarded.
792      *
793      * @return the stream the user can write their data into
794      * @throws IOException
795      */
796     public DataOutputStream startWriting(BlockType newBlockType)
797         throws IOException {
798       if (state == State.BLOCK_READY && startOffset != -1) {
799         // We had a previous block that was written to a stream at a specific
800         // offset. Save that offset as the last offset of a block of that type.
801         prevOffsetByType[blockType.getId()] = startOffset;
802       }
803 
804       startOffset = -1;
805       blockType = newBlockType;
806 
807       baosInMemory.reset();
808       baosInMemory.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
809 
810       state = State.WRITING;
811 
812       // We will compress it later in finishBlock()
813       userDataStream = new DataOutputStream(baosInMemory);
814       if (newBlockType == BlockType.DATA) {
815         this.dataBlockEncoder.startBlockEncoding(dataBlockEncodingCtx, userDataStream);
816       }
817       this.unencodedDataSizeWritten = 0;
818       return userDataStream;
819     }
820 
821     /**
822      * Writes the Cell to this block
823      * @param cell
824      * @throws IOException
825      */
826     public void write(Cell cell) throws IOException{
827       expectState(State.WRITING);
828       this.unencodedDataSizeWritten += this.dataBlockEncoder.encode(cell, dataBlockEncodingCtx,
829           this.userDataStream);
830     }
831 
832     /**
833      * Returns the stream for the user to write to. The block writer takes care
834      * of handling compression and buffering for caching on write. Can only be
835      * called in the "writing" state.
836      *
837      * @return the data output stream for the user to write to
838      */
839     DataOutputStream getUserDataStream() {
840       expectState(State.WRITING);
841       return userDataStream;
842     }
843 
844     /**
845      * Transitions the block writer from the "writing" state to the "block
846      * ready" state.  Does nothing if a block is already finished.
847      */
848     void ensureBlockReady() throws IOException {
849       Preconditions.checkState(state != State.INIT,
850           "Unexpected state: " + state);
851 
852       if (state == State.BLOCK_READY)
853         return;
854 
855       // This will set state to BLOCK_READY.
856       finishBlock();
857     }
858 
859     /**
860      * An internal method that flushes the compressing stream (if using
861      * compression), serializes the header, and takes care of the separate
862      * uncompressed stream for caching on write, if applicable. Sets block
863      * write state to "block ready".
864      */
865     private void finishBlock() throws IOException {
866       if (blockType == BlockType.DATA) {
867         BufferGrabbingByteArrayOutputStream baosInMemoryCopy = 
868             new BufferGrabbingByteArrayOutputStream();
869         baosInMemory.writeTo(baosInMemoryCopy);
870         this.dataBlockEncoder.endBlockEncoding(dataBlockEncodingCtx, userDataStream,
871             baosInMemoryCopy.buf, blockType);
872         blockType = dataBlockEncodingCtx.getBlockType();
873       }
874       userDataStream.flush();
875       // This does an array copy, so it is safe to cache this byte array.
876       uncompressedBytesWithHeader = baosInMemory.toByteArray();
877       prevOffset = prevOffsetByType[blockType.getId()];
878 
879       // We need to set state before we can package the block up for
880       // cache-on-write. In a way, the block is ready, but not yet encoded or
881       // compressed.
882       state = State.BLOCK_READY;
883       if (blockType == BlockType.DATA || blockType == BlockType.ENCODED_DATA) {
884         onDiskBytesWithHeader = dataBlockEncodingCtx
885             .compressAndEncrypt(uncompressedBytesWithHeader);
886       } else {
887         onDiskBytesWithHeader = defaultBlockEncodingCtx
888             .compressAndEncrypt(uncompressedBytesWithHeader);
889       }
890       int numBytes = (int) ChecksumUtil.numBytes(
891           onDiskBytesWithHeader.length,
892           fileContext.getBytesPerChecksum());
893 
894       // put the header for on disk bytes
895       putHeader(onDiskBytesWithHeader, 0,
896           onDiskBytesWithHeader.length + numBytes,
897           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
898       // set the header for the uncompressed bytes (for cache-on-write)
899       putHeader(uncompressedBytesWithHeader, 0,
900           onDiskBytesWithHeader.length + numBytes,
901           uncompressedBytesWithHeader.length, onDiskBytesWithHeader.length);
902 
903       onDiskChecksum = new byte[numBytes];
904       ChecksumUtil.generateChecksums(
905           onDiskBytesWithHeader, 0, onDiskBytesWithHeader.length,
906           onDiskChecksum, 0, fileContext.getChecksumType(), fileContext.getBytesPerChecksum());
907     }
908 
909     public static class BufferGrabbingByteArrayOutputStream extends ByteArrayOutputStream {
910       private byte[] buf;
911 
912       @Override
913       public void write(byte[] b, int off, int len) {
914         this.buf = b;
915       }
916 
917       public byte[] getBuffer() {
918         return this.buf;
919       }
920     }
921 
922     /**
923      * Put the header into the given byte array at the given offset.
924      * @param onDiskSize size of the block on disk header + data + checksum
925      * @param uncompressedSize size of the block after decompression (but
926      *          before optional data block decoding) including header
927      * @param onDiskDataSize size of the block on disk with header
928      *        and data but not including the checksums
929      */
930     private void putHeader(byte[] dest, int offset, int onDiskSize,
931         int uncompressedSize, int onDiskDataSize) {
932       offset = blockType.put(dest, offset);
933       offset = Bytes.putInt(dest, offset, onDiskSize - HConstants.HFILEBLOCK_HEADER_SIZE);
934       offset = Bytes.putInt(dest, offset, uncompressedSize - HConstants.HFILEBLOCK_HEADER_SIZE);
935       offset = Bytes.putLong(dest, offset, prevOffset);
936       offset = Bytes.putByte(dest, offset, fileContext.getChecksumType().getCode());
937       offset = Bytes.putInt(dest, offset, fileContext.getBytesPerChecksum());
938       Bytes.putInt(dest, offset, onDiskDataSize);
939     }
940 
941     /**
942      * Similar to {@link #writeHeaderAndData(FSDataOutputStream)}, but records
943      * the offset of this block so that it can be referenced in the next block
944      * of the same type.
945      *
946      * @param out
947      * @throws IOException
948      */
949     public void writeHeaderAndData(FSDataOutputStream out) throws IOException {
950       long offset = out.getPos();
951       if (startOffset != -1 && offset != startOffset) {
952         throw new IOException("A " + blockType + " block written to a "
953             + "stream twice, first at offset " + startOffset + ", then at "
954             + offset);
955       }
956       startOffset = offset;
957 
958       finishBlockAndWriteHeaderAndData((DataOutputStream) out);
959     }
960 
961     /**
962      * Writes the header and the compressed data of this block (or uncompressed
963      * data when not using compression) into the given stream. Can be called in
964      * the "writing" state or in the "block ready" state. If called in the
965      * "writing" state, transitions the writer to the "block ready" state.
966      *
967      * @param out the output stream to write the block to
968      * @throws IOException
969      */
970     protected void finishBlockAndWriteHeaderAndData(DataOutputStream out)
971       throws IOException {
972       ensureBlockReady();
973       out.write(onDiskBytesWithHeader);
974       out.write(onDiskChecksum);
975     }
976 
977     /**
978      * Returns the header or the compressed data (or uncompressed data when not
979      * using compression) as a byte array. Can be called in the "writing" state
980      * or in the "block ready" state. If called in the "writing" state,
981      * transitions the writer to the "block ready" state. This returns
982      * the header + data + checksums stored on disk.
983      *
984      * @return header and data as they would be stored on disk in a byte array
985      * @throws IOException
986      */
987     byte[] getHeaderAndDataForTest() throws IOException {
988       ensureBlockReady();
989       // This is not very optimal, because we are doing an extra copy.
990       // But this method is used only by unit tests.
991       byte[] output =
992           new byte[onDiskBytesWithHeader.length
993               + onDiskChecksum.length];
994       System.arraycopy(onDiskBytesWithHeader, 0, output, 0,
995           onDiskBytesWithHeader.length);
996       System.arraycopy(onDiskChecksum, 0, output,
997           onDiskBytesWithHeader.length, onDiskChecksum.length);
998       return output;
999     }
1000 
1001     /**
1002      * Releases resources used by this writer.
1003      */
1004     public void release() {
1005       if (dataBlockEncodingCtx != null) {
1006         dataBlockEncodingCtx.close();
1007         dataBlockEncodingCtx = null;
1008       }
1009       if (defaultBlockEncodingCtx != null) {
1010         defaultBlockEncodingCtx.close();
1011         defaultBlockEncodingCtx = null;
1012       }
1013     }
1014 
1015     /**
1016      * Returns the on-disk size of the data portion of the block. This is the
1017      * compressed size if compression is enabled. Can only be called in the
1018      * "block ready" state. Header is not compressed, and its size is not
1019      * included in the return value.
1020      *
1021      * @return the on-disk size of the block, not including the header.
1022      */
1023     int getOnDiskSizeWithoutHeader() {
1024       expectState(State.BLOCK_READY);
1025       return onDiskBytesWithHeader.length + onDiskChecksum.length - HConstants.HFILEBLOCK_HEADER_SIZE;
1026     }
1027 
1028     /**
1029      * Returns the on-disk size of the block. Can only be called in the
1030      * "block ready" state.
1031      *
1032      * @return the on-disk size of the block ready to be written, including the
1033      *         header size, the data and the checksum data.
1034      */
1035     int getOnDiskSizeWithHeader() {
1036       expectState(State.BLOCK_READY);
1037       return onDiskBytesWithHeader.length + onDiskChecksum.length;
1038     }
1039 
1040     /**
1041      * The uncompressed size of the block data. Does not include header size.
1042      */
1043     int getUncompressedSizeWithoutHeader() {
1044       expectState(State.BLOCK_READY);
1045       return uncompressedBytesWithHeader.length - HConstants.HFILEBLOCK_HEADER_SIZE;
1046     }
1047 
1048     /**
1049      * The uncompressed size of the block data, including header size.
1050      */
1051     int getUncompressedSizeWithHeader() {
1052       expectState(State.BLOCK_READY);
1053       return uncompressedBytesWithHeader.length;
1054     }
1055 
1056     /** @return true if a block is being written  */
1057     public boolean isWriting() {
1058       return state == State.WRITING;
1059     }
1060 
1061     /**
1062      * Returns the number of bytes written into the current block so far, or
1063      * zero if not writing the block at the moment. Note that this will return
1064      * zero in the "block ready" state as well.
1065      *
1066      * @return the number of bytes written
1067      */
1068     public int blockSizeWritten() {
1069       if (state != State.WRITING) return 0;
1070       return this.unencodedDataSizeWritten;
1071     }
1072 
1073     /**
1074      * Returns the header followed by the uncompressed data, even if using
1075      * compression. This is needed for storing uncompressed blocks in the block
1076      * cache. Can be called in the "writing" state or the "block ready" state.
1077      * Returns only the header and data, does not include checksum data.
1078      *
1079      * @return uncompressed block bytes for caching on write
1080      */
1081     ByteBuffer getUncompressedBufferWithHeader() {
1082       expectState(State.BLOCK_READY);
1083       return ByteBuffer.wrap(uncompressedBytesWithHeader);
1084     }
1085 
1086     /**
1087      * Returns the header followed by the on-disk (compressed/encoded/encrypted) data. This is
1088      * needed for storing packed blocks in the block cache. Expects calling semantics identical to
1089      * {@link #getUncompressedBufferWithHeader()}. Returns only the header and data,
1090      * Does not include checksum data.
1091      *
1092      * @return packed block bytes for caching on write
1093      */
1094     ByteBuffer getOnDiskBufferWithHeader() {
1095       expectState(State.BLOCK_READY);
1096       return ByteBuffer.wrap(onDiskBytesWithHeader);
1097     }
1098 
1099     private void expectState(State expectedState) {
1100       if (state != expectedState) {
1101         throw new IllegalStateException("Expected state: " + expectedState +
1102             ", actual state: " + state);
1103       }
1104     }
1105 
1106     /**
1107      * Takes the given {@link BlockWritable} instance, creates a new block of
1108      * its appropriate type, writes the writable into this block, and flushes
1109      * the block into the output stream. The writer is instructed not to buffer
1110      * uncompressed bytes for cache-on-write.
1111      *
1112      * @param bw the block-writable object to write as a block
1113      * @param out the file system output stream
1114      * @throws IOException
1115      */
1116     public void writeBlock(BlockWritable bw, FSDataOutputStream out)
1117         throws IOException {
1118       bw.writeToBlock(startWriting(bw.getBlockType()));
1119       writeHeaderAndData(out);
1120     }
1121 
1122     /**
1123      * Creates a new HFileBlock. Checksums have already been validated, so
1124      * the byte buffer passed into the constructor of this newly created
1125      * block does not have checksum data even though the header minor 
1126      * version is MINOR_VERSION_WITH_CHECKSUM. This is indicated by setting a
1127      * 0 value in bytesPerChecksum.
1128      */
1129     public HFileBlock getBlockForCaching(CacheConfig cacheConf) {
1130       HFileContext newContext = new HFileContextBuilder()
1131                                 .withBlockSize(fileContext.getBlocksize())
1132                                 .withBytesPerCheckSum(0)
1133                                 .withChecksumType(ChecksumType.NULL) // no checksums in cached data
1134                                 .withCompression(fileContext.getCompression())
1135                                 .withDataBlockEncoding(fileContext.getDataBlockEncoding())
1136                                 .withHBaseCheckSum(fileContext.isUseHBaseChecksum())
1137                                 .withCompressTags(fileContext.isCompressTags())
1138                                 .withIncludesMvcc(fileContext.isIncludesMvcc())
1139                                 .withIncludesTags(fileContext.isIncludesTags())
1140                                 .build();
1141       return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
1142           getUncompressedSizeWithoutHeader(), prevOffset,
1143           cacheConf.shouldCacheCompressed(blockType.getCategory()) ?
1144             getOnDiskBufferWithHeader() :
1145             getUncompressedBufferWithHeader(),
1146           DONT_FILL_HEADER, startOffset,
1147           onDiskBytesWithHeader.length + onDiskChecksum.length, newContext);
1148     }
1149   }
1150 
1151   /** Something that can be written into a block. */
1152   public interface BlockWritable {
1153 
1154     /** The type of block this data should use. */
1155     BlockType getBlockType();
1156 
1157     /**
1158      * Writes the block to the provided stream. Must not write any magic
1159      * records.
1160      *
1161      * @param out a stream to write uncompressed data into
1162      */
1163     void writeToBlock(DataOutput out) throws IOException;
1164   }
1165 
1166   // Block readers and writers
1167 
1168   /** An interface allowing to iterate {@link HFileBlock}s. */
1169   public interface BlockIterator {
1170 
1171     /**
1172      * Get the next block, or null if there are no more blocks to iterate.
1173      */
1174     HFileBlock nextBlock() throws IOException;
1175 
1176     /**
1177      * Similar to {@link #nextBlock()} but checks block type, throws an
1178      * exception if incorrect, and returns the HFile block
1179      */
1180     HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
1181   }
1182 
1183   /** A full-fledged reader with iteration ability. */
1184   public interface FSReader {
1185 
1186     /**
1187      * Reads the block at the given offset in the file with the given on-disk
1188      * size and uncompressed size.
1189      *
1190      * @param offset
1191      * @param onDiskSize the on-disk size of the entire block, including all
1192      *          applicable headers, or -1 if unknown
1193      * @param uncompressedSize the uncompressed size of the compressed part of
1194      *          the block, or -1 if unknown
1195      * @return the newly read block
1196      */
1197     HFileBlock readBlockData(long offset, long onDiskSize,
1198         int uncompressedSize, boolean pread) throws IOException;
1199 
1200     /**
1201      * Creates a block iterator over the given portion of the {@link HFile}.
1202     * The iterator returns blocks starting at offsets such that
1203     * startOffset <= offset < endOffset. Returned blocks are always unpacked.
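         * <p>
         * A minimal iteration sketch (the {@code reader} instance and the offsets are
         * assumed):
         * <pre>
         * BlockIterator it = reader.blockRange(startOffset, endOffset);
         * for (HFileBlock block = it.nextBlock(); block != null; block = it.nextBlock()) {
         *   // inspect block ...
         * }
         * </pre>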
1204      *
1205      * @param startOffset the offset of the block to start iteration with
1206      * @param endOffset the offset to end iteration at (exclusive)
1207      * @return an iterator of blocks between the two given offsets
1208      */
1209     BlockIterator blockRange(long startOffset, long endOffset);
1210 
1211     /** Closes the backing streams */
1212     void closeStreams() throws IOException;
1213 
1214     /** Get a decoder for {@link BlockType#ENCODED_DATA} blocks from this file. */
1215     HFileBlockDecodingContext getBlockDecodingContext();
1216 
1217     /** Get the default decoder for blocks from this file. */
1218     HFileBlockDecodingContext getDefaultBlockDecodingContext();
1219   }
1220 
1221   /**
1222    * A common implementation of some methods of {@link FSReader} and some
1223    * tools for implementing HFile format version-specific block readers.
1224    */
1225   private abstract static class AbstractFSReader implements FSReader {
1226     /** Compression algorithm used by the {@link HFile} */
1227 
1228     /** The size of the file we are reading from, or -1 if unknown. */
1229     protected long fileSize;
1230 
1231     /** The size of the header */
1232     protected final int hdrSize;
1233 
1234     /** The filesystem used to access data */
1235     protected HFileSystem hfs;
1236 
1237     /** The path (if any) where this data is coming from */
1238     protected Path path;
1239 
1240     private final Lock streamLock = new ReentrantLock();
1241 
1242     /** The default buffer size for our buffered streams */
1243     public static final int DEFAULT_BUFFER_SIZE = 1 << 20;
1244 
1245     protected HFileContext fileContext;
1246 
1247     public AbstractFSReader(long fileSize, HFileSystem hfs, Path path, HFileContext fileContext)
1248         throws IOException {
1249       this.fileSize = fileSize;
1250       this.hfs = hfs;
1251       this.path = path;
1252       this.fileContext = fileContext;
1253       this.hdrSize = headerSize(fileContext.isUseHBaseChecksum());
1254     }
1255 
1256     @Override
1257     public BlockIterator blockRange(final long startOffset,
1258         final long endOffset) {
1259       final FSReader owner = this; // handle for inner class
1260       return new BlockIterator() {
1261         private long offset = startOffset;
1262 
1263         @Override
1264         public HFileBlock nextBlock() throws IOException {
1265           if (offset >= endOffset)
1266             return null;
1267           HFileBlock b = readBlockData(offset, -1, -1, false);
1268           offset += b.getOnDiskSizeWithHeader();
1269           return b.unpack(fileContext, owner);
1270         }
1271 
1272         @Override
1273         public HFileBlock nextBlockWithBlockType(BlockType blockType)
1274             throws IOException {
1275           HFileBlock blk = nextBlock();
1276           if (blk.getBlockType() != blockType) {
1277             throw new IOException("Expected block of type " + blockType
1278                 + " but found " + blk.getBlockType());
1279           }
1280           return blk;
1281         }
1282       };
1283     }
1284 
1285     /**
1286      * Does a positional read or a seek and read into the given buffer. Returns
1287      * the on-disk size of the next block, or -1 if it could not be determined.
1288      *
1289      * @param dest destination buffer
1290      * @param destOffset offset in the destination buffer
1291      * @param size size of the block to be read
1292      * @param peekIntoNextBlock whether to read the next block's on-disk size
1293      * @param fileOffset position in the stream to read at
1294      * @param pread whether we should do a positional read
1295      * @param istream The input source of data
1296      * @return the on-disk size of the next block with header size included, or
1297      *         -1 if it could not be determined
1298      * @throws IOException
1299      */
1300     protected int readAtOffset(FSDataInputStream istream,
1301         byte[] dest, int destOffset, int size,
1302         boolean peekIntoNextBlock, long fileOffset, boolean pread)
1303         throws IOException {
1304       if (peekIntoNextBlock &&
1305           destOffset + size + hdrSize > dest.length) {
1306         // We are asked to read the next block's header as well, but there is
1307         // not enough room in the array.
1308         throw new IOException("Attempted to read " + size + " bytes and " +
1309             hdrSize + " bytes of next header into a " + dest.length +
1310             "-byte array at offset " + destOffset);
1311       }
1312 
1313       if (!pread && streamLock.tryLock()) {
1314         // Seek + read. Better for scanning.
1315         try {
1316           istream.seek(fileOffset);
1317 
1318           long realOffset = istream.getPos();
1319           if (realOffset != fileOffset) {
1320             throw new IOException("Tried to seek to " + fileOffset + " to "
1321                 + "read " + size + " bytes, but pos=" + realOffset
1322                 + " after seek");
1323           }
1324 
1325           if (!peekIntoNextBlock) {
1326             IOUtils.readFully(istream, dest, destOffset, size);
1327             return -1;
1328           }
1329 
1330           // Try to read the next block header.
1331           if (!readWithExtra(istream, dest, destOffset, size, hdrSize))
1332             return -1;
1333         } finally {
1334           streamLock.unlock();
1335         }
1336       } else {
1337         // Positional read. Better for random reads, or when the streamLock is already held.
1338         int extraSize = peekIntoNextBlock ? hdrSize : 0;
1339         int ret = istream.read(fileOffset, dest, destOffset, size + extraSize);
1340         if (ret < size) {
1341           throw new IOException("Positional read of " + size + " bytes " +
1342               "failed at offset " + fileOffset + " (returned " + ret + ")");
1343         }
1344 
1345         if (ret == size || ret < size + extraSize) {
1346           // Could not read the next block's header, or did not try.
1347           return -1;
1348         }
1349       }
1350 
1351       assert peekIntoNextBlock;
1352       return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize;
1353     }
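
    /*
     * Buffer layout when peekIntoNextBlock is true (comment added for
     * illustration; the offsets follow directly from the code above):
     *
     *   dest[destOffset .. destOffset+size)                this block (header + data + checksums)
     *   dest[destOffset+size .. destOffset+size+hdrSize)   the next block's header
     *
     * The return value is taken from the "on-disk size excluding header" field of
     * that peeked header (the 4 bytes right after the 8-byte magic), plus hdrSize,
     * i.e. the next block's on-disk size including its header.
     */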
1354 
1355   }
1356 
1357   /**
1358    * We always prefetch the header of the next block, so that we know its
1359    * on-disk size in advance and can read it in one operation.
1360    */
1361   private static class PrefetchedHeader {
1362     long offset = -1;
1363     byte[] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
1364     ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE);
1365   }
1366 
1367   /** Reads version 2 blocks from the filesystem. */
1368   static class FSReaderV2 extends AbstractFSReader {
1369     /** The file system stream of the underlying {@link HFile} that
1370      * may or may not do checksum validations in the filesystem */
1371     protected FSDataInputStreamWrapper streamWrapper;
1372 
1373     private HFileBlockDecodingContext encodedBlockDecodingCtx;
1374 
1375     /** Default context used when BlockType != {@link BlockType#ENCODED_DATA}. */
1376     private final HFileBlockDefaultDecodingContext defaultDecodingCtx;
1377 
1378     private ThreadLocal<PrefetchedHeader> prefetchedHeaderForThread =
1379         new ThreadLocal<PrefetchedHeader>() {
1380           @Override
1381           public PrefetchedHeader initialValue() {
1382             return new PrefetchedHeader();
1383           }
1384         };
1385 
1386     public FSReaderV2(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
1387         HFileContext fileContext) throws IOException {
1388       super(fileSize, hfs, path, fileContext);
1389       this.streamWrapper = stream;
1390       // Older versions of HBase didn't support checksums.
1391       this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum());
1392       defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext);
1393       encodedBlockDecodingCtx = defaultDecodingCtx;
1394     }
1395 
1396     /**
1397      * A constructor that reads files with the latest minor version.
1398      * This is used by unit tests only.
1399      */
1400     FSReaderV2(FSDataInputStream istream, long fileSize, HFileContext fileContext) throws IOException {
1401       this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
1402     }
1403 
1404     /**
1405      * Reads a version 2 block. Tries to do as little memory allocation as
1406      * possible, using the provided on-disk size.
1407      *
1408      * @param offset the offset in the stream to read at
1409      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1410      *          the header, or -1 if unknown
1411      * @param uncompressedSize the uncompressed size of the block. Always
1412      *          expected to be -1. This parameter is only used in version 1.
1413      * @param pread whether to use a positional read
1414      */
1415     @Override
1416     public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL,
1417         int uncompressedSize, boolean pread) throws IOException {
1418 
1419       // Get a copy of the current state of whether to validate
1420       // HBase checksums or not for this read call. This is not
1421       // thread-safe, but the one constraint is that if we decide
1422       // to skip HBase checksum verification then we are
1423       // guaranteed to use HDFS checksum verification.
1424       boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum();
1425       FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum);
1426 
1427       HFileBlock blk = readBlockDataInternal(is, offset, 
1428                          onDiskSizeWithHeaderL, 
1429                          uncompressedSize, pread,
1430                          doVerificationThruHBaseChecksum);
1431       if (blk == null) {
1432         HFile.LOG.warn("HBase checksum verification failed for file " +
1433                        path + " at offset " +
1434                        offset + " filesize " + fileSize +
1435                        ". Retrying read with HDFS checksums turned on...");
1436 
1437         if (!doVerificationThruHBaseChecksum) {
1438           String msg = "HBase checksum verification failed for file " +
1439                        path + " at offset " +
1440                        offset + " filesize " + fileSize + 
1441                        " but this cannot happen because doVerify is " +
1442                        doVerificationThruHBaseChecksum;
1443           HFile.LOG.warn(msg);
1444           throw new IOException(msg); // cannot happen case here
1445         }
1446         HFile.checksumFailures.incrementAndGet(); // update metrics
1447 
1448         // If we have a checksum failure, we fall back into a mode where
1449         // the next few reads use HDFS level checksums. We aim to make the
1450         // next CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD reads avoid
1451         // hbase checksum verification, but since this value is set without
1452         // holding any locks, it can so happen that we might actually do
1453         // a few more than precisely this number.
1454         is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD);
1455         doVerificationThruHBaseChecksum = false;
1456         blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL,
1457                                     uncompressedSize, pread,
1458                                     doVerificationThruHBaseChecksum);
1459         if (blk != null) {
1460           HFile.LOG.warn("HDFS checksum verification succeeded for file " +
1461                          path + " at offset " +
1462                          offset + " filesize " + fileSize);
1463         }
1464       } 
1465       if (blk == null && !doVerificationThruHBaseChecksum) {
1466         String msg = "readBlockData failed, possibly due to a " +
1467                      "checksum verification failure, for file " + path +
1468                      " at offset " + offset + " filesize " + fileSize;
1469         HFile.LOG.warn(msg);
1470         throw new IOException(msg);
1471       }
1472 
1473       // If there was a checksum mismatch earlier, we retried with
1474       // HBase checksums switched off and used HDFS checksum verification
1475       // instead. This triggers HDFS to detect and fix corrupt replicas. The
1476       // next checksumOffCount read requests will use HDFS checksums.
1477       // The decrementing of this.checksumOffCount is not thread-safe,
1478       // but it is harmless because eventually checksumOffCount will be
1479       // a negative number.
1480       streamWrapper.checksumOk();
1481       return blk;
1482     }
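
    /*
     * Summary of the checksum fallback implemented above (comment added for
     * clarity):
     *   1. Read the block, validating HBase-level checksums if they are enabled.
     *   2. On a mismatch (null result), log a warning, bump the checksumFailures
     *      metric, and switch the stream wrapper to HDFS-level checksums for
     *      roughly the next CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD reads.
     *   3. Retry the read with HDFS checksums; if it still fails, throw.
     *   4. On success, checksumOk() counts the good read so the wrapper can
     *      eventually switch back to HBase-level checksum verification.
     */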
1483 
1484     /**
1485      * Reads a version 2 block. 
1486      *
1487      * @param offset the offset in the stream to read at
1488      * @param onDiskSizeWithHeaderL the on-disk size of the block, including
1489      *          the header, or -1 if unknown
1490      * @param uncompressedSize the uncompressed size of the block. Always
1491      *          expected to be -1. This parameter is only used in version 1.
1492      * @param pread whether to use a positional read
1493      * @param verifyChecksum Whether to use HBase checksums. 
1494      *        If HBase checksum is switched off, then use HDFS checksum.
1495      * @return the HFileBlock or null if there is an HBase checksum mismatch
1496      */
1497     private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, 
1498         long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread,
1499         boolean verifyChecksum) throws IOException {
1500       if (offset < 0) {
1501         throw new IOException("Invalid offset=" + offset + " trying to read "
1502             + "block (onDiskSize=" + onDiskSizeWithHeaderL
1503             + ", uncompressedSize=" + uncompressedSize + ")");
1504       }
1505       if (uncompressedSize != -1) {
1506         throw new IOException("Version 2 block reader API does not need " +
1507             "the uncompressed size parameter");
1508       }
1509 
1510       if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1)
1511           || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) {
1512         throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL
1513             + ": expected to be at least " + hdrSize
1514             + " and at most " + Integer.MAX_VALUE + ", or -1 (offset="
1515             + offset + ", uncompressedSize=" + uncompressedSize + ")");
1516       }
1517 
1518       int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL;
1519       // See if we can avoid reading the header. This is desirable, because
1520       // we will not incur a backward seek operation if we have already
1521       // read this block's header as part of the previous read's look-ahead.
1522       // And we also want to skip reading the header again if it has already
1523       // been read.
1524       PrefetchedHeader prefetchedHeader = prefetchedHeaderForThread.get();
1525       ByteBuffer headerBuf = prefetchedHeader.offset == offset ?
1526           prefetchedHeader.buf : null;
1527 
1528       int nextBlockOnDiskSize = 0;
1529       // Allocate enough space to fit the next block's header too.
1530       byte[] onDiskBlock = null;
1531 
1532       HFileBlock b = null;
1533       if (onDiskSizeWithHeader > 0) {
1534         // We know the total on-disk size. Read the entire block into memory,
1535         // then parse the header. This code path is used when
1536         // doing a random read operation relying on the block index, as well as
1537         // when the client knows the on-disk size from peeking into the next
1538         // block's header (e.g. this block's header) when reading the previous
1539         // block. This is the faster and more preferable case.
1540 
1541         // Size that we have to skip in case we have already read the header.
1542         int preReadHeaderSize = headerBuf == null ? 0 : hdrSize;
1543         onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize]; // room for this block plus the
1544                                                                 // next block's header
1545         nextBlockOnDiskSize = readAtOffset(is, onDiskBlock,
1546             preReadHeaderSize, onDiskSizeWithHeader - preReadHeaderSize,
1547             true, offset + preReadHeaderSize, pread);
1548         if (headerBuf != null) {
1549           // the header has been read when reading the previous block, copy
1550           // to this block's header
1551           System.arraycopy(headerBuf.array(),
1552               headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1553         } else {
1554           headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize);
1555         }
1556         // At this point the block's header is available, either just read or
1557         // copied from the prefetched header of the previous read, so parse it
1558         // and validate the sizes it reports.
1559         try {
1560           b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1561         } catch (IOException ex) {
1562           // Seen in load testing. Provide comprehensive debug info.
1563           throw new IOException("Failed to read compressed block at "
1564               + offset
1565               + ", onDiskSizeWithHeader="
1566               + onDiskSizeWithHeader
1567               + ", hdrSize="
1568               + hdrSize
1569               + ", header.length="
1570               + prefetchedHeader.header.length
1571               + ", header bytes: "
1572               + Bytes.toStringBinary(prefetchedHeader.header, 0,
1573                   hdrSize), ex);
1574         }
1575         // If the caller specified an onDiskSizeWithHeader, validate it.
1576         int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
1577         assert onDiskSizeWithoutHeader >= 0;
1578         b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader);
1579       } else {
1580         // Check headerBuf to see if we have read this block's header as part of
1581         // reading the previous block. This is an optimization of peeking into
1582         // the next block's header (e.g. this block's header) when reading the
1583         // previous block. If the header is already there, don't read it again;
1584         // that is the faster and more preferable case.
1585 
1586         // Unfortunately, we still have to do a separate read operation to
1587         // read the header.
1588         if (headerBuf == null) {
1589           // From the header, determine the on-disk size of the given hfile
1590           // block, and read the remaining data, thereby incurring two read
1591           // operations. This might happen when we are doing the first read
1592           // in a series of reads or a random read, and we don't have access
1593           // to the block index. This is costly and should happen very rarely.
1594           headerBuf = ByteBuffer.allocate(hdrSize);
1595           readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(),
1596               hdrSize, false, offset, pread);
1597         }
1598         b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
1599         onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize];
1600         System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
1601         nextBlockOnDiskSize =
1602           readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader()
1603               - hdrSize, true, offset + hdrSize, pread);
1604         onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize;
1605       }
1606 
1607       if (!fileContext.isCompressedOrEncrypted()) {
1608         b.assumeUncompressed();
1609       }
1610 
1611       if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
1612         return null;             // checksum mismatch
1613       }
1614 
1615       // The onDiskBlock will become the headerAndDataBuffer for this block.
1616       // If nextBlockOnDiskSizeWithHeader is not zero, onDiskBlock already
1617       // contains the next block's header, so there is no need to set it
1618       // separately.
1619       b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader),
1620         this.fileContext.isUseHBaseChecksum());
1621 
1622       b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize;
1623 
1624       // Set prefetched header
1625       if (b.hasNextBlockHeader()) {
1626         prefetchedHeader.offset = offset + b.getOnDiskSizeWithHeader();
1627         System.arraycopy(onDiskBlock, onDiskSizeWithHeader,
1628             prefetchedHeader.header, 0, hdrSize);
1629       }
1630 
1631       b.offset = offset;
1632       b.fileContext.setIncludesTags(this.fileContext.isIncludesTags());
1633       b.fileContext.setIncludesMvcc(this.fileContext.isIncludesMvcc());
1634       return b;
1635     }
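
    /*
     * The two read paths above, in short (comment added for clarity):
     *   - onDiskSizeWithHeader > 0: the size is already known (from the block index
     *     or from the peeked header of the previous read), so the whole block plus
     *     the next block's header is fetched in a single read.
     *   - onDiskSizeWithHeader == -1: the header is read first (unless it was
     *     prefetched), the on-disk size is parsed from it, and a second read
     *     fetches the rest of the block. This costs up to two reads and is
     *     expected to be rare.
     */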
1636 
1637     void setIncludesMemstoreTS(boolean includesMemstoreTS) {
1638       this.fileContext.setIncludesMvcc(includesMemstoreTS);
1639     }
1640 
1641     void setDataBlockEncoder(HFileDataBlockEncoder encoder) {
1642       encodedBlockDecodingCtx = encoder.newDataBlockDecodingContext(this.fileContext);
1643     }
1644 
1645     @Override
1646     public HFileBlockDecodingContext getBlockDecodingContext() {
1647       return this.encodedBlockDecodingCtx;
1648     }
1649 
1650     @Override
1651     public HFileBlockDecodingContext getDefaultBlockDecodingContext() {
1652       return this.defaultDecodingCtx;
1653     }
1654 
1655     /**
1656      * Generates the checksum for the header as well as the data and
1657      * then validates that it matches the value stored in the header.
1658      * If there is a checksum mismatch, then return false. Otherwise
1659      * return true.
1660      */
1661     protected boolean validateBlockChecksum(HFileBlock block,  byte[] data, int hdrSize)
1662         throws IOException {
1663       return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize);
1664     }
1665 
1666     @Override
1667     public void closeStreams() throws IOException {
1668       streamWrapper.close();
1669     }
1670 
1671     @Override
1672     public String toString() {
1673       return "FSReaderV2 [ hfs=" + hfs + " path=" + path + " fileContext=" + fileContext + " ]";
1674     }
1675   }
1676 
1677   @Override
1678   public int getSerializedLength() {
1679     if (buf != null) {
1680       // include extra bytes for the next header when it's available.
1681       int extraSpace = hasNextBlockHeader() ? headerSize() : 0;
1682       return this.buf.limit() + extraSpace + HFileBlock.EXTRA_SERIALIZATION_SPACE;
1683     }
1684     return 0;
1685   }
1686 
1687   @Override
1688   public void serialize(ByteBuffer destination) {
1689     // assumes HeapByteBuffer
1690     destination.put(this.buf.array(), this.buf.arrayOffset(),
1691       getSerializedLength() - EXTRA_SERIALIZATION_SPACE);
1692     serializeExtraInfo(destination);
1693   }
1694 
1695   public void serializeExtraInfo(ByteBuffer destination) {
1696     destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0);
1697     destination.putLong(this.offset);
1698     destination.putInt(this.nextBlockOnDiskSizeWithHeader);
1699     destination.rewind();
1700   }
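
  /*
   * Sketch of the layout written into a cache that serializes blocks (comment
   * added for clarity; the sizes of the trailing fields follow from the puts in
   * serializeExtraInfo() above):
   *
   *   block bytes (header + data, plus the prefetched next header when present)
   *   1 byte   usesHBaseChecksum flag
   *   8 bytes  offset of the block in the HFile
   *   4 bytes  nextBlockOnDiskSizeWithHeader
   *
   * The three trailing fields are expected to account for
   * HFileBlock.EXTRA_SERIALIZATION_SPACE in getSerializedLength().
   */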
1701 
1702   @Override
1703   public CacheableDeserializer<Cacheable> getDeserializer() {
1704     return HFileBlock.blockDeserializer;
1705   }
1706 
1707   @Override
1708   public boolean equals(Object comparison) {
1709     if (this == comparison) {
1710       return true;
1711     }
1712     if (comparison == null) {
1713       return false;
1714     }
1715     if (comparison.getClass() != this.getClass()) {
1716       return false;
1717     }
1718 
1719     HFileBlock castedComparison = (HFileBlock) comparison;
1720 
1721     if (castedComparison.blockType != this.blockType) {
1722       return false;
1723     }
1724     if (castedComparison.nextBlockOnDiskSizeWithHeader != this.nextBlockOnDiskSizeWithHeader) {
1725       return false;
1726     }
1727     if (castedComparison.offset != this.offset) {
1728       return false;
1729     }
1730     if (castedComparison.onDiskSizeWithoutHeader != this.onDiskSizeWithoutHeader) {
1731       return false;
1732     }
1733     if (castedComparison.prevBlockOffset != this.prevBlockOffset) {
1734       return false;
1735     }
1736     if (castedComparison.uncompressedSizeWithoutHeader != this.uncompressedSizeWithoutHeader) {
1737       return false;
1738     }
1739     if (Bytes.compareTo(this.buf.array(), this.buf.arrayOffset(), this.buf.limit(),
1740       castedComparison.buf.array(), castedComparison.buf.arrayOffset(),
1741       castedComparison.buf.limit()) != 0) {
1742       return false;
1743     }
1744     return true;
1745   }
1746 
1747   public DataBlockEncoding getDataBlockEncoding() {
1748     if (blockType == BlockType.ENCODED_DATA) {
1749       return DataBlockEncoding.getEncodingById(getDataBlockEncodingId());
1750     }
1751     return DataBlockEncoding.NONE;
1752   }
1753 
1754   byte getChecksumType() {
1755     return this.fileContext.getChecksumType().getCode();
1756   }
1757 
1758   int getBytesPerChecksum() {
1759     return this.fileContext.getBytesPerChecksum();
1760   }
1761 
1762   /** @return the size of data on disk + header. Excludes checksum. */
1763   int getOnDiskDataSizeWithHeader() {
1764     return this.onDiskDataSizeWithHeader;
1765   }
1766 
1767   /** 
1768    * Calculate the number of bytes required to store all the checksums
1769    * for this block. Each checksum value is a 4 byte integer.
1770    */
1771   int totalChecksumBytes() {
1772     // If the hfile block has minorVersion 0, then there are no checksum
1773     // data to validate. Similarly, a zero value in this.bytesPerChecksum
1774     // indicates that cached blocks do not have checksum data because
1775     // checksums were already validated when the block was read from disk.
1776     if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) {
1777       return 0;
1778     }
1779     return (int)ChecksumUtil.numBytes(onDiskDataSizeWithHeader, this.fileContext.getBytesPerChecksum());
1780   }
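
  /*
   * Worked example with illustrative numbers: if onDiskDataSizeWithHeader is
   * 65,569 bytes and bytesPerChecksum is 16,384, the data spans
   * ceil(65569 / 16384) = 5 checksum chunks, so totalChecksumBytes() should
   * return 5 * 4 = 20 bytes.
   */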
1781 
1782   /**
1783    * Returns the size of this block header.
1784    */
1785   public int headerSize() {
1786     return headerSize(this.fileContext.isUseHBaseChecksum());
1787   }
1788 
1789   /**
1790    * Returns the header size, depending on whether HBase checksums are in use.
1791    */
1792   public static int headerSize(boolean usesHBaseChecksum) {
1793     if (usesHBaseChecksum) {
1794       return HConstants.HFILEBLOCK_HEADER_SIZE;
1795     }
1796     return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
1797   }
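
  /*
   * For reference (sizes implied by the header fields parsed in toStringHeader()
   * below): without HBase checksums the header is 8 (magic) + 4 + 4 + 8 = 24 bytes;
   * with checksums it grows by 1 (checksum type) + 4 (bytesPerChecksum) +
   * 4 (onDiskDataSizeWithHeader) to 33 bytes.
   */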
1798 
1799   /**
1800    * Return the appropriate DUMMY_HEADER for the minor version
1801    */
1802   public byte[] getDummyHeaderForVersion() {
1803     return getDummyHeaderForVersion(this.fileContext.isUseHBaseChecksum());
1804   }
1805 
1806   /**
1807    * Return the appropriate DUMMY_HEADER depending on whether HBase checksums are in use
1808    */
1809   static private byte[] getDummyHeaderForVersion(boolean usesHBaseChecksum) {
1810     if (usesHBaseChecksum) {
1811       return HConstants.HFILEBLOCK_DUMMY_HEADER;
1812     }
1813     return DUMMY_HEADER_NO_CHECKSUM;
1814   }
1815 
1816   /**
1817    * @return the HFileContext used to create this HFileBlock. Not necessarily the
1818    * fileContext of the file from which this block's data was originally read.
1819    */
1820   public HFileContext getHFileContext() {
1821     return this.fileContext;
1822   }
1823 
1824   /**
1825    * Convert the contents of the block header into a human-readable string.
1826    * This is mostly helpful for debugging. This assumes that the block
1827    * has minor version > 0.
1828    */
1829   static String toStringHeader(ByteBuffer buf) throws IOException {
1830     int offset = buf.arrayOffset();
1831     byte[] b = buf.array();
1832     long magic = Bytes.toLong(b, offset);
1833     BlockType bt = BlockType.read(buf);
1834     offset += Bytes.SIZEOF_LONG;
1835     int compressedBlockSizeNoHeader = Bytes.toInt(b, offset);
1836     offset += Bytes.SIZEOF_INT;
1837     int uncompressedBlockSizeNoHeader = Bytes.toInt(b, offset);
1838     offset += Bytes.SIZEOF_INT;
1839     long prevBlockOffset = Bytes.toLong(b, offset); 
1840     offset += Bytes.SIZEOF_LONG;
1841     byte cksumtype = b[offset];
1842     offset += Bytes.SIZEOF_BYTE;
1843     long bytesPerChecksum = Bytes.toInt(b, offset); 
1844     offset += Bytes.SIZEOF_INT;
1845     long onDiskDataSizeWithHeader = Bytes.toInt(b, offset); 
1846     offset += Bytes.SIZEOF_INT;
1847     return " Header dump: magic: " + magic +
1848                    " blockType " + bt +
1849                    " compressedBlockSizeNoHeader " + 
1850                    compressedBlockSizeNoHeader +
1851                    " uncompressedBlockSizeNoHeader " + 
1852                    uncompressedBlockSizeNoHeader +
1853                    " prevBlockOffset " + prevBlockOffset +
1854                    " checksumType " + ChecksumType.codeToType(cksumtype) +
1855                    " bytesPerChecksum " + bytesPerChecksum +
1856                    " onDiskDataSizeWithHeader " + onDiskDataSizeWithHeader;
1857   }
1858 }