View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.DataInput;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.ArrayList;
24  import java.util.List;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.Cell;
32  import org.apache.hadoop.hbase.CellUtil;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.KeyValue.KVComparator;
36  import org.apache.hadoop.hbase.NoTagsKeyValue;
37  import org.apache.hadoop.hbase.fs.HFileSystem;
38  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
39  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
40  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
41  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
42  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
43  import org.apache.hadoop.hbase.util.Bytes;
44  import org.apache.hadoop.hbase.util.IdLock;
45  import org.apache.hadoop.io.WritableUtils;
46  import org.apache.htrace.Trace;
47  import org.apache.htrace.TraceScope;
48  
49  import com.google.common.annotations.VisibleForTesting;
50  
51  /**
52   * {@link HFile} reader for version 2.
53   */
54  @InterfaceAudience.Private
55  public class HFileReaderV2 extends AbstractHFileReader {
56  
57    private static final Log LOG = LogFactory.getLog(HFileReaderV2.class);
58  
59    /** Minor versions in HFile V2 starting with this number have hbase checksums */
60    public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
61    /** In HFile V2 minor version that does not support checksums */
62    public static final int MINOR_VERSION_NO_CHECKSUM = 0;
63  
64    /** HFile minor version that introduced pbuf filetrailer */
65    public static final int PBUF_TRAILER_MINOR_VERSION = 2;
66  
67    /**
68     * The size of a (key length, value length) tuple that prefixes each entry in
69     * a data block.
70     */
71    public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
72  
73    private boolean includesMemstoreTS = false;
74    protected boolean decodeMemstoreTS = false;
75  
76    protected boolean shouldIncludeMemstoreTS() {
77      return includesMemstoreTS;
78    }
79  
80    /** Filesystem-level block reader. */
81    private HFileBlock.FSReader fsBlockReader;
82  
83    /**
84     * A "sparse lock" implementation allowing to lock on a particular block
85     * identified by offset. The purpose of this is to avoid two clients loading
86     * the same block, and have all but one client wait to get the block from the
87     * cache.
88     */
89    private IdLock offsetLock = new IdLock();
90  
91    /**
92     * Blocks read from the load-on-open section, excluding data root index, meta
93     * index, and file info.
94     */
95    private List<HFileBlock> loadOnOpenBlocks = new ArrayList<HFileBlock>();
96  
97    /** Minimum minor version supported by this HFile format */
98    static final int MIN_MINOR_VERSION = 0;
99  
100   /** Maximum minor version supported by this HFile format */
101   // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
102   // the file. This version can read Writables version 1.
103   static final int MAX_MINOR_VERSION = 3;
104 
105   /** Minor versions starting with this number have faked index key */
106   static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
107 
108   HFileContext hfileContext;
109 
110   /**
111    * Opens a HFile. You must load the index before you can use it by calling
112    * {@link #loadFileInfo()}.
113    *
114    * @param path Path to HFile.
115    * @param trailer File trailer.
116    * @param fsdis input stream.
117    * @param size Length of the stream.
118    * @param cacheConf Cache configuration.
119    * @param hfs
120    * @param conf
121    */
122   public HFileReaderV2(final Path path, final FixedFileTrailer trailer,
123       final FSDataInputStreamWrapper fsdis, final long size, final CacheConfig cacheConf,
124       final HFileSystem hfs, final Configuration conf) throws IOException {
125     super(path, trailer, size, cacheConf, hfs, conf);
126     this.conf = conf;
127     trailer.expectMajorVersion(getMajorVersion());
128     validateMinorVersion(path, trailer.getMinorVersion());
129     this.hfileContext = createHFileContext(fsdis, fileSize, hfs, path, trailer);
130     HFileBlock.FSReaderImpl fsBlockReaderV2 =
131       new HFileBlock.FSReaderImpl(fsdis, fileSize, hfs, path, hfileContext);
132     this.fsBlockReader = fsBlockReaderV2; // upcast
133 
134     // Comparator class name is stored in the trailer in version 2.
135     comparator = trailer.createComparator();
136     dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator,
137         trailer.getNumDataIndexLevels(), this);
138     metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader(
139         KeyValue.RAW_COMPARATOR, 1);
140 
141     // Parse load-on-open data.
142 
143     HFileBlock.BlockIterator blockIter = fsBlockReaderV2.blockRange(
144         trailer.getLoadOnOpenDataOffset(),
145         fileSize - trailer.getTrailerSize());
146 
147     // Data index. We also read statistics about the block index written after
148     // the root level.
149     dataBlockIndexReader.readMultiLevelIndexRoot(
150         blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
151         trailer.getDataIndexCount());
152 
153     // Meta index.
154     metaBlockIndexReader.readRootIndex(
155         blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
156         trailer.getMetaIndexCount());
157 
158     // File info
159     fileInfo = new FileInfo();
160     fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
161     byte[] creationTimeBytes = fileInfo.get(FileInfo.CREATE_TIME_TS);
162     this.hfileContext.setFileCreateTime(creationTimeBytes == null? 0:
163       Bytes.toLong(creationTimeBytes));
164     lastKey = fileInfo.get(FileInfo.LASTKEY);
165     avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
166     avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
167     byte [] keyValueFormatVersion =
168         fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION);
169     includesMemstoreTS = keyValueFormatVersion != null &&
170         Bytes.toInt(keyValueFormatVersion) ==
171             HFileWriterV2.KEY_VALUE_VER_WITH_MEMSTORE;
172     fsBlockReaderV2.setIncludesMemstoreTS(includesMemstoreTS);
173     if (includesMemstoreTS) {
174       decodeMemstoreTS = Bytes.toLong(fileInfo.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY)) > 0;
175     }
176 
177     // Read data block encoding algorithm name from file info.
178     dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
179     fsBlockReaderV2.setDataBlockEncoder(dataBlockEncoder);
180 
181     // Store all other load-on-open blocks for further consumption.
182     HFileBlock b;
183     while ((b = blockIter.nextBlock()) != null) {
184       loadOnOpenBlocks.add(b);
185     }
186 
187     // Prefetch file blocks upon open if requested
188     if (cacheConf.shouldPrefetchOnOpen()) {
189       PrefetchExecutor.request(path, new Runnable() {
190         public void run() {
191           try {
192             long offset = 0;
193             long end = fileSize - getTrailer().getTrailerSize();
194             HFileBlock prevBlock = null;
195             while (offset < end) {
196               if (Thread.interrupted()) {
197                 break;
198               }
199               long onDiskSize = -1;
200               if (prevBlock != null) {
201                 onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
202               }
203               HFileBlock block = readBlock(offset, onDiskSize, true, false, false, false,
204                 null, null);
205               prevBlock = block;
206               offset += block.getOnDiskSizeWithHeader();
207             }
208           } catch (IOException e) {
209             // IOExceptions are probably due to region closes (relocation, etc.)
210             if (LOG.isTraceEnabled()) {
211               LOG.trace("Exception encountered while prefetching " + path + ":", e);
212             }
213           } catch (Exception e) {
214             // Other exceptions are interesting
215             LOG.warn("Exception encountered while prefetching " + path + ":", e);
216           } finally {
217             PrefetchExecutor.complete(path);
218           }
219         }
220       });
221     }
222   }
223 
224   protected HFileContext createHFileContext(FSDataInputStreamWrapper fsdis, long fileSize,
225       HFileSystem hfs, Path path, FixedFileTrailer trailer) throws IOException {
226     return new HFileContextBuilder()
227       .withIncludesMvcc(this.includesMemstoreTS)
228       .withCompression(this.compressAlgo)
229       .withHBaseCheckSum(trailer.getMinorVersion() >= MINOR_VERSION_WITH_CHECKSUM)
230       .build();
231   }
232 
233   /**
234    * Create a Scanner on this file. No seeks or reads are done on creation. Call
235    * {@link HFileScanner#seekTo(byte[])} to position an start the read. There is
236    * nothing to clean up in a Scanner. Letting go of your references to the
237    * scanner is sufficient.
238    *
239    * @param cacheBlocks True if we should cache blocks read in by this scanner.
240    * @param pread Use positional read rather than seek+read if true (pread is
241    *          better for random reads, seek+read is better scanning).
242    * @param isCompaction is scanner being used for a compaction?
243    * @return Scanner on this file.
244    */
245    @Override
246    public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
247       final boolean isCompaction) {
248     if (dataBlockEncoder.useEncodedScanner()) {
249       return new EncodedScannerV2(this, cacheBlocks, pread, isCompaction,
250           hfileContext);
251     }
252 
253     return new ScannerV2(this, cacheBlocks, pread, isCompaction);
254   }
255 
256   /**
257    * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
258    * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
259    */
260    private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
261        boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
262        DataBlockEncoding expectedDataBlockEncoding) throws IOException {
263      // Check cache for block. If found return.
264      if (cacheConf.isBlockCacheEnabled()) {
265        BlockCache cache = cacheConf.getBlockCache();
266        HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
267          updateCacheMetrics);
268        if (cachedBlock != null) {
269          if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
270            cachedBlock = cachedBlock.unpack(hfileContext, fsBlockReader);
271          }
272          validateBlockType(cachedBlock, expectedBlockType);
273 
274          if (expectedDataBlockEncoding == null) {
275            return cachedBlock;
276          }
277          DataBlockEncoding actualDataBlockEncoding =
278                  cachedBlock.getDataBlockEncoding();
279          // Block types other than data blocks always have
280          // DataBlockEncoding.NONE. To avoid false negative cache misses, only
281          // perform this check if cached block is a data block.
282          if (cachedBlock.getBlockType().isData() &&
283                  !actualDataBlockEncoding.equals(expectedDataBlockEncoding)) {
284            // This mismatch may happen if a ScannerV2, which is used for say a
285            // compaction, tries to read an encoded block from the block cache.
286            // The reverse might happen when an EncodedScannerV2 tries to read
287            // un-encoded blocks which were cached earlier.
288            //
289            // Because returning a data block with an implicit BlockType mismatch
290            // will cause the requesting scanner to throw a disk read should be
291            // forced here. This will potentially cause a significant number of
292            // cache misses, so update so we should keep track of this as it might
293            // justify the work on a CompoundScannerV2.
294            if (!expectedDataBlockEncoding.equals(DataBlockEncoding.NONE) &&
295                    !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)) {
296              // If the block is encoded but the encoding does not match the
297              // expected encoding it is likely the encoding was changed but the
298              // block was not yet evicted. Evictions on file close happen async
299              // so blocks with the old encoding still linger in cache for some
300              // period of time. This event should be rare as it only happens on
301              // schema definition change.
302              LOG.info("Evicting cached block with key " + cacheKey +
303                      " because of a data block encoding mismatch" +
304                      "; expected: " + expectedDataBlockEncoding +
305                      ", actual: " + actualDataBlockEncoding);
306              cache.evictBlock(cacheKey);
307            }
308            return null;
309          }
310          return cachedBlock;
311        }
312      }
313      return null;
314    }
315   /**
316    * @param metaBlockName
317    * @param cacheBlock Add block to cache, if found
318    * @return block wrapped in a ByteBuffer, with header skipped
319    * @throws IOException
320    */
321   @Override
322   public ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock)
323       throws IOException {
324     if (trailer.getMetaIndexCount() == 0) {
325       return null; // there are no meta blocks
326     }
327     if (metaBlockIndexReader == null) {
328       throw new IOException("Meta index not loaded");
329     }
330 
331     byte[] mbname = Bytes.toBytes(metaBlockName);
332     int block = metaBlockIndexReader.rootBlockContainingKey(mbname,
333         0, mbname.length);
334     if (block == -1)
335       return null;
336     long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);
337 
338     // Per meta key from any given file, synchronize reads for said block. This
339     // is OK to do for meta blocks because the meta block index is always
340     // single-level.
341     synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
342       // Check cache for block. If found return.
343       long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
344       BlockCacheKey cacheKey = new BlockCacheKey(name, metaBlockOffset);
345 
346       cacheBlock &= cacheConf.shouldCacheDataOnRead();
347       if (cacheConf.isBlockCacheEnabled()) {
348         HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, false, true, true,
349           BlockType.META, null);
350         if (cachedBlock != null) {
351           assert cachedBlock.isUnpacked() : "Packed block leak.";
352           // Return a distinct 'shallow copy' of the block,
353           // so pos does not get messed by the scanner
354           return cachedBlock.getBufferWithoutHeader();
355         }
356         // Cache Miss, please load.
357       }
358 
359       HFileBlock metaBlock = fsBlockReader.readBlockData(metaBlockOffset,
360           blockSize, -1, true).unpack(hfileContext, fsBlockReader);
361 
362       // Cache the block
363       if (cacheBlock) {
364         cacheConf.getBlockCache().cacheBlock(cacheKey, metaBlock,
365             cacheConf.isInMemory(), this.cacheConf.isCacheDataInL1());
366       }
367 
368       return metaBlock.getBufferWithoutHeader();
369     }
370   }
371 
372   @Override
373   public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
374       final boolean cacheBlock, boolean pread, final boolean isCompaction,
375       boolean updateCacheMetrics, BlockType expectedBlockType,
376       DataBlockEncoding expectedDataBlockEncoding)
377       throws IOException {
378     if (dataBlockIndexReader == null) {
379       throw new IOException("Block index not loaded");
380     }
381     if (dataBlockOffset < 0 || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
382       throw new IOException("Requested block is out of range: " + dataBlockOffset +
383         ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset());
384     }
385 
386     // For any given block from any given file, synchronize reads for said block.
387     // Without a cache, this synchronizing is needless overhead, but really
388     // the other choice is to duplicate work (which the cache would prevent you
389     // from doing).
390     BlockCacheKey cacheKey = new BlockCacheKey(name, dataBlockOffset);
391     boolean useLock = false;
392     IdLock.Entry lockEntry = null;
393     TraceScope traceScope = Trace.startSpan("HFileReaderV2.readBlock");
394     try {
395       while (true) {
396         // Check cache for block. If found return.
397         if (cacheConf.shouldReadBlockFromCache(expectedBlockType)) {
398           if (useLock) {
399             lockEntry = offsetLock.getLockEntry(dataBlockOffset);
400           }
401           // Try and get the block from the block cache. If the useLock variable is true then this
402           // is the second time through the loop and it should not be counted as a block cache miss.
403           HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, isCompaction,
404             updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding);
405           if (cachedBlock != null) {
406             if (Trace.isTracing()) {
407               traceScope.getSpan().addTimelineAnnotation("blockCacheHit");
408             }
409             assert cachedBlock.isUnpacked() : "Packed block leak.";
410             if (cachedBlock.getBlockType().isData()) {
411               if (updateCacheMetrics) {
412                 HFile.dataBlockReadCnt.incrementAndGet();
413               }
414               // Validate encoding type for data blocks. We include encoding
415               // type in the cache key, and we expect it to match on a cache hit.
416               if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
417                 throw new IOException("Cached block under key " + cacheKey + " "
418                   + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
419                   + dataBlockEncoder.getDataBlockEncoding() + ")");
420               }
421             }
422             // Cache-hit. Return!
423             return cachedBlock;
424           }
425           if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
426             // check cache again with lock
427             useLock = true;
428             continue;
429           }
430           // Carry on, please load.
431         }
432 
433         if (Trace.isTracing()) {
434           traceScope.getSpan().addTimelineAnnotation("blockCacheMiss");
435         }
436         // Load block from filesystem.
437         HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, -1,
438             pread);
439         validateBlockType(hfileBlock, expectedBlockType);
440         HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
441         BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
442 
443         // Cache the block if necessary
444         if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
445           cacheConf.getBlockCache().cacheBlock(cacheKey,
446             cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
447             cacheConf.isInMemory(), this.cacheConf.isCacheDataInL1());
448         }
449 
450         if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
451           HFile.dataBlockReadCnt.incrementAndGet();
452         }
453 
454         return unpacked;
455       }
456     } finally {
457       traceScope.close();
458       if (lockEntry != null) {
459         offsetLock.releaseLockEntry(lockEntry);
460       }
461     }
462   }
463 
464   @Override
465   public boolean hasMVCCInfo() {
466     return includesMemstoreTS && decodeMemstoreTS;
467   }
468 
469   /**
470    * Compares the actual type of a block retrieved from cache or disk with its
471    * expected type and throws an exception in case of a mismatch. Expected
472    * block type of {@link BlockType#DATA} is considered to match the actual
473    * block type [@link {@link BlockType#ENCODED_DATA} as well.
474    * @param block a block retrieved from cache or disk
475    * @param expectedBlockType the expected block type, or null to skip the
476    *          check
477    */
478   private void validateBlockType(HFileBlock block,
479       BlockType expectedBlockType) throws IOException {
480     if (expectedBlockType == null) {
481       return;
482     }
483     BlockType actualBlockType = block.getBlockType();
484     if (expectedBlockType.isData() && actualBlockType.isData()) {
485       // We consider DATA to match ENCODED_DATA for the purpose of this
486       // verification.
487       return;
488     }
489     if (actualBlockType != expectedBlockType) {
490       throw new IOException("Expected block type " + expectedBlockType + ", " +
491           "but got " + actualBlockType + ": " + block);
492     }
493   }
494 
495   /**
496    * @return Last key in the file. May be null if file has no entries. Note that
497    *         this is not the last row key, but rather the byte form of the last
498    *         KeyValue.
499    */
500   @Override
501   public byte[] getLastKey() {
502     return dataBlockIndexReader.isEmpty() ? null : lastKey;
503   }
504 
505   /**
506    * @return Midkey for this file. We work with block boundaries only so
507    *         returned midkey is an approximation only.
508    * @throws IOException
509    */
510   @Override
511   public byte[] midkey() throws IOException {
512     return dataBlockIndexReader.midkey();
513   }
514 
515   @Override
516   public void close() throws IOException {
517     close(cacheConf.shouldEvictOnClose());
518   }
519 
520   public void close(boolean evictOnClose) throws IOException {
521     PrefetchExecutor.cancel(path);
522     if (evictOnClose && cacheConf.isBlockCacheEnabled()) {
523       int numEvicted = cacheConf.getBlockCache().evictBlocksByHfileName(name);
524       if (LOG.isTraceEnabled()) {
525         LOG.trace("On close, file=" + name + " evicted=" + numEvicted
526           + " block(s)");
527       }
528     }
529     fsBlockReader.closeStreams();
530   }
531 
532   public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
533     return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
534   }
535 
536   /** For testing */
537   @Override
538   HFileBlock.FSReader getUncachedBlockReader() {
539     return fsBlockReader;
540   }
541 
542 
543   protected abstract static class AbstractScannerV2
544       extends AbstractHFileReader.Scanner {
545     protected HFileBlock block;
546 
547     @Override
548     public Cell getNextIndexedKey() {
549       return nextIndexedKey;
550     }
551     /**
552      * The next indexed key is to keep track of the indexed key of the next data block.
553      * If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the
554      * current data block is the last data block.
555      *
556      * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
557      */
558     protected Cell nextIndexedKey;
559 
560     public AbstractScannerV2(HFileReaderV2 r, boolean cacheBlocks,
561         final boolean pread, final boolean isCompaction) {
562       super(r, cacheBlocks, pread, isCompaction);
563     }
564 
565     protected abstract ByteBuffer getFirstKeyInBlock(HFileBlock curBlock);
566 
567     protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
568         boolean rewind, Cell key, boolean seekBefore) throws IOException;
569 
570     @Override
571     public int seekTo(byte[] key, int offset, int length) throws IOException {
572       // Always rewind to the first key of the block, because the given key
573       // might be before or after the current key.
574       return seekTo(new KeyValue.KeyOnlyKeyValue(key, offset, length));
575     }
576 
577     @Override
578     public int reseekTo(byte[] key, int offset, int length) throws IOException {
579       return reseekTo(new KeyValue.KeyOnlyKeyValue(key, offset, length));
580     }
581 
582     @Override
583     public int seekTo(Cell key) throws IOException {
584       return seekTo(key, true);
585     }
586 
587     @Override
588     public int reseekTo(Cell key) throws IOException {
589       int compared;
590       if (isSeeked()) {
591         compared = compareKey(reader.getComparator(), key);
592         if (compared < 1) {
593           // If the required key is less than or equal to current key, then
594           // don't do anything.
595           return compared;
596         } else {
597           // The comparison with no_next_index_key has to be checked
598           if (this.nextIndexedKey != null &&
599               (this.nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY || reader
600               .getComparator()
601                   .compareOnlyKeyPortion(key, nextIndexedKey) < 0)) {
602             // The reader shall continue to scan the current data block instead
603             // of querying the
604             // block index as long as it knows the target key is strictly
605             // smaller than
606             // the next indexed key or the current data block is the last data
607             // block.
608             return loadBlockAndSeekToKey(this.block, nextIndexedKey, false, key, false);
609           }
610         }
611       }
612       // Don't rewind on a reseek operation, because reseek implies that we are
613       // always going forward in the file.
614       return seekTo(key, false);
615     }
616 
617 
618     /**
619      * An internal API function. Seek to the given key, optionally rewinding to
620      * the first key of the block before doing the seek.
621      *
622      * @param key - a cell representing the key that we need to fetch
623      * @param rewind whether to rewind to the first key of the block before
624      *        doing the seek. If this is false, we are assuming we never go
625      *        back, otherwise the result is undefined.
626      * @return -1 if the key is earlier than the first key of the file,
627      *         0 if we are at the given key, 1 if we are past the given key
628      *         -2 if the key is earlier than the first key of the file while
629      *         using a faked index key
630      * @throws IOException
631      */
632     public int seekTo(Cell key, boolean rewind) throws IOException {
633       HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
634       BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, block,
635           cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding());
636       if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
637         // This happens if the key e.g. falls before the beginning of the file.
638         return -1;
639       }
640       return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
641           blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
642     }
643 
644     @Override
645     public boolean seekBefore(byte[] key, int offset, int length) throws IOException {
646       return seekBefore(new KeyValue.KeyOnlyKeyValue(key, offset, length));
647     }
648 
649     @Override
650     public boolean seekBefore(Cell key) throws IOException {
651       HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, block,
652           cacheBlocks, pread, isCompaction,
653           ((HFileReaderV2) reader).getEffectiveEncodingInCache(isCompaction));
654       if (seekToBlock == null) {
655         return false;
656       }
657       ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock);
658 
659       if (reader.getComparator()
660           .compareOnlyKeyPortion(
661               new KeyValue.KeyOnlyKeyValue(firstKey.array(), firstKey.arrayOffset(),
662                   firstKey.limit()), key) >= 0) {
663         long previousBlockOffset = seekToBlock.getPrevBlockOffset();
664         // The key we are interested in
665         if (previousBlockOffset == -1) {
666           // we have a 'problem', the key we want is the first of the file.
667           return false;
668         }
669 
670         // It is important that we compute and pass onDiskSize to the block
671         // reader so that it does not have to read the header separately to
672         // figure out the size.  Currently, we do not have a way to do this
673         // correctly in the general case however.
674         // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
675         int prevBlockSize = -1;
676         seekToBlock = reader.readBlock(previousBlockOffset,
677             prevBlockSize, cacheBlocks,
678             pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
679         // TODO shortcut: seek forward in this block to the last key of the
680         // block.
681       }
682       Cell firstKeyInCurrentBlock = new KeyValue.KeyOnlyKeyValue(Bytes.getBytes(firstKey));
683       loadBlockAndSeekToKey(seekToBlock, firstKeyInCurrentBlock, true, key, true);
684       return true;
685     }
686 
687     /**
688      * Scans blocks in the "scanned" section of the {@link HFile} until the next
689      * data block is found.
690      *
691      * @return the next block, or null if there are no more data blocks
692      * @throws IOException
693      */
694     protected HFileBlock readNextDataBlock() throws IOException {
695       long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
696       if (block == null)
697         return null;
698 
699       HFileBlock curBlock = block;
700 
701       do {
702         if (curBlock.getOffset() >= lastDataBlockOffset)
703           return null;
704 
705         if (curBlock.getOffset() < 0) {
706           throw new IOException("Invalid block file offset: " + block);
707         }
708 
709         // We are reading the next block without block type validation, because
710         // it might turn out to be a non-data block.
711         curBlock = reader.readBlock(curBlock.getOffset()
712             + curBlock.getOnDiskSizeWithHeader(),
713             curBlock.getNextBlockOnDiskSizeWithHeader(), cacheBlocks, pread,
714             isCompaction, true, null, getEffectiveDataBlockEncoding());
715       } while (!curBlock.getBlockType().isData());
716 
717       return curBlock;
718     }
719 
720     public DataBlockEncoding getEffectiveDataBlockEncoding() {
721       return ((HFileReaderV2)reader).getEffectiveEncodingInCache(isCompaction);
722     }
723     /**
724      * Compare the given key against the current key
725      * @param comparator
726      * @param key
727      * @param offset
728      * @param length
729      * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater
730      */
731     public abstract int compareKey(KVComparator comparator, byte[] key, int offset,
732         int length);
733 
734     public abstract int compareKey(KVComparator comparator, Cell kv);
735   }
736 
737   /**
738    * Implementation of {@link HFileScanner} interface.
739    */
740   protected static class ScannerV2 extends AbstractScannerV2 {
741     private HFileReaderV2 reader;
742 
743     public ScannerV2(HFileReaderV2 r, boolean cacheBlocks,
744         final boolean pread, final boolean isCompaction) {
745       super(r, cacheBlocks, pread, isCompaction);
746       this.reader = r;
747     }
748 
749     @Override
750     public Cell getKeyValue() {
751       if (!isSeeked())
752         return null;
753 
754       return formNoTagsKeyValue();
755     }
756 
757     protected Cell formNoTagsKeyValue() {
758       NoTagsKeyValue ret = new NoTagsKeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
759           + blockBuffer.position(), getCellBufSize());
760       if (this.reader.shouldIncludeMemstoreTS()) {
761         ret.setSequenceId(currMemstoreTS);
762       }
763       return ret;
764     }
765 
766     protected int getCellBufSize() {
767       return KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
768     }
769 
770     @Override
771     public ByteBuffer getKey() {
772       assertSeeked();
773       return ByteBuffer.wrap(
774           blockBuffer.array(),
775           blockBuffer.arrayOffset() + blockBuffer.position()
776               + KEY_VALUE_LEN_SIZE, currKeyLen).slice();
777     }
778 
779     @Override
780     public int compareKey(KVComparator comparator, byte[] key, int offset, int length) {
781       return comparator.compareFlatKey(key, offset, length, blockBuffer.array(),
782           blockBuffer.arrayOffset() + blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen);
783     }
784 
785     @Override
786     public ByteBuffer getValue() {
787       assertSeeked();
788       return ByteBuffer.wrap(
789           blockBuffer.array(),
790           blockBuffer.arrayOffset() + blockBuffer.position()
791               + KEY_VALUE_LEN_SIZE + currKeyLen, currValueLen).slice();
792     }
793 
794     protected void setNonSeekedState() {
795       block = null;
796       blockBuffer = null;
797       currKeyLen = 0;
798       currValueLen = 0;
799       currMemstoreTS = 0;
800       currMemstoreTSLen = 0;
801     }
802 
803     /**
804      * Set the position on current backing blockBuffer.
805      */
806     private void positionThisBlockBuffer() {
807       try {
808         blockBuffer.position(getNextCellStartPosition());
809       } catch (IllegalArgumentException e) {
810         LOG.error("Current pos = " + blockBuffer.position()
811             + "; currKeyLen = " + currKeyLen + "; currValLen = "
812             + currValueLen + "; block limit = " + blockBuffer.limit()
813             + "; HFile name = " + reader.getName()
814             + "; currBlock currBlockOffset = " + block.getOffset());
815         throw e;
816       }
817     }
818 
819     /**
820      * Set our selves up for the next 'next' invocation, set up next block.
821      * @return True is more to read else false if at the end.
822      * @throws IOException
823      */
824     private boolean positionForNextBlock() throws IOException {
825       // Methods are small so they get inlined because they are 'hot'.
826       long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
827       if (block.getOffset() >= lastDataBlockOffset) {
828         setNonSeekedState();
829         return false;
830       }
831       return isNextBlock();
832     }
833 
834     private boolean isNextBlock() throws IOException {
835       // Methods are small so they get inlined because they are 'hot'.
836       HFileBlock nextBlock = readNextDataBlock();
837       if (nextBlock == null) {
838         setNonSeekedState();
839         return false;
840       }
841       updateCurrBlock(nextBlock);
842       return true;
843     }
844 
845     private final boolean _next() throws IOException {
846       // Small method so can be inlined. It is a hot one.
847       if (blockBuffer.remaining() <= 0) {
848         return positionForNextBlock();
849       }
850       // We are still in the same block.
851       readKeyValueLen();
852       return true;
853     }
854 
855     /**
856      * Go to the next key/value in the block section. Loads the next block if
857      * necessary. If successful, {@link #getKey()} and {@link #getValue()} can
858      * be called.
859      *
860      * @return true if successfully navigated to the next key/value
861      */
862     @Override
863     public boolean next() throws IOException {
864       // This is a hot method so extreme measures taken to ensure it is small and inlineable.
865       // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
866       assertSeeked();
867       positionThisBlockBuffer();
868       return _next();
869     }
870 
871     protected int getNextCellStartPosition() {
872       return blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
873           + currMemstoreTSLen;
874     }
875 
876     /**
877      * Positions this scanner at the start of the file.
878      *
879      * @return false if empty file; i.e. a call to next would return false and
880      *         the current key and value are undefined.
881      * @throws IOException
882      */
883     @Override
884     public boolean seekTo() throws IOException {
885       if (reader == null) {
886         return false;
887       }
888 
889       if (reader.getTrailer().getEntryCount() == 0) {
890         // No data blocks.
891         return false;
892       }
893 
894       long firstDataBlockOffset =
895           reader.getTrailer().getFirstDataBlockOffset();
896       if (block != null && block.getOffset() == firstDataBlockOffset) {
897         blockBuffer.rewind();
898         readKeyValueLen();
899         return true;
900       }
901 
902       block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
903           isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
904       if (block.getOffset() < 0) {
905         throw new IOException("Invalid block offset: " + block.getOffset());
906       }
907       updateCurrBlock(block);
908       return true;
909     }
910 
911     @Override
912     protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
913         boolean rewind, Cell key, boolean seekBefore) throws IOException {
914       if (block == null || block.getOffset() != seekToBlock.getOffset()) {
915         updateCurrBlock(seekToBlock);
916       } else if (rewind) {
917         blockBuffer.rewind();
918       }
919 
920       // Update the nextIndexedKey
921       this.nextIndexedKey = nextIndexedKey;
922       return blockSeek(key, seekBefore);
923     }
924 
925     /**
926      * Updates the current block to be the given {@link HFileBlock}. Seeks to
927      * the the first key/value pair.
928      *
929      * @param newBlock the block to make current
930      */
931     protected void updateCurrBlock(HFileBlock newBlock) {
932       block = newBlock;
933 
934       // sanity check
935       if (block.getBlockType() != BlockType.DATA) {
936         throw new IllegalStateException("ScannerV2 works only on data " +
937             "blocks, got " + block.getBlockType() + "; " +
938             "fileName=" + reader.name + ", " +
939             "dataBlockEncoder=" + reader.dataBlockEncoder + ", " +
940             "isCompaction=" + isCompaction);
941       }
942 
943       blockBuffer = block.getBufferWithoutHeader();
944       readKeyValueLen();
945       blockFetches++;
946 
947       // Reset the next indexed key
948       this.nextIndexedKey = null;
949     }
950 
951     /**
952      * @param v
953      * @return True if v < 0 or v > current block buffer limit.
954      */
955     protected final boolean checkLen(final int v) {
956       return v < 0 || v > this.blockBuffer.limit();
957     }
958 
959     /**
960      * Check key and value lengths are wholesome.
961      */
962     protected final void checkKeyValueLen() {
963       if (checkLen(this.currKeyLen) || checkLen(this.currValueLen)) {
964         throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen +
965           " or currValueLen " + this.currValueLen + ". Block offset: " + block.getOffset() +
966           ", block length: " + this.blockBuffer.limit() + ", position: " +
967            this.blockBuffer.position() + " (without header).");
968       }
969     }
970 
971     protected void readKeyValueLen() {
972       // TODO: METHOD (mostly) DUPLICATED IN V3!!!!! FIXED in master branch by collapsing v3 and v2.
973       // This is a hot method. We go out of our way to make this method short so it can be
974       // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
975       // because it is faster than going via range-checked ByteBuffer methods or going through a
976       // byte buffer array a byte at a time.
977       int p = blockBuffer.position() + blockBuffer.arrayOffset();
978       // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
979       // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
980       long ll = Bytes.toLong(blockBuffer.array(), p);
981       // Read top half as an int of key length and bottom int as value length
982       this.currKeyLen = (int)(ll >> Integer.SIZE);
983       this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
984       checkKeyValueLen();
985       // Move position past the key and value lengths and then beyond the key and value
986       p += (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
987       readMvccVersion(p);
988     }
989 
990     /**
991      * Read mvcc. Does checks to see if we even need to read the mvcc at all.
992      * @param position
993      */
994     protected void readMvccVersion(final int position) {
995       // See if we even need to decode mvcc.
996       if (!this.reader.shouldIncludeMemstoreTS()) return;
997       if (!this.reader.decodeMemstoreTS) {
998         currMemstoreTS = 0;
999         currMemstoreTSLen = 1;
1000         return;
1001       }
1002       _readMvccVersion(position);
1003     }
1004 
1005     /**
1006      * Actually do the mvcc read. Does no checks.
1007      * @param position
1008      */
1009     private void _readMvccVersion(final int position) {
1010       // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
1011       // previous if one-byte vint, we'd redo the vint call to find int size.
1012       // Also the method is kept small so can be inlined.
1013       byte firstByte = blockBuffer.array()[position];
1014       int len = WritableUtils.decodeVIntSize(firstByte);
1015       if (len == 1) {
1016         this.currMemstoreTS = firstByte;
1017       } else {
1018         long i = 0;
1019         for (int idx = 0; idx < len - 1; idx++) {
1020           byte b = blockBuffer.array()[position + 1 + idx];
1021           i = i << 8;
1022           i = i | (b & 0xFF);
1023         }
1024         currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
1025       }
1026       this.currMemstoreTSLen = len;
1027     }
1028 
1029     protected void readMvccVersion() {
1030       // TODO CLEANUP!!!
1031       readMvccVersion(blockBuffer.arrayOffset() + blockBuffer.position());
1032     }
1033 
1034     /**
1035      * Within a loaded block, seek looking for the last key that is smaller than
1036      * (or equal to?) the key we are interested in.
1037      *
1038      * A note on the seekBefore: if you have seekBefore = true, AND the first
1039      * key in the block = key, then you'll get thrown exceptions. The caller has
1040      * to check for that case and load the previous block as appropriate.
1041      *
1042      * @param key
1043      *          the key to find
1044      * @param seekBefore
1045      *          find the key before the given key in case of exact match.
1046      * @return 0 in case of an exact key match, 1 in case of an inexact match,
1047      *         -2 in case of an inexact match and furthermore, the input key
1048      *         less than the first key of current block(e.g. using a faked index
1049      *         key)
1050      */
1051     protected int blockSeek(Cell key, boolean seekBefore) {
1052       int klen, vlen;
1053       long memstoreTS = 0;
1054       int memstoreTSLen = 0;
1055       int lastKeyValueSize = -1;
1056       KeyValue.KeyOnlyKeyValue keyOnlykv = new KeyValue.KeyOnlyKeyValue();
1057       do {
1058         blockBuffer.mark();
1059         klen = blockBuffer.getInt();
1060         vlen = blockBuffer.getInt();
1061         blockBuffer.reset();
1062         if (this.reader.shouldIncludeMemstoreTS()) {
1063           if (this.reader.decodeMemstoreTS) {
1064             int memstoreTSOffset = blockBuffer.arrayOffset() + blockBuffer.position()
1065                 + KEY_VALUE_LEN_SIZE + klen + vlen;
1066             memstoreTS = Bytes.readAsVLong(blockBuffer.array(), memstoreTSOffset);
1067             memstoreTSLen = WritableUtils.getVIntSize(memstoreTS);
1068           } else {
1069             memstoreTS = 0;
1070             memstoreTSLen = 1;
1071           }
1072         }
1073 
1074         int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + KEY_VALUE_LEN_SIZE;
1075         keyOnlykv.setKey(blockBuffer.array(), keyOffset, klen);
1076         int comp = reader.getComparator().compareOnlyKeyPortion(key, keyOnlykv);
1077 
1078         if (comp == 0) {
1079           if (seekBefore) {
1080             if (lastKeyValueSize < 0) {
1081               throw new IllegalStateException("blockSeek with seekBefore "
1082                   + "at the first key of the block: key="
1083                   + CellUtil.getCellKeyAsString(key)
1084                   + ", blockOffset=" + block.getOffset() + ", onDiskSize="
1085                   + block.getOnDiskSizeWithHeader());
1086             }
1087             blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
1088             readKeyValueLen();
1089             return 1; // non exact match.
1090           }
1091           currKeyLen = klen;
1092           currValueLen = vlen;
1093           if (this.reader.shouldIncludeMemstoreTS()) {
1094             currMemstoreTS = memstoreTS;
1095             currMemstoreTSLen = memstoreTSLen;
1096           }
1097           return 0; // indicate exact match
1098         } else if (comp < 0) {
1099           if (lastKeyValueSize > 0)
1100             blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
1101           readKeyValueLen();
1102           if (lastKeyValueSize == -1 && blockBuffer.position() == 0
1103               && this.reader.trailer.getMinorVersion() >= MINOR_VERSION_WITH_FAKED_KEY) {
1104             return HConstants.INDEX_KEY_MAGIC;
1105           }
1106           return 1;
1107         }
1108 
1109         // The size of this key/value tuple, including key/value length fields.
1110         lastKeyValueSize = klen + vlen + memstoreTSLen + KEY_VALUE_LEN_SIZE;
1111         blockBuffer.position(blockBuffer.position() + lastKeyValueSize);
1112       } while (blockBuffer.remaining() > 0);
1113 
1114       // Seek to the last key we successfully read. This will happen if this is
1115       // the last key/value pair in the file, in which case the following call
1116       // to next() has to return false.
1117       blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
1118       readKeyValueLen();
1119       return 1; // didn't exactly find it.
1120     }
1121 
1122     @Override
1123     protected ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) {
1124       ByteBuffer buffer = curBlock.getBufferWithoutHeader();
1125       // It is safe to manipulate this buffer because we own the buffer object.
1126       buffer.rewind();
1127       int klen = buffer.getInt();
1128       buffer.getInt();
1129       ByteBuffer keyBuff = buffer.slice();
1130       keyBuff.limit(klen);
1131       keyBuff.rewind();
1132       return keyBuff;
1133     }
1134 
1135     @Override
1136     public String getKeyString() {
1137       return Bytes.toStringBinary(blockBuffer.array(),
1138           blockBuffer.arrayOffset() + blockBuffer.position()
1139               + KEY_VALUE_LEN_SIZE, currKeyLen);
1140     }
1141 
1142     @Override
1143     public String getValueString() {
1144       return Bytes.toString(blockBuffer.array(), blockBuffer.arrayOffset()
1145           + blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
1146           currValueLen);
1147     }
1148 
1149     @Override
1150     public int compareKey(KVComparator comparator, Cell key) {
1151       return comparator.compareOnlyKeyPortion(
1152           key,
1153           new KeyValue.KeyOnlyKeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
1154               + blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen));
1155     }
1156   }
1157 
1158   /**
1159    * ScannerV2 that operates on encoded data blocks.
1160    */
1161   protected static class EncodedScannerV2 extends AbstractScannerV2 {
1162     private final HFileBlockDecodingContext decodingCtx;
1163     private final DataBlockEncoder.EncodedSeeker seeker;
1164     private final DataBlockEncoder dataBlockEncoder;
1165     protected final HFileContext meta;
1166 
1167     public EncodedScannerV2(HFileReaderV2 reader, boolean cacheBlocks,
1168         boolean pread, boolean isCompaction, HFileContext meta) {
1169       super(reader, cacheBlocks, pread, isCompaction);
1170       DataBlockEncoding encoding = reader.dataBlockEncoder.getDataBlockEncoding();
1171       dataBlockEncoder = encoding.getEncoder();
1172       decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(meta);
1173       seeker = dataBlockEncoder.createSeeker(
1174         reader.getComparator(), decodingCtx);
1175       this.meta = meta;
1176     }
1177 
1178     @Override
1179     public boolean isSeeked(){
1180       return this.block != null;
1181     }
1182 
1183     /**
1184      * Updates the current block to be the given {@link HFileBlock}. Seeks to
1185      * the the first key/value pair.
1186      *
1187      * @param newBlock the block to make current
1188      * @throws CorruptHFileException
1189      */
1190     private void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
1191       block = newBlock;
1192 
1193       // sanity checks
1194       if (block.getBlockType() != BlockType.ENCODED_DATA) {
1195         throw new IllegalStateException(
1196             "EncodedScanner works only on encoded data blocks");
1197       }
1198       short dataBlockEncoderId = block.getDataBlockEncodingId();
1199       if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
1200         String encoderCls = dataBlockEncoder.getClass().getName();
1201         throw new CorruptHFileException("Encoder " + encoderCls
1202           + " doesn't support data block encoding "
1203           + DataBlockEncoding.getNameFromId(dataBlockEncoderId));
1204       }
1205 
1206       seeker.setCurrentBuffer(getEncodedBuffer(newBlock));
1207       blockFetches++;
1208 
1209       // Reset the next indexed key
1210       this.nextIndexedKey = null;
1211     }
1212 
1213     private ByteBuffer getEncodedBuffer(HFileBlock newBlock) {
1214       ByteBuffer origBlock = newBlock.getBufferReadOnly();
1215       ByteBuffer encodedBlock = ByteBuffer.wrap(origBlock.array(),
1216           origBlock.arrayOffset() + newBlock.headerSize() +
1217           DataBlockEncoding.ID_SIZE,
1218           newBlock.getUncompressedSizeWithoutHeader() -
1219           DataBlockEncoding.ID_SIZE).slice();
1220       return encodedBlock;
1221     }
1222 
1223     @Override
1224     public boolean seekTo() throws IOException {
1225       if (reader == null) {
1226         return false;
1227       }
1228 
1229       if (reader.getTrailer().getEntryCount() == 0) {
1230         // No data blocks.
1231         return false;
1232       }
1233 
1234       long firstDataBlockOffset =
1235           reader.getTrailer().getFirstDataBlockOffset();
1236       if (block != null && block.getOffset() == firstDataBlockOffset) {
1237         seeker.rewind();
1238         return true;
1239       }
1240 
1241       block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
1242           isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
1243       if (block.getOffset() < 0) {
1244         throw new IOException("Invalid block offset: " + block.getOffset());
1245       }
1246       updateCurrentBlock(block);
1247       return true;
1248     }
1249 
1250     @Override
1251     public boolean next() throws IOException {
1252       boolean isValid = seeker.next();
1253       if (!isValid) {
1254         block = readNextDataBlock();
1255         isValid = block != null;
1256         if (isValid) {
1257           updateCurrentBlock(block);
1258         }
1259       }
1260       return isValid;
1261     }
1262 
1263     @Override
1264     public ByteBuffer getKey() {
1265       assertValidSeek();
1266       return seeker.getKeyDeepCopy();
1267     }
1268 
1269     @Override
1270     public int compareKey(KVComparator comparator, byte[] key, int offset, int length) {
1271       return seeker.compareKey(comparator, key, offset, length);
1272     }
1273 
1274     @Override
1275     public ByteBuffer getValue() {
1276       assertValidSeek();
1277       return seeker.getValueShallowCopy();
1278     }
1279 
1280     @Override
1281     public Cell getKeyValue() {
1282       if (block == null) {
1283         return null;
1284       }
1285       return seeker.getKeyValue();
1286     }
1287 
1288     @Override
1289     public String getKeyString() {
1290       ByteBuffer keyBuffer = getKey();
1291       return Bytes.toStringBinary(keyBuffer.array(),
1292           keyBuffer.arrayOffset(), keyBuffer.limit());
1293     }
1294 
1295     @Override
1296     public String getValueString() {
1297       ByteBuffer valueBuffer = getValue();
1298       return Bytes.toStringBinary(valueBuffer.array(),
1299           valueBuffer.arrayOffset(), valueBuffer.limit());
1300     }
1301 
1302     private void assertValidSeek() {
1303       if (block == null) {
1304         throw new NotSeekedException();
1305       }
1306     }
1307 
1308     @Override
1309     protected ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) {
1310       return dataBlockEncoder.getFirstKeyInBlock(getEncodedBuffer(curBlock));
1311     }
1312 
1313     @Override
1314     protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
1315         boolean rewind, Cell key, boolean seekBefore) throws IOException {
1316       if (block == null || block.getOffset() != seekToBlock.getOffset()) {
1317         updateCurrentBlock(seekToBlock);
1318       } else if (rewind) {
1319         seeker.rewind();
1320       }
1321       this.nextIndexedKey = nextIndexedKey;
1322       return seeker.seekToKeyInBlock(key, seekBefore);
1323     }
1324 
1325     @Override
1326     public int compareKey(KVComparator comparator, Cell key) {
1327       return seeker.compareKey(comparator, key);
1328     }
1329   }
1330 
1331   /**
1332    * Returns a buffer with the Bloom filter metadata. The caller takes
1333    * ownership of the buffer.
1334    */
1335   @Override
1336   public DataInput getGeneralBloomFilterMetadata() throws IOException {
1337     return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
1338   }
1339 
1340   @Override
1341   public DataInput getDeleteBloomFilterMetadata() throws IOException {
1342     return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
1343   }
1344 
1345   private DataInput getBloomFilterMetadata(BlockType blockType)
1346   throws IOException {
1347     if (blockType != BlockType.GENERAL_BLOOM_META &&
1348         blockType != BlockType.DELETE_FAMILY_BLOOM_META) {
1349       throw new RuntimeException("Block Type: " + blockType.toString() +
1350           " is not supported") ;
1351     }
1352 
1353     for (HFileBlock b : loadOnOpenBlocks)
1354       if (b.getBlockType() == blockType)
1355         return b.getByteStream();
1356     return null;
1357   }
1358 
1359   @Override
1360   public boolean isFileInfoLoaded() {
1361     return true; // We load file info in constructor in version 2.
1362   }
1363 
1364   /**
1365    * Validates that the minor version is within acceptable limits.
1366    * Otherwise throws an Runtime exception
1367    */
1368   private void validateMinorVersion(Path path, int minorVersion) {
1369     if (minorVersion < MIN_MINOR_VERSION ||
1370         minorVersion > MAX_MINOR_VERSION) {
1371       String msg = "Minor version for path " + path +
1372                    " is expected to be between " +
1373                    MIN_MINOR_VERSION + " and " + MAX_MINOR_VERSION +
1374                    " but is found to be " + minorVersion;
1375       LOG.error(msg);
1376       throw new RuntimeException(msg);
1377     }
1378   }
1379 
1380   @Override
1381   public int getMajorVersion() {
1382     return 2;
1383   }
1384 
1385   @Override
1386   public HFileContext getFileContext() {
1387     return hfileContext;
1388   }
1389 
1390   /**
1391    * Returns false if block prefetching was requested for this file and has
1392    * not completed, true otherwise
1393    */
1394   @VisibleForTesting
1395   boolean prefetchComplete() {
1396     return PrefetchExecutor.isCompleted(path);
1397   }
1398 }