001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.io.DataInput;
021import java.io.IOException;
022import java.nio.ByteBuffer;
023import java.util.ArrayList;
024import java.util.Optional;
025import org.apache.hadoop.conf.Configurable;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
029import org.apache.hadoop.hbase.ByteBufferKeyValue;
030import org.apache.hadoop.hbase.Cell;
031import org.apache.hadoop.hbase.CellComparator;
032import org.apache.hadoop.hbase.CellUtil;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.KeyValue;
035import org.apache.hadoop.hbase.NoTagsByteBufferKeyValue;
036import org.apache.hadoop.hbase.PrivateCellUtil;
037import org.apache.hadoop.hbase.SizeCachedKeyValue;
038import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
039import org.apache.hadoop.hbase.io.compress.Compression;
040import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
041import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
042import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
043import org.apache.hadoop.hbase.nio.ByteBuff;
044import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
045import org.apache.hadoop.hbase.trace.TraceUtil;
046import org.apache.hadoop.hbase.util.ByteBufferUtils;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.IdLock;
049import org.apache.hadoop.hbase.util.ObjectIntPair;
050import org.apache.hadoop.io.WritableUtils;
051import org.apache.htrace.core.TraceScope;
052import org.apache.yetus.audience.InterfaceAudience;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
056
057/**
058 * Implementation that can handle all hfile versions of {@link HFile.Reader}.
059 */
060@InterfaceAudience.Private
061@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
062public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
  // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into
  // one file; ditto for all the HFileReader.ScannerV? implementations. We were running up against
  // the MaxInlineLevel limit because too many tiers were involved in reading from an hfile. It
  // was also hard to navigate the source code with so many classes participating in a read.
  private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);

  /** Data block index reader keeping the root data index in memory */
  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;

  /** Meta block index reader -- always single level */
  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;

  /** File trailer; obtained from {@link #fileInfo} when the reader is constructed. */
  protected FixedFileTrailer trailer;

  /** Value of {@code context.isPrimaryReplicaReader()} captured at construction time. */
  private final boolean primaryReplicaReader;

  /**
   * What kind of data block encoding should be used while reading, writing,
   * and handling cache.
   */
  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;

  /** Block cache configuration. */
  protected final CacheConfig cacheConf;

  /** Reader context passed to the constructor; supplies the file path and file size. */
  protected ReaderContext context;

  /** HFile info passed to the constructor; source of the trailer, file context and indexes. */
  protected final HFileInfo fileInfo;

  /** Path of file */
  protected final Path path;

  /** File name to be used for block names */
  protected final String name;

  /** Configuration passed to the constructor. */
  private Configuration conf;

  /** HFile context; obtained from {@link #fileInfo} when the reader is constructed. */
  protected HFileContext hfileContext;

  /** Filesystem-level block reader. */
  protected HFileBlock.FSReader fsBlockReader;

  /**
   * A "sparse lock" implementation allowing to lock on a particular block
   * identified by offset. The purpose of this is to avoid two clients loading
   * the same block, and have all but one client wait to get the block from the
   * cache.
   */
  private IdLock offsetLock = new IdLock();

  /** Minimum minor version supported by this HFile format */
  static final int MIN_MINOR_VERSION = 0;

  /** Maximum minor version supported by this HFile format */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
  static final int MAX_MINOR_VERSION = 3;

  /** Minor versions starting with this number have faked index key */
  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
123
124  /**
125   * Opens a HFile.
126   * @param context Reader context info
127   * @param fileInfo HFile info
128   * @param cacheConf Cache configuration.
129   * @param conf Configuration
130   */
131  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
132  public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
133      Configuration conf) throws IOException {
134    this.cacheConf = cacheConf;
135    this.context = context;
136    this.path = context.getFilePath();
137    this.name = path.getName();
138    this.conf = conf;
139    this.primaryReplicaReader = context.isPrimaryReplicaReader();
140    this.fileInfo = fileInfo;
141    this.trailer = fileInfo.getTrailer();
142    this.hfileContext = fileInfo.getHFileContext();
143    this.fsBlockReader = new HFileBlock.FSReaderImpl(context, hfileContext,
144        cacheConf.getByteBuffAllocator());
145    this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
146    fsBlockReader.setDataBlockEncoder(dataBlockEncoder);
147    dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
148    metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
149  }
150
151  @SuppressWarnings("serial")
152  public static class BlockIndexNotLoadedException extends IllegalStateException {
153    public BlockIndexNotLoadedException(Path path) {
154      // Add a message in case anyone relies on it as opposed to class name.
155      super(path + " block index not loaded");
156    }
157  }
158
159  private Optional<String> toStringFirstKey() {
160    return getFirstKey().map(CellUtil::getCellKeyAsString);
161  }
162
163  private Optional<String> toStringLastKey() {
164    return getLastKey().map(CellUtil::getCellKeyAsString);
165  }
166
167  @Override
168  public String toString() {
169    return "reader=" + path.toString() +
170        (!isFileInfoLoaded()? "":
171          ", compression=" + trailer.getCompressionCodec().getName() +
172          ", cacheConf=" + cacheConf +
173          ", firstKey=" + toStringFirstKey() +
174          ", lastKey=" + toStringLastKey()) +
175          ", avgKeyLen=" + fileInfo.getAvgKeyLen() +
176          ", avgValueLen=" + fileInfo.getAvgValueLen() +
177          ", entries=" + trailer.getEntryCount() +
178          ", length=" + context.getFileSize();
179  }
180
181  @Override
182  public long length() {
183    return context.getFileSize();
184  }
185
186  /**
187   * @return the first key in the file. May be null if file has no entries. Note
188   *         that this is not the first row key, but rather the byte form of the
189   *         first KeyValue.
190   */
191  @Override
192  public Optional<Cell> getFirstKey() {
193    if (dataBlockIndexReader == null) {
194      throw new BlockIndexNotLoadedException(path);
195    }
196    return dataBlockIndexReader.isEmpty() ? Optional.empty()
197        : Optional.of(dataBlockIndexReader.getRootBlockKey(0));
198  }
199
200  /**
201   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's
202   * patch goes in to eliminate {@link KeyValue} here.
203   *
204   * @return the first row key, or null if the file is empty.
205   */
206  @Override
207  public Optional<byte[]> getFirstRowKey() {
208    // We have to copy the row part to form the row key alone
209    return getFirstKey().map(CellUtil::cloneRow);
210  }
211
212  /**
213   * TODO left from {@link HFile} version 1: move this to StoreFile after
214   * Ryan's patch goes in to eliminate {@link KeyValue} here.
215   *
216   * @return the last row key, or null if the file is empty.
217   */
218  @Override
219  public Optional<byte[]> getLastRowKey() {
220    // We have to copy the row part to form the row key alone
221    return getLastKey().map(CellUtil::cloneRow);
222  }
223
224  /** @return number of KV entries in this HFile */
225  @Override
226  public long getEntries() {
227    return trailer.getEntryCount();
228  }
229
230  /** @return comparator */
231  @Override
232  public CellComparator getComparator() {
233    return this.hfileContext.getCellComparator();
234  }
235
236  @VisibleForTesting
237  public Compression.Algorithm getCompressionAlgorithm() {
238    return trailer.getCompressionCodec();
239  }
240
241  /**
242   * @return the total heap size of data and meta block indexes in bytes. Does
243   *         not take into account non-root blocks of a multilevel data index.
244   */
245  @Override
246  public long indexSize() {
247    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
248        + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize()
249            : 0);
250  }
251
252  @Override
253  public String getName() {
254    return name;
255  }
256
257  @Override
258  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
259    this.dataBlockEncoder = dataBlockEncoder;
260    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder);
261  }
262
263  @Override
264  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
265    this.dataBlockIndexReader = reader;
266  }
267
268  @Override
269  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
270    return dataBlockIndexReader;
271  }
272
273  @Override
274  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
275    this.metaBlockIndexReader = reader;
276  }
277
278  @Override
279  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
280    return metaBlockIndexReader;
281  }
282
283  @Override
284  public FixedFileTrailer getTrailer() {
285    return trailer;
286  }
287
288  @Override
289  public ReaderContext getContext() {
290    return this.context;
291  }
292
293  @Override
294  public HFileInfo getHFileInfo() {
295    return this.fileInfo;
296  }
297
298  @Override
299  public boolean isPrimaryReplicaReader() {
300    return primaryReplicaReader;
301  }
302
303  /**
304   * An exception thrown when an operation requiring a scanner to be seeked
305   * is invoked on a scanner that is not seeked.
306   */
307  @SuppressWarnings("serial")
308  public static class NotSeekedException extends IllegalStateException {
309    public NotSeekedException(Path path) {
310      super(path + " not seeked to a key/value");
311    }
312  }
313
314  protected static class HFileScannerImpl implements HFileScanner {
    // Buffer of the block the scanner is positioned in; null until the scanner is seeked
    // (see isSeeked()).
    private ByteBuff blockBuffer;
    protected final boolean cacheBlocks;
    protected final boolean pread;
    protected final boolean isCompaction;
    // Lengths and mvcc value decoded for the cell at the current buffer position.
    private int currKeyLen;
    private int currValueLen;
    private int currMemstoreTSLen;
    private long currMemstoreTS;
    protected final HFile.Reader reader;
    private int currTagsLen;
    // Buffer-backed key-only KeyValue, reused for key comparisons in blockSeek().
    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
    // A pair reused in blockSeek() so that we don't create a lot of garbage objects
    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();

    /**
     * The next indexed key is to keep track of the indexed key of the next data block.
     * If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the
     * current data block is the last data block.
     *
     * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
     */
    protected Cell nextIndexedKey;
    // Current block being used. NOTICE: DON'T release curBlock separately except in the shipped()
    // or close() methods, because shipped() or close() will do the release in the end; even if an
    // exception occurs, the curBlock will be released by the close() method (see
    // RegionScannerImpl#handleException). Please call releaseIfNotCurBlock() to release an
    // unreferenced block.
    protected HFileBlock curBlock;
    // Previous blocks that were used in the course of the read
    protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();
346
347    public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
348        final boolean pread, final boolean isCompaction) {
349      this.reader = reader;
350      this.cacheBlocks = cacheBlocks;
351      this.pread = pread;
352      this.isCompaction = isCompaction;
353    }
354
355    void updateCurrBlockRef(HFileBlock block) {
356      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
357        return;
358      }
359      if (this.curBlock != null && this.curBlock.isSharedMem()) {
360        prevBlocks.add(this.curBlock);
361      }
362      this.curBlock = block;
363    }
364
365    void reset() {
366      // We don't have to keep ref to heap block
367      if (this.curBlock != null && this.curBlock.isSharedMem()) {
368        this.prevBlocks.add(this.curBlock);
369      }
370      this.curBlock = null;
371    }
372
373    private void returnBlocks(boolean returnAll) {
374      this.prevBlocks.forEach(HFileBlock::release);
375      this.prevBlocks.clear();
376      if (returnAll && this.curBlock != null) {
377        this.curBlock.release();
378        this.curBlock = null;
379      }
380    }
381
382    @Override
383    public boolean isSeeked(){
384      return blockBuffer != null;
385    }
386
387    @Override
388    public String toString() {
389      return "HFileScanner for reader " + String.valueOf(getReader());
390    }
391
392    protected void assertSeeked() {
393      if (!isSeeked()) {
394        throw new NotSeekedException(reader.getPath());
395      }
396    }
397
398    @Override
399    public HFile.Reader getReader() {
400      return reader;
401    }
402
403    // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile
404    // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous
405    // array/buffer. How many bytes we should wrap to make the KV is what this method returns.
406    private int getKVBufSize() {
407      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
408      if (currTagsLen > 0) {
409        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
410      }
411      return kvBufSize;
412    }
413
414    @Override
415    public void close() {
416      if (!pread) {
417        // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393
418        reader.unbufferStream();
419      }
420      this.returnBlocks(true);
421    }
422
423    // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current
424    // HFile block's buffer so as to position to the next cell.
425    private int getCurCellSerializedSize() {
426      int curCellSize =  KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
427          + currMemstoreTSLen;
428      if (this.reader.getFileContext().isIncludesTags()) {
429        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
430      }
431      return curCellSize;
432    }
433
    /**
     * Decodes the length fields of the cell found at the current position of the block buffer
     * into {@code currKeyLen} and {@code currValueLen}, and into {@code currTagsLen} when the
     * file context includes tags. Each decoded length is validated, and the offset just past
     * those parts is handed to {@link #readMvccVersion(int)}.
     */
    protected void readKeyValueLen() {
      // This is a hot method. We go out of our way to make this method short so it can be
      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
      // because it is faster than going via range-checked ByteBuffer methods or going through a
      // byte buffer array a byte at a time.
      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
      // Trying to imitate what was done - need to profile if this is better or
      // earlier way is better by doing mark and reset?
      // But ensure that you read long instead of two ints
      long ll = blockBuffer.getLongAfterPosition(0);
      // Read top half as an int of key length and bottom int as value length
      this.currKeyLen = (int)(ll >> Integer.SIZE);
      // NOTE(review): presumably the mask only has upper-half bits set, so after the XOR the
      // (int) cast still yields the low 32 bits of ll -- verify against Bytes.
      this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
      checkKeyValueLen();
      // Offset (relative to the current position) past the key and value lengths and then
      // beyond the key and value; only this local offset advances here.
      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
      if (reader.getFileContext().isIncludesTags()) {
        // Tags length is a short.
        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
        checkTagsLen();
        p += (Bytes.SIZEOF_SHORT + currTagsLen);
      }
      // p now points at where the mvcc version, if any, starts.
      readMvccVersion(p);
    }
459
460    private final void checkTagsLen() {
461      if (checkLen(this.currTagsLen)) {
462        throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen +
463          ". Block offset: " + curBlock.getOffset() + ", block length: " +
464            this.blockBuffer.limit() +
465          ", position: " + this.blockBuffer.position() + " (without header)." +
466          " path=" + reader.getPath());
467      }
468    }
469
470    /**
471     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
472     */
473    protected void readMvccVersion(final int offsetFromPos) {
474      // See if we even need to decode mvcc.
475      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
476        return;
477      }
478      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
479        currMemstoreTS = 0;
480        currMemstoreTSLen = 1;
481        return;
482      }
483      _readMvccVersion(offsetFromPos);
484    }
485
    /**
     * Actually do the mvcc read. Does no checks.
     * Decodes a variable-length long starting at {@code offsetFromPos} bytes after the buffer
     * position and stores the value in {@code currMemstoreTS} and the number of bytes it
     * occupies in {@code currMemstoreTSLen}.
     *
     * @param offsetFromPos offset, relative to the buffer position, of the first vint byte
     */
    private void _readMvccVersion(int offsetFromPos) {
      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
      // previous if one-byte vint, we'd redo the vint call to find int size.
      // Also the method is kept small so can be inlined.
      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
      int len = WritableUtils.decodeVIntSize(firstByte);
      if (len == 1) {
        // Single-byte encoding: the byte itself is the value.
        this.currMemstoreTS = firstByte;
      } else {
        // Multi-byte encoding: the remaining len - 1 bytes are accumulated into i, most
        // significant first, reading an int, then a short, then single bytes as they fit.
        int remaining = len -1;
        long i = 0;
        offsetFromPos++;
        if (remaining >= Bytes.SIZEOF_INT) {
          // The int read has to be converted to unsigned long so the & op
          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
          remaining -= Bytes.SIZEOF_INT;
          offsetFromPos += Bytes.SIZEOF_INT;
        }
        if (remaining >= Bytes.SIZEOF_SHORT) {
          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
          i = i << 16;
          // Mask so the short is treated as unsigned.
          i = i | (s & 0xFFFF);
          remaining -= Bytes.SIZEOF_SHORT;
          offsetFromPos += Bytes.SIZEOF_SHORT;
        }
        for (int idx = 0; idx < remaining; idx++) {
          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
          i = i << 8;
          // Mask so the byte is treated as unsigned.
          i = i | (b & 0xFF);
        }
        // Negative values are stored complemented; undo that when the first byte says so.
        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
      }
      this.currMemstoreTSLen = len;
    }
523
    /**
     * Within a loaded block, seek looking for the last key that is smaller than
     * (or equal to?) the key we are interested in.
     * A note on the seekBefore: if you have seekBefore = true, AND the first
     * key in the block = key, then you'll get thrown exceptions. The caller has
     * to check for that case and load the previous block as appropriate.
     * <p>
     * The scan walks the cells of the block buffer forward from its current position, comparing
     * each cell's key (ignoring mvcc) with {@code key}, and leaves the buffer positioned on the
     * cell it settles on with the {@code curr*} length fields decoded for that cell.
     * @param key
     *          the key to find
     * @param seekBefore
     *          find the key before the given key in case of exact match.
     * @return 0 in case of an exact key match, 1 in case of an inexact match,
     *         -2 in case of an inexact match and furthermore, the input key
     *         less than the first key of current block(e.g. using a faked index
     *         key)
     * @throws IllegalStateException if a decoded key, value or tags length is rejected by the
     *         length checks, or if {@code seekBefore} is set and the exact match is the first
     *         cell examined
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      int klen, vlen, tlen = 0;
      // Serialized size of the previously examined cell; -1 until one cell has been skipped.
      int lastKeyValueSize = -1;
      int offsetFromPos;
      do {
        offsetFromPos = 0;
        // Better to ensure that we use the BB Utils here
        // Key length and value length are read together as one long.
        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
        klen = (int)(ll >> Integer.SIZE);
        vlen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException("Invalid klen " + klen + " or vlen "
              + vlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)."
              + " path=" + reader.getPath());
        }
        offsetFromPos += Bytes.SIZEOF_LONG;
        // Point the reusable key-only cell at this cell's key bytes and compare with the target.
        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen);
        int comp =
            PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
        offsetFromPos += klen + vlen;
        if (this.reader.getFileContext().isIncludesTags()) {
          // Read short as unsigned, high byte first
          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
              ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
          if (checkLen(tlen)) {
            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
                + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
                + blockBuffer.position() + " (without header)."
                + " path=" + reader.getPath());
          }
          // add the two bytes read for the tags.
          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
        }
        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
          // Directly read the mvcc based on current position
          readMvccVersion(offsetFromPos);
        }
        if (comp == 0) {
          // Exact match on this cell.
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              // No earlier cell was examined in this call, so there is nothing to step back to.
              throw new IllegalStateException("blockSeek with seekBefore "
                  + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
                  + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
                  + curBlock.getOnDiskSizeWithHeader()
                  + ", path=" + reader.getPath());
            }
            // Step back to the previously examined cell and decode its lengths.
            blockBuffer.moveBack(lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          currKeyLen = klen;
          currValueLen = vlen;
          currTagsLen = tlen;
          return 0; // indicate exact match
        } else if (comp < 0) {
          // The target sorts before this cell: settle on the previously examined cell, if any.
          if (lastKeyValueSize > 0) {
            blockBuffer.moveBack(lastKeyValueSize);
          }
          readKeyValueLen();
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
            // The target sorts before the very first cell of the buffer.
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }
        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
        // include tag length also if tags included with KV
        if (reader.getFileContext().isIncludesTags()) {
          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
        }
        blockBuffer.skip(lastKeyValueSize);
      } while (blockBuffer.hasRemaining());

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.moveBack(lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }
622
623    @Override
624    public Cell getNextIndexedKey() {
625      return nextIndexedKey;
626    }
627
628    @Override
629    public int seekTo(Cell key) throws IOException {
630      return seekTo(key, true);
631    }
632
633    @Override
634    public int reseekTo(Cell key) throws IOException {
635      int compared;
636      if (isSeeked()) {
637        compared = compareKey(reader.getComparator(), key);
638        if (compared < 1) {
639          // If the required key is less than or equal to current key, then
640          // don't do anything.
641          return compared;
642        } else {
643          // The comparison with no_next_index_key has to be checked
644          if (this.nextIndexedKey != null &&
645              (this.nextIndexedKey == KeyValueScanner.NO_NEXT_INDEXED_KEY || PrivateCellUtil
646                  .compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey) < 0)) {
647            // The reader shall continue to scan the current data block instead
648            // of querying the
649            // block index as long as it knows the target key is strictly
650            // smaller than
651            // the next indexed key or the current data block is the last data
652            // block.
653            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key,
654                false);
655          }
656        }
657      }
658      // Don't rewind on a reseek operation, because reseek implies that we are
659      // always going forward in the file.
660      return seekTo(key, false);
661    }
662
663    /**
664     * An internal API function. Seek to the given key, optionally rewinding to
665     * the first key of the block before doing the seek.
666     *
667     * @param key - a cell representing the key that we need to fetch
668     * @param rewind whether to rewind to the first key of the block before
669     *        doing the seek. If this is false, we are assuming we never go
670     *        back, otherwise the result is undefined.
671     * @return -1 if the key is earlier than the first key of the file,
672     *         0 if we are at the given key, 1 if we are past the given key
673     *         -2 if the key is earlier than the first key of the file while
674     *         using a faked index key
675     */
676    public int seekTo(Cell key, boolean rewind) throws IOException {
677      HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
678      BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
679          cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader);
680      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
681        // This happens if the key e.g. falls before the beginning of the file.
682        return -1;
683      }
684      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
685        blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
686    }
687
    /**
     * Positions the scanner before {@code key}. The data block for {@code key} is located
     * through the data block index; when the first key of that block is not smaller than
     * {@code key}, the block at the previous block offset is read and used instead.
     *
     * @param key the key to seek before
     * @return false if the index yields no block for {@code key}, or if the located block has
     *         no previous block while its first key is not smaller than {@code key}; true
     *         otherwise
     */
    @Override
    public boolean seekBefore(Cell key) throws IOException {
      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
          cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction),
          reader);
      if (seekToBlock == null) {
        return false;
      }
      Cell firstKey = getFirstKeyCellInBlock(seekToBlock);
      if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) {
        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
        // The key we are interested in
        if (previousBlockOffset == -1) {
          // we have a 'problem', the key we want is the first of the file.
          releaseIfNotCurBlock(seekToBlock);
          return false;
        }

        // The first key in the current block 'seekToBlock' is greater than or equal to the given
        // seekBefore key, so the key before it lives in the previous block. Return the current
        // block before reading the previous one.
        releaseIfNotCurBlock(seekToBlock);
        // It is important that we compute and pass onDiskSize to the block
        // reader so that it does not have to read the header separately to
        // figure out the size. Currently, we do not have a way to do this
        // correctly in the general case however.
        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
        int prevBlockSize = -1;
        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
        // TODO shortcut: seek forward in this block to the last key of the
        // block.
      }
      // firstKey is passed in the slot that reseekTo() fills with nextIndexedKey; the trailing
      // 'true' asks for seek-before behaviour (presumably -- verify against
      // loadBlockAndSeekToKey).
      loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true);
      return true;
    }
724
725    /**
726     * The curBlock will be released by shipping or close method, so only need to consider releasing
727     * the block, which was read from HFile before and not referenced by curBlock.
728     */
729    protected void releaseIfNotCurBlock(HFileBlock block) {
730      if (curBlock != block) {
731        block.release();
732      }
733    }
734
    /**
     * Scans blocks in the "scanned" section of the {@link HFile} until the next
     * data block is found. Starting from the current block, each following block is read at
     * offset + on-disk size (with header) of the block before it; non-data blocks are released
     * and skipped.
     *
     * @return the next block, or null if there are no more data blocks (including when there is
     *         no current block)
     * @throws IOException if a block with a negative offset is encountered, or from the
     *         underlying block read
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
        justification="Yeah, unnecessary null check; could do w/ clean up")
    protected HFileBlock readNextDataBlock() throws IOException {
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (curBlock == null) {
        return null;
      }
      HFileBlock block = this.curBlock;
      do {
        // At or past the last data block: nothing further to return.
        if (block.getOffset() >= lastDataBlockOffset) {
          releaseIfNotCurBlock(block);
          return null;
        }
        if (block.getOffset() < 0) {
          releaseIfNotCurBlock(block);
          throw new IOException("Invalid block offset=" + block + ", path=" + reader.getPath());
        }
        // We are reading the next block without block type validation, because
        // it might turn out to be a non-data block.
        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
          getEffectiveDataBlockEncoding());
        if (block != null && !block.getBlockType().isData()) {
          // Whatever block we read we will be returning it unless
          // it is a datablock. Just in case the blocks are non data blocks
          block.release();
        }
        // NOTE(review): the loop condition dereferences block without the null check used just
        // above; this is what the findbugs suppression on this method refers to.
      } while (!block.getBlockType().isData());
      return block;
    }
771
772    public DataBlockEncoding getEffectiveDataBlockEncoding() {
773      return this.reader.getEffectiveEncodingInCache(isCompaction);
774    }
775
776    @Override
777    public Cell getCell() {
778      if (!isSeeked()) {
779        return null;
780      }
781
782      Cell ret;
783      int cellBufSize = getKVBufSize();
784      long seqId = 0L;
785      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
786        seqId = currMemstoreTS;
787      }
788      if (blockBuffer.hasArray()) {
789        // TODO : reduce the varieties of KV here. Check if based on a boolean
790        // we can handle the 'no tags' case.
791        if (currTagsLen > 0) {
792          ret = new SizeCachedKeyValue(blockBuffer.array(),
793              blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId);
794        } else {
795          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
796              blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId);
797        }
798      } else {
799        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
800        if (buf.isDirect()) {
801          ret = currTagsLen > 0 ? new ByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId)
802              : new NoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId);
803        } else {
804          if (currTagsLen > 0) {
805            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
806                cellBufSize, seqId);
807          } else {
808            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
809                cellBufSize, seqId);
810          }
811        }
812      }
813      return ret;
814    }
815
816    @Override
817    public Cell getKey() {
818      assertSeeked();
819      // Create a new object so that this getKey is cached as firstKey, lastKey
820      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
821      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair);
822      ByteBuffer keyBuf = keyPair.getFirst();
823      if (keyBuf.hasArray()) {
824        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(), keyBuf.arrayOffset()
825            + keyPair.getSecond(), currKeyLen);
826      } else {
827        // Better to do a copy here instead of holding on to this BB so that
828        // we could release the blocks referring to this key. This key is specifically used
829        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
830        // every time. So holding onto the BB (incase of DBB) is not advised here.
831        byte[] key = new byte[currKeyLen];
832        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
833        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
834      }
835    }
836
837    @Override
838    public ByteBuffer getValue() {
839      assertSeeked();
840      // Okie to create new Pair. Not used in hot path
841      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
842      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
843        currValueLen, valuePair);
844      ByteBuffer valBuf = valuePair.getFirst().duplicate();
845      valBuf.position(valuePair.getSecond());
846      valBuf.limit(currValueLen + valuePair.getSecond());
847      return valBuf.slice();
848    }
849
850    protected void setNonSeekedState() {
851      reset();
852      blockBuffer = null;
853      currKeyLen = 0;
854      currValueLen = 0;
855      currMemstoreTS = 0;
856      currMemstoreTSLen = 0;
857      currTagsLen = 0;
858    }
859
860    /**
861     * Set the position on current backing blockBuffer.
862     */
863    private void positionThisBlockBuffer() {
864      try {
865        blockBuffer.skip(getCurCellSerializedSize());
866      } catch (IllegalArgumentException e) {
867        LOG.error("Current pos = " + blockBuffer.position()
868            + "; currKeyLen = " + currKeyLen + "; currValLen = "
869            + currValueLen + "; block limit = " + blockBuffer.limit()
870            + "; currBlock currBlockOffset = " + this.curBlock.getOffset()
871            + "; path=" + reader.getPath());
872        throw e;
873      }
874    }
875
876    /**
877     * Set our selves up for the next 'next' invocation, set up next block.
878     * @return True is more to read else false if at the end.
879     */
880    private boolean positionForNextBlock() throws IOException {
881      // Methods are small so they get inlined because they are 'hot'.
882      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
883      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
884        setNonSeekedState();
885        return false;
886      }
887      return isNextBlock();
888    }
889
890
891    private boolean isNextBlock() throws IOException {
892      // Methods are small so they get inlined because they are 'hot'.
893      HFileBlock nextBlock = readNextDataBlock();
894      if (nextBlock == null) {
895        setNonSeekedState();
896        return false;
897      }
898      updateCurrentBlock(nextBlock);
899      return true;
900    }
901
902    private final boolean _next() throws IOException {
903      // Small method so can be inlined. It is a hot one.
904      if (blockBuffer.remaining() <= 0) {
905        return positionForNextBlock();
906      }
907
908      // We are still in the same block.
909      readKeyValueLen();
910      return true;
911    }
912
913    /**
914     * Go to the next key/value in the block section. Loads the next block if
915     * necessary. If successful, {@link #getKey()} and {@link #getValue()} can
916     * be called.
917     *
918     * @return true if successfully navigated to the next key/value
919     */
920    @Override
921    public boolean next() throws IOException {
922      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
923      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
924      assertSeeked();
925      positionThisBlockBuffer();
926      return _next();
927    }
928
929    /**
930     * Positions this scanner at the start of the file.
931     *
932     * @return false if empty file; i.e. a call to next would return false and
933     *         the current key and value are undefined.
934     */
935    @Override
936    public boolean seekTo() throws IOException {
937      if (reader == null) {
938        return false;
939      }
940
941      if (reader.getTrailer().getEntryCount() == 0) {
942        // No data blocks.
943        return false;
944      }
945
946      long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
947      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
948        return processFirstDataBlock();
949      }
950
951      readAndUpdateNewBlock(firstDataBlockOffset);
952      return true;
953    }
954
955    protected boolean processFirstDataBlock() throws IOException{
956      blockBuffer.rewind();
957      readKeyValueLen();
958      return true;
959    }
960
961    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
962      HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
963        isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
964      if (newBlock.getOffset() < 0) {
965        releaseIfNotCurBlock(newBlock);
966        throw new IOException("Invalid offset=" + newBlock.getOffset() +
967          ", path=" + reader.getPath());
968      }
969      updateCurrentBlock(newBlock);
970    }
971
972    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
973        Cell key, boolean seekBefore) throws IOException {
974      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
975        updateCurrentBlock(seekToBlock);
976      } else if (rewind) {
977        blockBuffer.rewind();
978      }
979      // Update the nextIndexedKey
980      this.nextIndexedKey = nextIndexedKey;
981      return blockSeek(key, seekBefore);
982    }
983
984    /**
985     * @return True if v &lt;= 0 or v &gt; current block buffer limit.
986     */
987    protected final boolean checkKeyLen(final int v) {
988      return v <= 0 || v > this.blockBuffer.limit();
989    }
990
991    /**
992     * @return True if v &lt; 0 or v &gt; current block buffer limit.
993     */
994    protected final boolean checkLen(final int v) {
995      return v < 0 || v > this.blockBuffer.limit();
996    }
997
998    /**
999     * Check key and value lengths are wholesome.
1000     */
1001    protected final void checkKeyValueLen() {
1002      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
1003        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
1004            + " or currValueLen " + this.currValueLen + ". Block offset: "
1005            + this.curBlock.getOffset() + ", block length: "
1006            + this.blockBuffer.limit() + ", position: " + this.blockBuffer.position()
1007            + " (without header)." + ", path=" + reader.getPath());
1008      }
1009    }
1010
1011    /**
1012     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
1013     * key/value pair.
1014     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}, it's a totally new block
1015     *          with new allocated {@link ByteBuff}, so if no further reference to this block, we
1016     *          should release it carefully.
1017     */
1018    protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
1019      try {
1020        if (newBlock.getBlockType() != BlockType.DATA) {
1021          throw new IllegalStateException(
1022              "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
1023                  + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
1024                  + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
1025        }
1026        updateCurrBlockRef(newBlock);
1027        blockBuffer = newBlock.getBufferWithoutHeader();
1028        readKeyValueLen();
1029      } finally {
1030        releaseIfNotCurBlock(newBlock);
1031      }
1032      // Reset the next indexed key
1033      this.nextIndexedKey = null;
1034    }
1035
1036    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1037      ByteBuff buffer = curBlock.getBufferWithoutHeader();
1038      // It is safe to manipulate this buffer because we own the buffer object.
1039      buffer.rewind();
1040      int klen = buffer.getInt();
1041      buffer.skip(Bytes.SIZEOF_INT);// Skip value len part
1042      ByteBuffer keyBuff = buffer.asSubByteBuffer(klen);
1043      if (keyBuff.hasArray()) {
1044        return new KeyValue.KeyOnlyKeyValue(keyBuff.array(), keyBuff.arrayOffset()
1045            + keyBuff.position(), klen);
1046      } else {
1047        return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen);
1048      }
1049    }
1050
1051    @Override
1052    public String getKeyString() {
1053      return CellUtil.toString(getKey(), false);
1054    }
1055
1056    @Override
1057    public String getValueString() {
1058      return ByteBufferUtils.toStringBinary(getValue());
1059    }
1060
1061    public int compareKey(CellComparator comparator, Cell key) {
1062      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair);
1063      this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen);
1064      return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv);
1065    }
1066
1067    @Override
1068    public void shipped() throws IOException {
1069      this.returnBlocks(false);
1070    }
1071  }
1072
1073  @Override
1074  public Path getPath() {
1075    return path;
1076  }
1077
1078  @Override
1079  public DataBlockEncoding getDataBlockEncoding() {
1080    return dataBlockEncoder.getDataBlockEncoding();
1081  }
1082
1083  @Override
1084  public Configuration getConf() {
1085    return conf;
1086  }
1087
  /**
   * Replaces the configuration held by this reader.
   *
   * @param conf the configuration to store
   */
  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }
1092
1093  /** Minor versions in HFile starting with this number have hbase checksums */
1094  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
1095  /** In HFile minor version that does not support checksums */
1096  public static final int MINOR_VERSION_NO_CHECKSUM = 0;
1097
1098  /** HFile minor version that introduced pbuf filetrailer */
1099  public static final int PBUF_TRAILER_MINOR_VERSION = 2;
1100
1101  /**
1102   * The size of a (key length, value length) tuple that prefixes each entry in
1103   * a data block.
1104   */
1105  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
1106
1107  /**
1108   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
1109   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
1110   */
1111  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
1112      boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
1113      DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1114    // Check cache for block. If found return.
1115    BlockCache cache = cacheConf.getBlockCache().orElse(null);
1116    if (cache != null) {
1117      HFileBlock cachedBlock =
1118          (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics);
1119      if (cachedBlock != null) {
1120        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
1121          HFileBlock compressedBlock = cachedBlock;
1122          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1123          // In case of compressed block after unpacking we can release the compressed block
1124          if (compressedBlock != cachedBlock) {
1125            compressedBlock.release();
1126          }
1127        }
1128        try {
1129          validateBlockType(cachedBlock, expectedBlockType);
1130        } catch (IOException e) {
1131          returnAndEvictBlock(cache, cacheKey, cachedBlock);
1132          throw e;
1133        }
1134
1135        if (expectedDataBlockEncoding == null) {
1136          return cachedBlock;
1137        }
1138        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
1139        // Block types other than data blocks always have
1140        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
1141        // perform this check if cached block is a data block.
1142        if (cachedBlock.getBlockType().isData() &&
1143            !actualDataBlockEncoding.equals(expectedDataBlockEncoding)) {
1144          // This mismatch may happen if a Scanner, which is used for say a
1145          // compaction, tries to read an encoded block from the block cache.
1146          // The reverse might happen when an EncodedScanner tries to read
1147          // un-encoded blocks which were cached earlier.
1148          //
1149          // Because returning a data block with an implicit BlockType mismatch
1150          // will cause the requesting scanner to throw a disk read should be
1151          // forced here. This will potentially cause a significant number of
1152          // cache misses, so update so we should keep track of this as it might
1153          // justify the work on a CompoundScanner.
1154          if (!expectedDataBlockEncoding.equals(DataBlockEncoding.NONE) &&
1155              !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)) {
1156            // If the block is encoded but the encoding does not match the
1157            // expected encoding it is likely the encoding was changed but the
1158            // block was not yet evicted. Evictions on file close happen async
1159            // so blocks with the old encoding still linger in cache for some
1160            // period of time. This event should be rare as it only happens on
1161            // schema definition change.
1162            LOG.info("Evicting cached block with key {} because data block encoding mismatch; " +
1163                "expected {}, actual {}, path={}", cacheKey, actualDataBlockEncoding,
1164              expectedDataBlockEncoding, path);
1165            // This is an error scenario. so here we need to release the block.
1166            returnAndEvictBlock(cache, cacheKey, cachedBlock);
1167          }
1168          return null;
1169        }
1170        return cachedBlock;
1171      }
1172    }
1173    return null;
1174  }
1175
  /**
   * Releases the given block and then evicts the entry stored under {@code cacheKey} from the
   * cache.
   *
   * @param cache the block cache to evict from
   * @param cacheKey key of the entry to evict
   * @param block the block to release
   */
  private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) {
    // Release first, then evict.
    block.release();
    cache.evictBlock(cacheKey);
  }
1180
1181  /**
1182   * @param cacheBlock Add block to cache, if found
1183   * @return block wrapped in a ByteBuffer, with header skipped
1184   */
1185  @Override
1186  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock)
1187      throws IOException {
1188    if (trailer.getMetaIndexCount() == 0) {
1189      return null; // there are no meta blocks
1190    }
1191    if (metaBlockIndexReader == null) {
1192      throw new IOException(path + " meta index not loaded");
1193    }
1194
1195    byte[] mbname = Bytes.toBytes(metaBlockName);
1196    int block = metaBlockIndexReader.rootBlockContainingKey(mbname,
1197        0, mbname.length);
1198    if (block == -1) {
1199      return null;
1200    }
1201    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);
1202
1203    // Per meta key from any given file, synchronize reads for said block. This
1204    // is OK to do for meta blocks because the meta block index is always
1205    // single-level.
1206    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
1207      // Check cache for block. If found return.
1208      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
1209      BlockCacheKey cacheKey =
1210          new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
1211
1212      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
1213      HFileBlock cachedBlock =
1214          getCachedBlock(cacheKey, cacheBlock, false, true, true, BlockType.META, null);
1215      if (cachedBlock != null) {
1216        assert cachedBlock.isUnpacked() : "Packed block leak.";
1217        // Return a distinct 'shallow copy' of the block,
1218        // so pos does not get messed by the scanner
1219        return cachedBlock;
1220      }
1221      // Cache Miss, please load.
1222
1223      HFileBlock compressedBlock =
1224          fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
1225      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1226      if (compressedBlock != uncompressedBlock) {
1227        compressedBlock.release();
1228      }
1229
1230      // Cache the block
1231      if (cacheBlock) {
1232        cacheConf.getBlockCache().ifPresent(
1233          cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
1234      }
1235      return uncompressedBlock;
1236    }
1237  }
1238
1239  /**
1240   * If expected block is data block, we'll allocate the ByteBuff of block from
1241   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} and it's usually an off-heap one,
1242   * otherwise it will allocate from heap.
1243   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
1244   *      boolean, boolean)
1245   */
1246  private boolean shouldUseHeap(BlockType expectedBlockType) {
1247    if (!cacheConf.getBlockCache().isPresent()) {
1248      return false;
1249    } else if (!cacheConf.isCombinedBlockCache()) {
1250      // Block to cache in LruBlockCache must be an heap one. So just allocate block memory from
1251      // heap for saving an extra off-heap to heap copying.
1252      return true;
1253    }
1254    return expectedBlockType != null && !expectedBlockType.isData();
1255  }
1256
1257  @Override
1258  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
1259      final boolean cacheBlock, boolean pread, final boolean isCompaction,
1260      boolean updateCacheMetrics, BlockType expectedBlockType,
1261      DataBlockEncoding expectedDataBlockEncoding)
1262      throws IOException {
1263    if (dataBlockIndexReader == null) {
1264      throw new IOException(path + " block index not loaded");
1265    }
1266    long trailerOffset = trailer.getLoadOnOpenDataOffset();
1267    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
1268      throw new IOException("Requested block is out of range: " + dataBlockOffset +
1269        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
1270        ", trailer.getLoadOnOpenDataOffset: " + trailerOffset +
1271        ", path=" + path);
1272    }
1273    // For any given block from any given file, synchronize reads for said
1274    // block.
1275    // Without a cache, this synchronizing is needless overhead, but really
1276    // the other choice is to duplicate work (which the cache would prevent you
1277    // from doing).
1278
1279    BlockCacheKey cacheKey = new BlockCacheKey(name, dataBlockOffset,
1280      this.isPrimaryReplicaReader(), expectedBlockType);
1281
1282    boolean useLock = false;
1283    IdLock.Entry lockEntry = null;
1284    try (TraceScope traceScope = TraceUtil.createTrace("HFileReaderImpl.readBlock")) {
1285      while (true) {
1286        // Check cache for block. If found return.
1287        if (cacheConf.shouldReadBlockFromCache(expectedBlockType)) {
1288          if (useLock) {
1289            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
1290          }
1291          // Try and get the block from the block cache. If the useLock variable is true then this
1292          // is the second time through the loop and it should not be counted as a block cache miss.
1293          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, isCompaction,
1294            updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding);
1295          if (cachedBlock != null) {
1296            if (LOG.isTraceEnabled()) {
1297              LOG.trace("From Cache " + cachedBlock);
1298            }
1299            TraceUtil.addTimelineAnnotation("blockCacheHit");
1300            assert cachedBlock.isUnpacked() : "Packed block leak.";
1301            if (cachedBlock.getBlockType().isData()) {
1302              if (updateCacheMetrics) {
1303                HFile.DATABLOCK_READ_COUNT.increment();
1304              }
1305              // Validate encoding type for data blocks. We include encoding
1306              // type in the cache key, and we expect it to match on a cache hit.
1307              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
1308                // Remember to release the block when in exceptional path.
1309                cacheConf.getBlockCache().ifPresent(cache -> {
1310                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
1311                });
1312                throw new IOException("Cached block under key " + cacheKey + " "
1313                    + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
1314                    + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
1315              }
1316            }
1317            // Cache-hit. Return!
1318            return cachedBlock;
1319          }
1320
1321          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
1322            // check cache again with lock
1323            useLock = true;
1324            continue;
1325          }
1326          // Carry on, please load.
1327        }
1328
1329        TraceUtil.addTimelineAnnotation("blockCacheMiss");
1330        // Load block from filesystem.
1331        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
1332          !isCompaction, shouldUseHeap(expectedBlockType));
1333        validateBlockType(hfileBlock, expectedBlockType);
1334        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
1335        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
1336
1337        // Cache the block if necessary
1338        cacheConf.getBlockCache().ifPresent(cache -> {
1339          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
1340            cache.cacheBlock(cacheKey,
1341              cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
1342              cacheConf.isInMemory());
1343          }
1344        });
1345        if (unpacked != hfileBlock) {
1346          // End of life here if hfileBlock is an independent block.
1347          hfileBlock.release();
1348        }
1349        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
1350          HFile.DATABLOCK_READ_COUNT.increment();
1351        }
1352
1353        return unpacked;
1354      }
1355    } finally {
1356      if (lockEntry != null) {
1357        offsetLock.releaseLockEntry(lockEntry);
1358      }
1359    }
1360  }
1361
1362  @Override
1363  public boolean hasMVCCInfo() {
1364    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
1365  }
1366
1367  /**
1368   * Compares the actual type of a block retrieved from cache or disk with its
1369   * expected type and throws an exception in case of a mismatch. Expected
1370   * block type of {@link BlockType#DATA} is considered to match the actual
1371   * block type [@link {@link BlockType#ENCODED_DATA} as well.
1372   * @param block a block retrieved from cache or disk
1373   * @param expectedBlockType the expected block type, or null to skip the
1374   *          check
1375   */
1376  private void validateBlockType(HFileBlock block,
1377      BlockType expectedBlockType) throws IOException {
1378    if (expectedBlockType == null) {
1379      return;
1380    }
1381    BlockType actualBlockType = block.getBlockType();
1382    if (expectedBlockType.isData() && actualBlockType.isData()) {
1383      // We consider DATA to match ENCODED_DATA for the purpose of this
1384      // verification.
1385      return;
1386    }
1387    if (actualBlockType != expectedBlockType) {
1388      throw new IOException("Expected block type " + expectedBlockType + ", " +
1389          "but got " + actualBlockType + ": " + block + ", path=" + path);
1390    }
1391  }
1392
1393  /**
1394   * @return Last key as cell in the file. May be null if file has no entries. Note that
1395   *         this is not the last row key, but it is the Cell representation of the last
1396   *         key
1397   */
1398  @Override
1399  public Optional<Cell> getLastKey() {
1400    return dataBlockIndexReader.isEmpty() ? Optional.empty() :
1401        Optional.of(fileInfo.getLastKeyCell());
1402  }
1403
1404  /**
1405   * @return Midkey for this file. We work with block boundaries only so
1406   *         returned midkey is an approximation only.
1407   */
1408  @Override
1409  public Optional<Cell> midKey() throws IOException {
1410    return Optional.ofNullable(dataBlockIndexReader.midkey(this));
1411  }
1412
1413  @Override
1414  public void close() throws IOException {
1415    close(cacheConf.shouldEvictOnClose());
1416  }
1417
1418  @Override
1419  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
1420    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
1421  }
1422
1423  /** For testing */
1424  @Override
1425  public HFileBlock.FSReader getUncachedBlockReader() {
1426    return fsBlockReader;
1427  }
1428
1429  /**
1430   * Scanner that operates on encoded data blocks.
1431   */
1432  protected static class EncodedScanner extends HFileScannerImpl {
1433    private final HFileBlockDecodingContext decodingCtx;
1434    private final DataBlockEncoder.EncodedSeeker seeker;
1435    private final DataBlockEncoder dataBlockEncoder;
1436
1437    public EncodedScanner(HFile.Reader reader, boolean cacheBlocks,
1438        boolean pread, boolean isCompaction, HFileContext meta) {
1439      super(reader, cacheBlocks, pread, isCompaction);
1440      DataBlockEncoding encoding = reader.getDataBlockEncoding();
1441      dataBlockEncoder = encoding.getEncoder();
1442      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(meta);
1443      seeker = dataBlockEncoder.createSeeker(decodingCtx);
1444    }
1445
1446    @Override
1447    public boolean isSeeked(){
1448      return curBlock != null;
1449    }
1450
    /**
     * Puts the scanner into the non-seeked state; for the encoded scanner this only calls
     * {@code reset()}.
     */
    @Override
    public void setNonSeekedState() {
      reset();
    }
1455
1456    /**
1457     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
1458     * key/value pair.
1459     * @param newBlock the block to make current, and read by {@link HFileReaderImpl#readBlock},
1460     *          it's a totally new block with new allocated {@link ByteBuff}, so if no further
1461     *          reference to this block, we should release it carefully.
1462     */
1463    @Override
1464    protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
1465      try {
1466        // sanity checks
1467        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
1468          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
1469        }
1470        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
1471        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
1472          String encoderCls = dataBlockEncoder.getClass().getName();
1473          throw new CorruptHFileException("Encoder " + encoderCls +
1474            " doesn't support data block encoding " +
1475            DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ",path=" + reader.getPath());
1476        }
1477        updateCurrBlockRef(newBlock);
1478        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
1479        seeker.setCurrentBuffer(encodedBuffer);
1480      } finally {
1481        releaseIfNotCurBlock(newBlock);
1482      }
1483      // Reset the next indexed key
1484      this.nextIndexedKey = null;
1485    }
1486
1487    private ByteBuff getEncodedBuffer(HFileBlock newBlock) {
1488      ByteBuff origBlock = newBlock.getBufferReadOnly();
1489      int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE;
1490      origBlock.position(pos);
1491      origBlock
1492          .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
1493      return origBlock.slice();
1494    }
1495
    /**
     * Re-positions on the start of the current block by rewinding the seeker.
     *
     * @return always true
     */
    @Override
    protected boolean processFirstDataBlock() throws IOException {
      seeker.rewind();
      return true;
    }
1501
1502    @Override
1503    public boolean next() throws IOException {
1504      boolean isValid = seeker.next();
1505      if (!isValid) {
1506        HFileBlock newBlock = readNextDataBlock();
1507        isValid = newBlock != null;
1508        if (isValid) {
1509          updateCurrentBlock(newBlock);
1510        } else {
1511          setNonSeekedState();
1512        }
1513      }
1514      return isValid;
1515    }
1516
1517    @Override
1518    public Cell getKey() {
1519      assertValidSeek();
1520      return seeker.getKey();
1521    }
1522
1523    @Override
1524    public ByteBuffer getValue() {
1525      assertValidSeek();
1526      return seeker.getValueShallowCopy();
1527    }
1528
1529    @Override
1530    public Cell getCell() {
1531      if (this.curBlock == null) {
1532        return null;
1533      }
1534      return seeker.getCell();
1535    }
1536
1537    @Override
1538    public String getKeyString() {
1539      return CellUtil.toString(getKey(), true);
1540    }
1541
1542    @Override
1543    public String getValueString() {
1544      ByteBuffer valueBuffer = getValue();
1545      return ByteBufferUtils.toStringBinary(valueBuffer);
1546    }
1547
1548    private void assertValidSeek() {
1549      if (this.curBlock == null) {
1550        throw new NotSeekedException(reader.getPath());
1551      }
1552    }
1553
1554    @Override
1555    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1556      return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock));
1557    }
1558
1559    @Override
1560    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
1561        boolean rewind, Cell key, boolean seekBefore) throws IOException {
1562      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
1563        updateCurrentBlock(seekToBlock);
1564      } else if (rewind) {
1565        seeker.rewind();
1566      }
1567      this.nextIndexedKey = nextIndexedKey;
1568      return seeker.seekToKeyInBlock(key, seekBefore);
1569    }
1570
1571    @Override
1572    public int compareKey(CellComparator comparator, Cell key) {
1573      return seeker.compareKey(comparator, key);
1574    }
1575  }
1576
1577  /**
1578   * Returns a buffer with the Bloom filter metadata. The caller takes
1579   * ownership of the buffer.
1580   */
1581  @Override
1582  public DataInput getGeneralBloomFilterMetadata() throws IOException {
1583    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
1584  }
1585
1586  @Override
1587  public DataInput getDeleteBloomFilterMetadata() throws IOException {
1588    return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
1589  }
1590
1591  private DataInput getBloomFilterMetadata(BlockType blockType)
1592      throws IOException {
1593    if (blockType != BlockType.GENERAL_BLOOM_META &&
1594        blockType != BlockType.DELETE_FAMILY_BLOOM_META) {
1595      throw new RuntimeException("Block Type: " + blockType.toString() +
1596          " is not supported, path=" + path) ;
1597    }
1598
1599    for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) {
1600      if (b.getBlockType() == blockType) {
1601        return b.getByteStream();
1602      }
1603    }
1604    return null;
1605  }
1606
1607  public boolean isFileInfoLoaded() {
1608    return true; // We load file info in constructor in version 2.
1609  }
1610
1611  @Override
1612  public HFileContext getFileContext() {
1613    return hfileContext;
1614  }
1615
1616  /**
1617   * Returns false if block prefetching was requested for this file and has
1618   * not completed, true otherwise
1619   */
1620  @Override
1621  @VisibleForTesting
1622  public boolean prefetchComplete() {
1623    return PrefetchExecutor.isCompleted(path);
1624  }
1625
1626  /**
1627   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1628   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is
1629   * nothing to clean up in a Scanner. Letting go of your references to the
1630   * scanner is sufficient. NOTE: Do not use this overload of getScanner for
1631   * compactions. See {@link #getScanner(boolean, boolean, boolean)}
1632   *
1633   * @param cacheBlocks True if we should cache blocks read in by this scanner.
1634   * @param pread Use positional read rather than seek+read if true (pread is
1635   *          better for random reads, seek+read is better scanning).
1636   * @return Scanner on this file.
1637   */
1638  @Override
1639  @VisibleForTesting
1640  public HFileScanner getScanner(boolean cacheBlocks, final boolean pread) {
1641    return getScanner(cacheBlocks, pread, false);
1642  }
1643
1644  /**
1645   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1646   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is
1647   * nothing to clean up in a Scanner. Letting go of your references to the
1648   * scanner is sufficient.
1649   * @param cacheBlocks
1650   *          True if we should cache blocks read in by this scanner.
1651   * @param pread
1652   *          Use positional read rather than seek+read if true (pread is better
1653   *          for random reads, seek+read is better scanning).
1654   * @param isCompaction
1655   *          is scanner being used for a compaction?
1656   * @return Scanner on this file.
1657   */
1658  @Override
1659  public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
1660      final boolean isCompaction) {
1661    if (dataBlockEncoder.useEncodedScanner()) {
1662      return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext);
1663    }
1664    return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
1665  }
1666
1667  public int getMajorVersion() {
1668    return 3;
1669  }
1670
1671  @Override
1672  public void unbufferStream() {
1673    fsBlockReader.unbufferStream();
1674  }
1675}