001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.BLOCK_CACHE_KEY_KEY;
021
022import io.opentelemetry.api.common.Attributes;
023import io.opentelemetry.api.trace.Span;
024import java.io.DataInput;
025import java.io.IOException;
026import java.nio.ByteBuffer;
027import java.util.ArrayList;
028import java.util.Optional;
029import java.util.function.IntConsumer;
030import org.apache.hadoop.conf.Configurable;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
034import org.apache.hadoop.hbase.Cell;
035import org.apache.hadoop.hbase.CellComparator;
036import org.apache.hadoop.hbase.CellUtil;
037import org.apache.hadoop.hbase.HConstants;
038import org.apache.hadoop.hbase.KeyValue;
039import org.apache.hadoop.hbase.PrivateCellUtil;
040import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue;
041import org.apache.hadoop.hbase.SizeCachedKeyValue;
042import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue;
043import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
044import org.apache.hadoop.hbase.io.compress.Compression;
045import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
046import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
047import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
048import org.apache.hadoop.hbase.nio.ByteBuff;
049import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
050import org.apache.hadoop.hbase.util.ByteBufferUtils;
051import org.apache.hadoop.hbase.util.Bytes;
052import org.apache.hadoop.hbase.util.IdLock;
053import org.apache.hadoop.hbase.util.ObjectIntPair;
054import org.apache.hadoop.io.WritableUtils;
055import org.apache.yetus.audience.InterfaceAudience;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058
059/**
060 * Implementation that can handle all hfile versions of {@link HFile.Reader}.
061 */
062@InterfaceAudience.Private
063@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
064public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
065  // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into
066  // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against
067  // the MaxInlineLevel limit because too many tiers involved reading from an hfile. Was also hard
068  // to navigate the source code when so many classes participating in read.
069  private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);
070
071  /** Data block index reader keeping the root data index in memory */
072  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;
073
074  /** Meta block index reader -- always single level */
075  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;
076
077  protected FixedFileTrailer trailer;
078
079  private final boolean primaryReplicaReader;
080
081  /**
082   * What kind of data block encoding should be used while reading, writing, and handling cache.
083   */
084  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;
085
086  /** Block cache configuration. */
087  protected final CacheConfig cacheConf;
088
089  protected ReaderContext context;
090
091  protected final HFileInfo fileInfo;
092
093  /** Path of file */
094  protected final Path path;
095
096  /** File name to be used for block names */
097  protected final String name;
098
099  private Configuration conf;
100
101  protected HFileContext hfileContext;
102
103  /** Filesystem-level block reader. */
104  protected HFileBlock.FSReader fsBlockReader;
105
106  /**
107   * A "sparse lock" implementation allowing to lock on a particular block identified by offset. The
108   * purpose of this is to avoid two clients loading the same block, and have all but one client
109   * wait to get the block from the cache.
110   */
111  private IdLock offsetLock = new IdLock();
112
113  /** Minimum minor version supported by this HFile format */
114  static final int MIN_MINOR_VERSION = 0;
115
116  /** Maximum minor version supported by this HFile format */
117  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
118  // the file. This version can read Writables version 1.
119  static final int MAX_MINOR_VERSION = 3;
120
121  /** Minor versions starting with this number have faked index key */
122  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
123
124  /**
125   * Opens a HFile.
126   * @param context   Reader context info
127   * @param fileInfo  HFile info
128   * @param cacheConf Cache configuration.
129   * @param conf      Configuration
130   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
  public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
    Configuration conf) throws IOException {
    this.cacheConf = cacheConf;
    this.context = context;
    this.path = context.getFilePath();
    this.name = path.getName();
    this.conf = conf;
    this.primaryReplicaReader = context.isPrimaryReplicaReader();
    this.fileInfo = fileInfo;
    // Trailer and hfile context were parsed when the HFileInfo was built; adopt them here.
    this.trailer = fileInfo.getTrailer();
    this.hfileContext = fileInfo.getHFileContext();
    // Create the filesystem-level block reader first; the data block encoder derived from the
    // file info is then pushed down to it so reads decode blocks correctly.
    this.fsBlockReader =
      new HFileBlock.FSReaderImpl(context, hfileContext, cacheConf.getByteBuffAllocator(), conf);
    this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
    fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
    // Index readers also come pre-built from the HFileInfo.
    dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
    metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
  }
150
  /** Thrown when an operation requires the data block index but it has not been loaded yet. */
  @SuppressWarnings("serial")
  public static class BlockIndexNotLoadedException extends IllegalStateException {
    public BlockIndexNotLoadedException(Path path) {
      // Add a message in case anyone relies on it as opposed to class name.
      super(path + " block index not loaded");
    }
  }
158
159  public CacheConfig getCacheConf() {
160    return cacheConf;
161  }
162
163  private Optional<String> toStringFirstKey() {
164    return getFirstKey().map(CellUtil::getCellKeyAsString);
165  }
166
167  private Optional<String> toStringLastKey() {
168    return getLastKey().map(CellUtil::getCellKeyAsString);
169  }
170
171  @Override
172  public String toString() {
173    return "reader=" + path.toString()
174      + (!isFileInfoLoaded()
175        ? ""
176        : ", compression=" + trailer.getCompressionCodec().getName() + ", cacheConf=" + cacheConf
177          + ", firstKey=" + toStringFirstKey() + ", lastKey=" + toStringLastKey())
178      + ", avgKeyLen=" + fileInfo.getAvgKeyLen() + ", avgValueLen=" + fileInfo.getAvgValueLen()
179      + ", entries=" + trailer.getEntryCount() + ", length=" + context.getFileSize();
180  }
181
182  @Override
183  public long length() {
184    return context.getFileSize();
185  }
186
187  /**
188   * @return the first key in the file. May be null if file has no entries. Note that this is not
189   *         the first row key, but rather the byte form of the first KeyValue.
190   */
191  @Override
192  public Optional<Cell> getFirstKey() {
193    if (dataBlockIndexReader == null) {
194      throw new BlockIndexNotLoadedException(path);
195    }
196    return dataBlockIndexReader.isEmpty()
197      ? Optional.empty()
198      : Optional.of(dataBlockIndexReader.getRootBlockKey(0));
199  }
200
201  /**
202   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
203   * eliminate {@link KeyValue} here.
204   * @return the first row key, or null if the file is empty.
205   */
206  @Override
207  public Optional<byte[]> getFirstRowKey() {
208    // We have to copy the row part to form the row key alone
209    return getFirstKey().map(CellUtil::cloneRow);
210  }
211
212  /**
213   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
214   * eliminate {@link KeyValue} here.
215   * @return the last row key, or null if the file is empty.
216   */
217  @Override
218  public Optional<byte[]> getLastRowKey() {
219    // We have to copy the row part to form the row key alone
220    return getLastKey().map(CellUtil::cloneRow);
221  }
222
223  /** Returns number of KV entries in this HFile */
224  @Override
225  public long getEntries() {
226    return trailer.getEntryCount();
227  }
228
229  /** Returns comparator */
230  @Override
231  public CellComparator getComparator() {
232    return this.hfileContext.getCellComparator();
233  }
234
235  public Compression.Algorithm getCompressionAlgorithm() {
236    return trailer.getCompressionCodec();
237  }
238
239  /**
240   * @return the total heap size of data and meta block indexes in bytes. Does not take into account
241   *         non-root blocks of a multilevel data index.
242   */
243  @Override
244  public long indexSize() {
245    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
246      + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() : 0);
247  }
248
249  @Override
250  public String getName() {
251    return name;
252  }
253
  @Override
  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
    // Keep the filesystem-level block reader in sync with the encoder used by this reader.
    this.dataBlockEncoder = dataBlockEncoder;
    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
  }
259
260  @Override
261  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
262    this.dataBlockIndexReader = reader;
263  }
264
265  @Override
266  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
267    return dataBlockIndexReader;
268  }
269
270  @Override
271  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
272    this.metaBlockIndexReader = reader;
273  }
274
275  @Override
276  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
277    return metaBlockIndexReader;
278  }
279
280  @Override
281  public FixedFileTrailer getTrailer() {
282    return trailer;
283  }
284
285  @Override
286  public ReaderContext getContext() {
287    return this.context;
288  }
289
290  @Override
291  public HFileInfo getHFileInfo() {
292    return this.fileInfo;
293  }
294
295  @Override
296  public boolean isPrimaryReplicaReader() {
297    return primaryReplicaReader;
298  }
299
300  /**
301   * An exception thrown when an operation requiring a scanner to be seeked is invoked on a scanner
302   * that is not seeked.
303   */
304  @SuppressWarnings("serial")
305  public static class NotSeekedException extends IllegalStateException {
306    public NotSeekedException(Path path) {
307      super(path + " not seeked to a key/value");
308    }
309  }
310
311  public static class HFileScannerImpl implements HFileScanner {
312    private ByteBuff blockBuffer;
313    protected final boolean cacheBlocks;
314    protected final boolean pread;
315    protected final boolean isCompaction;
316    private int currKeyLen;
317    private int currValueLen;
318    private int currMemstoreTSLen;
319    private long currMemstoreTS;
320    protected final HFile.Reader reader;
321    private int currTagsLen;
322    private short rowLen;
323    // buffer backed keyonlyKV
324    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
325    // A pair for reusing in blockSeek() so that we don't garbage lot of objects
326    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();
327
328    /**
329     * The next indexed key is to keep track of the indexed key of the next data block. If the
330     * nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the current data block is the
331     * last data block. If the nextIndexedKey is null, it means the nextIndexedKey has not been
332     * loaded yet.
333     */
334    protected Cell nextIndexedKey;
335
336    // Current block being used. NOTICE: DON't release curBlock separately except in shipped() or
337    // close() methods. Because the shipped() or close() will do the release finally, even if any
338    // exception occur the curBlock will be released by the close() method (see
339    // RegionScannerImpl#handleException). Call the releaseIfNotCurBlock() to release the
340    // unreferenced block please.
341    protected HFileBlock curBlock;
342    // Whether we returned a result for curBlock's size in recordBlockSize().
343    // gets reset whenever curBlock is changed.
344    private boolean providedCurrentBlockSize = false;
345
346    public HFileBlock getCurBlock() {
347      return curBlock;
348    }
349
350    // Previous blocks that were used in the course of the read
351    protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();
352
    /**
     * @param reader       the reader whose blocks this scanner iterates
     * @param cacheBlocks  whether blocks read by this scanner should be cached
     * @param pread        whether to use positional read rather than streaming read
     * @param isCompaction whether this scanner serves a compaction (passed through to block reads
     *                     and cache-encoding lookups)
     */
    public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
      final boolean pread, final boolean isCompaction) {
      this.reader = reader;
      this.cacheBlocks = cacheBlocks;
      this.pread = pread;
      this.isCompaction = isCompaction;
    }
360
    /**
     * Makes {@code block} the current block, parking the previous current block (when
     * shared-memory backed) in {@link #prevBlocks} so it can be released later.
     * @param block the newly loaded block; may be null
     */
    void updateCurrBlockRef(HFileBlock block) {
      // Same underlying block (same file offset): nothing to update.
      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
        return;
      }
      // Shared-memory blocks must be tracked so their references are released in returnBlocks().
      if (this.curBlock != null && this.curBlock.isSharedMem()) {
        prevBlocks.add(this.curBlock);
      }
      this.curBlock = block;
      // New current block: allow recordBlockSize() to report its size once again.
      this.providedCurrentBlockSize = false;
    }
371
    /**
     * Drops the current block reference, keeping it in {@link #prevBlocks} for later release if it
     * is shared-memory backed.
     */
    void reset() {
      // We don't have to keep ref to heap block
      if (this.curBlock != null && this.curBlock.isSharedMem()) {
        this.prevBlocks.add(this.curBlock);
      }
      this.curBlock = null;
    }
379
380    private void returnBlocks(boolean returnAll) {
381      this.prevBlocks.forEach(HFileBlock::release);
382      this.prevBlocks.clear();
383      if (returnAll && this.curBlock != null) {
384        this.curBlock.release();
385        this.curBlock = null;
386      }
387    }
388
389    @Override
390    public boolean isSeeked() {
391      return blockBuffer != null;
392    }
393
394    @Override
395    public String toString() {
396      return "HFileScanner for reader " + String.valueOf(getReader());
397    }
398
399    protected void assertSeeked() {
400      if (!isSeeked()) {
401        throw new NotSeekedException(reader.getPath());
402      }
403    }
404
405    @Override
406    public HFile.Reader getReader() {
407      return reader;
408    }
409
410    // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile
411    // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous
412    // array/buffer. How many bytes we should wrap to make the KV is what this method returns.
413    private int getKVBufSize() {
414      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
415      if (currTagsLen > 0) {
416        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
417      }
418      return kvBufSize;
419    }
420
    /**
     * Closes this scanner, releasing all blocks it still references. For streaming (non-pread)
     * scanners the underlying stream is also unbuffered (HBASE-9393).
     */
    @Override
    public void close() {
      if (!pread) {
        // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393
        reader.unbufferStream();
      }
      this.returnBlocks(true);
    }
429
430    @Override
431    public void recordBlockSize(IntConsumer blockSizeConsumer) {
432      if (!providedCurrentBlockSize && curBlock != null) {
433        providedCurrentBlockSize = true;
434        blockSizeConsumer.accept(curBlock.getUncompressedSizeWithoutHeader());
435      }
436    }
437
438    // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current
439    // HFile block's buffer so as to position to the next cell.
440    private int getCurCellSerializedSize() {
441      int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + currMemstoreTSLen;
442      if (this.reader.getFileContext().isIncludesTags()) {
443        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
444      }
445      return curCellSize;
446    }
447
    /**
     * Reads key length, value length, row length, optional tags length and mvcc for the cell at
     * the current buffer position into the curr* fields. Does not move the buffer position.
     */
    protected void readKeyValueLen() {
      // This is a hot method. We go out of our way to make this method short so it can be
      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
      // because it is faster than going via range-checked ByteBuffer methods or going through a
      // byte buffer array a byte at a time.
      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
      // Trying to imitate what was done - need to profile if this is better or
      // earlier way is better by doing mark and reset?
      // But ensure that you read long instead of two ints
      long ll = blockBuffer.getLongAfterPosition(0);
      // Read top half as an int of key length and bottom int as value length
      this.currKeyLen = (int) (ll >> Integer.SIZE);
      this.currValueLen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
      checkKeyValueLen();
      // The row length short immediately follows the key/value length fields.
      this.rowLen = blockBuffer.getShortAfterPosition(Bytes.SIZEOF_LONG);
      // Move position past the key and value lengths and then beyond the key and value
      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
      if (reader.getFileContext().isIncludesTags()) {
        // Tags length is a short.
        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
        checkTagsLen();
        p += (Bytes.SIZEOF_SHORT + currTagsLen);
      }
      readMvccVersion(p);
    }
474
    /** Validates the tags length just read, failing fast with block context when out of range. */
    private final void checkTagsLen() {
      if (checkLen(this.currTagsLen)) {
        throw new IllegalStateException(
          "Invalid currTagsLen " + this.currTagsLen + ". Block offset: " + curBlock.getOffset()
            + ", block length: " + this.blockBuffer.limit() + ", position: "
            + this.blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
      }
    }
483
484    /**
485     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
486     */
487    protected void readMvccVersion(final int offsetFromPos) {
488      // See if we even need to decode mvcc.
489      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
490        return;
491      }
492      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
493        currMemstoreTS = 0;
494        currMemstoreTSLen = 1;
495        return;
496      }
497      _readMvccVersion(offsetFromPos);
498    }
499
500    /**
501     * Actually do the mvcc read. Does no checks.
502     */
503    private void _readMvccVersion(int offsetFromPos) {
504      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
505      // previous if one-byte vint, we'd redo the vint call to find int size.
506      // Also the method is kept small so can be inlined.
507      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
508      int len = WritableUtils.decodeVIntSize(firstByte);
509      if (len == 1) {
510        this.currMemstoreTS = firstByte;
511      } else {
512        int remaining = len - 1;
513        long i = 0;
514        offsetFromPos++;
515        if (remaining >= Bytes.SIZEOF_INT) {
516          // The int read has to be converted to unsigned long so the & op
517          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
518          remaining -= Bytes.SIZEOF_INT;
519          offsetFromPos += Bytes.SIZEOF_INT;
520        }
521        if (remaining >= Bytes.SIZEOF_SHORT) {
522          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
523          i = i << 16;
524          i = i | (s & 0xFFFF);
525          remaining -= Bytes.SIZEOF_SHORT;
526          offsetFromPos += Bytes.SIZEOF_SHORT;
527        }
528        for (int idx = 0; idx < remaining; idx++) {
529          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
530          i = i << 8;
531          i = i | (b & 0xFF);
532        }
533        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
534      }
535      this.currMemstoreTSLen = len;
536    }
537
538    /**
539     * Within a loaded block, seek looking for the last key that is smaller than (or equal to?) the
540     * key we are interested in. A note on the seekBefore: if you have seekBefore = true, AND the
541     * first key in the block = key, then you'll get thrown exceptions. The caller has to check for
542     * that case and load the previous block as appropriate. the key to find find the key before the
543     * given key in case of exact match.
544     * @return 0 in case of an exact key match, 1 in case of an inexact match, -2 in case of an
545     *         inexact match and furthermore, the input key less than the first key of current
546     *         block(e.g. using a faked index key)
547     */
548    protected int blockSeek(Cell key, boolean seekBefore) {
549      int klen, vlen, tlen = 0;
550      int lastKeyValueSize = -1;
551      int offsetFromPos;
552      do {
553        offsetFromPos = 0;
554        // Better to ensure that we use the BB Utils here
555        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
556        klen = (int) (ll >> Integer.SIZE);
557        vlen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
558        if (checkKeyLen(klen) || checkLen(vlen)) {
559          throw new IllegalStateException(
560            "Invalid klen " + klen + " or vlen " + vlen + ". Block offset: " + curBlock.getOffset()
561              + ", block length: " + blockBuffer.limit() + ", position: " + blockBuffer.position()
562              + " (without header)." + " path=" + reader.getPath());
563        }
564        offsetFromPos += Bytes.SIZEOF_LONG;
565        this.rowLen = blockBuffer.getShortAfterPosition(offsetFromPos);
566        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
567        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen, rowLen);
568        int comp =
569          PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
570        offsetFromPos += klen + vlen;
571        if (this.reader.getFileContext().isIncludesTags()) {
572          // Read short as unsigned, high byte first
573          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
574            ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
575          if (checkLen(tlen)) {
576            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
577              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
578              + blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
579          }
580          // add the two bytes read for the tags.
581          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
582        }
583        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
584          // Directly read the mvcc based on current position
585          readMvccVersion(offsetFromPos);
586        }
587        if (comp == 0) {
588          if (seekBefore) {
589            if (lastKeyValueSize < 0) {
590              throw new IllegalStateException("blockSeek with seekBefore "
591                + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
592                + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
593                + curBlock.getOnDiskSizeWithHeader() + ", path=" + reader.getPath());
594            }
595            blockBuffer.moveBack(lastKeyValueSize);
596            readKeyValueLen();
597            return 1; // non exact match.
598          }
599          currKeyLen = klen;
600          currValueLen = vlen;
601          currTagsLen = tlen;
602          return 0; // indicate exact match
603        } else if (comp < 0) {
604          if (lastKeyValueSize > 0) {
605            blockBuffer.moveBack(lastKeyValueSize);
606          }
607          readKeyValueLen();
608          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
609            return HConstants.INDEX_KEY_MAGIC;
610          }
611          return 1;
612        }
613        // The size of this key/value tuple, including key/value length fields.
614        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
615        // include tag length also if tags included with KV
616        if (reader.getFileContext().isIncludesTags()) {
617          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
618        }
619        blockBuffer.skip(lastKeyValueSize);
620      } while (blockBuffer.hasRemaining());
621
622      // Seek to the last key we successfully read. This will happen if this is
623      // the last key/value pair in the file, in which case the following call
624      // to next() has to return false.
625      blockBuffer.moveBack(lastKeyValueSize);
626      readKeyValueLen();
627      return 1; // didn't exactly find it.
628    }
629
630    @Override
631    public Cell getNextIndexedKey() {
632      return nextIndexedKey;
633    }
634
635    @Override
636    public int seekTo(Cell key) throws IOException {
637      return seekTo(key, true);
638    }
639
    /**
     * Reseek to the given key assuming forward-only movement through the file. When already seeked
     * and the target lies at/before the current cell, or inside the current data block, no index
     * lookup is performed; otherwise falls through to a non-rewinding {@link #seekTo(Cell,
     * boolean)}.
     */
    @Override
    public int reseekTo(Cell key) throws IOException {
      int compared;
      if (isSeeked()) {
        compared = compareKey(reader.getComparator(), key);
        if (compared < 1) {
          // If the required key is less than or equal to current key, then
          // don't do anything.
          return compared;
        } else {
          // The comparison with no_next_index_key has to be checked
          if (
            this.nextIndexedKey != null && (this.nextIndexedKey
                == KeyValueScanner.NO_NEXT_INDEXED_KEY
              || PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey)
                  < 0)
          ) {
            // The reader shall continue to scan the current data block instead
            // of querying the
            // block index as long as it knows the target key is strictly
            // smaller than
            // the next indexed key or the current data block is the last data
            // block.
            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key, false);
          }
        }
      }
      // Don't rewind on a reseek operation, because reseek implies that we are
      // always going forward in the file.
      return seekTo(key, false);
    }
671
672    /**
673     * An internal API function. Seek to the given key, optionally rewinding to the first key of the
674     * block before doing the seek.
675     * @param key    - a cell representing the key that we need to fetch
676     * @param rewind whether to rewind to the first key of the block before doing the seek. If this
677     *               is false, we are assuming we never go back, otherwise the result is undefined.
678     * @return -1 if the key is earlier than the first key of the file, 0 if we are at the given
679     *         key, 1 if we are past the given key -2 if the key is earlier than the first key of
680     *         the file while using a faked index key
681     */
682    public int seekTo(Cell key, boolean rewind) throws IOException {
683      HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
684      BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
685        cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader);
686      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
687        // This happens if the key e.g. falls before the beginning of the file.
688        return -1;
689      }
690      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
691        blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
692    }
693
    /**
     * Positions the scanner on the last cell strictly before the given key.
     * @return false when no such cell exists, i.e. the key is at or before the first cell of the
     *         file
     */
    @Override
    public boolean seekBefore(Cell key) throws IOException {
      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
        cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction), reader);
      if (seekToBlock == null) {
        return false;
      }
      Cell firstKey = getFirstKeyCellInBlock(seekToBlock);
      if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) {
        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
        // The key we are interested in
        if (previousBlockOffset == -1) {
          // we have a 'problem', the key we want is the first of the file.
          releaseIfNotCurBlock(seekToBlock);
          return false;
        }

        // The first key in the current block 'seekToBlock' is greater than the given
        // seekBefore key. We will go ahead by reading the next block that satisfies the
        // given key. Return the current block before reading the next one.
        releaseIfNotCurBlock(seekToBlock);
        // It is important that we compute and pass onDiskSize to the block
        // reader so that it does not have to read the header separately to
        // figure out the size. Currently, we do not have a way to do this
        // correctly in the general case however.
        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
        int prevBlockSize = -1;
        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
        // TODO shortcut: seek forward in this block to the last key of the
        // block.
      }
      // seekBefore=true: blockSeek positions us on the cell before the exact match.
      loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true);
      return true;
    }
729
730    /**
731     * The curBlock will be released by shipping or close method, so only need to consider releasing
732     * the block, which was read from HFile before and not referenced by curBlock.
733     */
734    protected void releaseIfNotCurBlock(HFileBlock block) {
735      if (curBlock != block) {
736        block.release();
737      }
738    }
739
740    /**
741     * Scans blocks in the "scanned" section of the {@link HFile} until the next data block is
742     * found.
743     * @return the next block, or null if there are no more data blocks
744     */
745    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH",
746        justification = "Yeah, unnecessary null check; could do w/ clean up")
747    protected HFileBlock readNextDataBlock() throws IOException {
748      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
749      if (curBlock == null) {
750        return null;
751      }
752      HFileBlock block = this.curBlock;
753      do {
754        if (block.getOffset() >= lastDataBlockOffset) {
755          releaseIfNotCurBlock(block);
756          return null;
757        }
758        if (block.getOffset() < 0) {
759          releaseIfNotCurBlock(block);
760          throw new IOException("Invalid block offset: " + block + ", path=" + reader.getPath());
761        }
762        // We are reading the next block without block type validation, because
763        // it might turn out to be a non-data block.
764        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
765          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
766          getEffectiveDataBlockEncoding());
767        if (block != null && !block.getBlockType().isData()) {
768          // Whatever block we read we will be returning it unless
769          // it is a datablock. Just in case the blocks are non data blocks
770          block.release();
771        }
772      } while (!block.getBlockType().isData());
773      return block;
774    }
775
776    public DataBlockEncoding getEffectiveDataBlockEncoding() {
777      return this.reader.getEffectiveEncodingInCache(isCompaction);
778    }
779
    /**
     * Materializes the cell this scanner is currently positioned on, or null if not seeked.
     * Picks the cheapest KeyValue implementation for the backing buffer: heap-array-backed vs
     * ByteBuffer-backed, and the tagged vs no-tags variants.
     */
    @Override
    public Cell getCell() {
      if (!isSeeked()) {
        return null;
      }

      Cell ret;
      int cellBufSize = getKVBufSize();
      long seqId = 0L;
      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
        // Only attach the memstore timestamp (seqId) when the file recorded one.
        seqId = currMemstoreTS;
      }
      if (blockBuffer.hasArray()) {
        // TODO : reduce the varieties of KV here. Check if based on a boolean
        // we can handle the 'no tags' case.
        if (currTagsLen > 0) {
          ret = new SizeCachedKeyValue(blockBuffer.array(),
            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
            rowLen);
        } else {
          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
            rowLen);
        }
      } else {
        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
        if (buf.isDirect()) {
          // Direct buffer: wrap the cell bytes without copying them onto the heap.
          ret = currTagsLen > 0
            ? new SizeCachedByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId, currKeyLen,
              rowLen)
            : new SizeCachedNoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
              currKeyLen, rowLen);
        } else {
          if (currTagsLen > 0) {
            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
              cellBufSize, seqId, currKeyLen, rowLen);
          } else {
            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
              cellBufSize, seqId, currKeyLen, rowLen);
          }
        }
      }
      return ret;
    }
824
825    @Override
826    public Cell getKey() {
827      assertSeeked();
828      // Create a new object so that this getKey is cached as firstKey, lastKey
829      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
830      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair);
831      ByteBuffer keyBuf = keyPair.getFirst();
832      if (keyBuf.hasArray()) {
833        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(),
834          keyBuf.arrayOffset() + keyPair.getSecond(), currKeyLen);
835      } else {
836        // Better to do a copy here instead of holding on to this BB so that
837        // we could release the blocks referring to this key. This key is specifically used
838        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
839        // every time. So holding onto the BB (incase of DBB) is not advised here.
840        byte[] key = new byte[currKeyLen];
841        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
842        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
843      }
844    }
845
846    @Override
847    public ByteBuffer getValue() {
848      assertSeeked();
849      // Okie to create new Pair. Not used in hot path
850      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
851      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
852        currValueLen, valuePair);
853      ByteBuffer valBuf = valuePair.getFirst().duplicate();
854      valBuf.position(valuePair.getSecond());
855      valBuf.limit(currValueLen + valuePair.getSecond());
856      return valBuf.slice();
857    }
858
859    protected void setNonSeekedState() {
860      reset();
861      blockBuffer = null;
862      currKeyLen = 0;
863      currValueLen = 0;
864      currMemstoreTS = 0;
865      currMemstoreTSLen = 0;
866      currTagsLen = 0;
867    }
868
869    /**
870     * Set the position on current backing blockBuffer.
871     */
872    private void positionThisBlockBuffer() {
873      try {
874        blockBuffer.skip(getCurCellSerializedSize());
875      } catch (IllegalArgumentException e) {
876        LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen
877          + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit()
878          + "; currBlock currBlockOffset = " + this.curBlock.getOffset() + "; path="
879          + reader.getPath());
880        throw e;
881      }
882    }
883
884    /**
885     * Set our selves up for the next 'next' invocation, set up next block.
886     * @return True is more to read else false if at the end.
887     */
888    private boolean positionForNextBlock() throws IOException {
889      // Methods are small so they get inlined because they are 'hot'.
890      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
891      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
892        setNonSeekedState();
893        return false;
894      }
895      return isNextBlock();
896    }
897
898    private boolean isNextBlock() throws IOException {
899      // Methods are small so they get inlined because they are 'hot'.
900      HFileBlock nextBlock = readNextDataBlock();
901      if (nextBlock == null) {
902        setNonSeekedState();
903        return false;
904      }
905      updateCurrentBlock(nextBlock);
906      return true;
907    }
908
909    private final boolean _next() throws IOException {
910      // Small method so can be inlined. It is a hot one.
911      if (blockBuffer.remaining() <= 0) {
912        return positionForNextBlock();
913      }
914
915      // We are still in the same block.
916      readKeyValueLen();
917      return true;
918    }
919
920    /**
921     * Go to the next key/value in the block section. Loads the next block if necessary. If
922     * successful, {@link #getKey()} and {@link #getValue()} can be called.
923     * @return true if successfully navigated to the next key/value
924     */
925    @Override
926    public boolean next() throws IOException {
927      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
928      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
929      assertSeeked();
930      positionThisBlockBuffer();
931      return _next();
932    }
933
934    /**
935     * Positions this scanner at the start of the file.
936     * @return false if empty file; i.e. a call to next would return false and the current key and
937     *         value are undefined.
938     */
939    @Override
940    public boolean seekTo() throws IOException {
941      if (reader == null) {
942        return false;
943      }
944
945      if (reader.getTrailer().getEntryCount() == 0) {
946        // No data blocks.
947        return false;
948      }
949
950      long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
951      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
952        return processFirstDataBlock();
953      }
954
955      readAndUpdateNewBlock(firstDataBlockOffset);
956      return true;
957    }
958
    /**
     * Re-position to the first cell of the already-loaded first data block.
     * @return true always; the caller has verified the file is non-empty
     */
    protected boolean processFirstDataBlock() throws IOException {
      blockBuffer.rewind();
      readKeyValueLen();
      return true;
    }
964
965    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
966      HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
967        isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
968      if (newBlock.getOffset() < 0) {
969        releaseIfNotCurBlock(newBlock);
970        throw new IOException(
971          "Invalid offset=" + newBlock.getOffset() + ", path=" + reader.getPath());
972      }
973      updateCurrentBlock(newBlock);
974    }
975
976    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
977      Cell key, boolean seekBefore) throws IOException {
978      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
979        updateCurrentBlock(seekToBlock);
980      } else if (rewind) {
981        blockBuffer.rewind();
982      }
983      // Update the nextIndexedKey
984      this.nextIndexedKey = nextIndexedKey;
985      return blockSeek(key, seekBefore);
986    }
987
988    /** Returns True if v &lt;= 0 or v &gt; current block buffer limit. */
989    protected final boolean checkKeyLen(final int v) {
990      return v <= 0 || v > this.blockBuffer.limit();
991    }
992
993    /** Returns True if v &lt; 0 or v &gt; current block buffer limit. */
994    protected final boolean checkLen(final int v) {
995      return v < 0 || v > this.blockBuffer.limit();
996    }
997
998    /**
999     * Check key and value lengths are wholesome.
1000     */
1001    protected final void checkKeyValueLen() {
1002      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
1003        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
1004          + " or currValueLen " + this.currValueLen + ". Block offset: " + this.curBlock.getOffset()
1005          + ", block length: " + this.blockBuffer.limit() + ", position: "
1006          + this.blockBuffer.position() + " (without header)." + ", path=" + reader.getPath());
1007      }
1008    }
1009
1010    /**
1011     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
1012     * key/value pair.
1013     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}, it's a totally new block
1014     *                 with new allocated {@link ByteBuff}, so if no further reference to this
1015     *                 block, we should release it carefully.
1016     */
1017    protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
1018      try {
1019        if (newBlock.getBlockType() != BlockType.DATA) {
1020          throw new IllegalStateException(
1021            "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
1022              + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
1023              + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
1024        }
1025        updateCurrBlockRef(newBlock);
1026        blockBuffer = newBlock.getBufferWithoutHeader();
1027        readKeyValueLen();
1028      } finally {
1029        releaseIfNotCurBlock(newBlock);
1030      }
1031      // Reset the next indexed key
1032      this.nextIndexedKey = null;
1033    }
1034
1035    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1036      ByteBuff buffer = curBlock.getBufferWithoutHeader();
1037      // It is safe to manipulate this buffer because we own the buffer object.
1038      buffer.rewind();
1039      int klen = buffer.getInt();
1040      buffer.skip(Bytes.SIZEOF_INT);// Skip value len part
1041      ByteBuffer keyBuff = buffer.asSubByteBuffer(klen);
1042      if (keyBuff.hasArray()) {
1043        return new KeyValue.KeyOnlyKeyValue(keyBuff.array(),
1044          keyBuff.arrayOffset() + keyBuff.position(), klen);
1045      } else {
1046        return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen);
1047      }
1048    }
1049
    /**
     * Compares the given key against the key the scanner is currently positioned on, ignoring
     * mvcc. Reuses the shared {@code pair} and {@code bufBackedKeyOnlyKv} fields to avoid
     * per-call allocation, so the result is only valid until the next scanner movement.
     */
    public int compareKey(CellComparator comparator, Cell key) {
      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair);
      this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen, rowLen);
      return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv);
    }
1055
    @Override
    public void shipped() throws IOException {
      // Results were handed off to the client; return retained blocks. The 'false' argument's
      // exact semantics are defined in returnBlocks (presumably "not closing") — see that method.
      this.returnBlocks(false);
    }
1060  }
1061
  /** Returns the filesystem path of the HFile this reader reads. */
  @Override
  public Path getPath() {
    return path;
  }
1066
  /** Returns the on-disk data block encoding of this file, as reported by its encoder. */
  @Override
  public DataBlockEncoding getDataBlockEncoding() {
    return dataBlockEncoder.getDataBlockEncoding();
  }
1071
  /** {@link Configurable} accessor: returns the configuration this reader was given. */
  @Override
  public Configuration getConf() {
    return conf;
  }
1076
  /** {@link Configurable} mutator: replaces this reader's configuration. */
  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }
1081
  /** Minor versions in HFile starting with this number have hbase checksums */
  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
  /** The HFile minor version that does not support checksums */
  public static final int MINOR_VERSION_NO_CHECKSUM = 0;

  /** HFile minor version that introduced the protobuf-based file trailer */
  public static final int PBUF_TRAILER_MINOR_VERSION = 2;

  /**
   * The size of a (key length, value length) tuple that prefixes each entry in a data block.
   */
  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
1094
1095  /**
1096   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
1097   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
1098   */
1099  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
1100    boolean updateCacheMetrics, BlockType expectedBlockType,
1101    DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1102    // Check cache for block. If found return.
1103    BlockCache cache = cacheConf.getBlockCache().orElse(null);
1104    if (cache != null) {
1105      HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
1106        updateCacheMetrics, expectedBlockType);
1107      if (cachedBlock != null) {
1108        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
1109          HFileBlock compressedBlock = cachedBlock;
1110          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1111          // In case of compressed block after unpacking we can release the compressed block
1112          if (compressedBlock != cachedBlock) {
1113            compressedBlock.release();
1114          }
1115        }
1116        try {
1117          validateBlockType(cachedBlock, expectedBlockType);
1118        } catch (IOException e) {
1119          returnAndEvictBlock(cache, cacheKey, cachedBlock);
1120          throw e;
1121        }
1122
1123        if (expectedDataBlockEncoding == null) {
1124          return cachedBlock;
1125        }
1126        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
1127        // Block types other than data blocks always have
1128        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
1129        // perform this check if cached block is a data block.
1130        if (
1131          cachedBlock.getBlockType().isData()
1132            && !actualDataBlockEncoding.equals(expectedDataBlockEncoding)
1133        ) {
1134          // This mismatch may happen if a Scanner, which is used for say a
1135          // compaction, tries to read an encoded block from the block cache.
1136          // The reverse might happen when an EncodedScanner tries to read
1137          // un-encoded blocks which were cached earlier.
1138          //
1139          // Because returning a data block with an implicit BlockType mismatch
1140          // will cause the requesting scanner to throw a disk read should be
1141          // forced here. This will potentially cause a significant number of
1142          // cache misses, so update so we should keep track of this as it might
1143          // justify the work on a CompoundScanner.
1144          if (
1145            !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE)
1146              && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)
1147          ) {
1148            // If the block is encoded but the encoding does not match the
1149            // expected encoding it is likely the encoding was changed but the
1150            // block was not yet evicted. Evictions on file close happen async
1151            // so blocks with the old encoding still linger in cache for some
1152            // period of time. This event should be rare as it only happens on
1153            // schema definition change.
1154            LOG.info(
1155              "Evicting cached block with key {} because data block encoding mismatch; "
1156                + "expected {}, actual {}, path={}",
1157              cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path);
1158            // This is an error scenario. so here we need to release the block.
1159            returnAndEvictBlock(cache, cacheKey, cachedBlock);
1160          }
1161          return null;
1162        }
1163        return cachedBlock;
1164      }
1165    }
1166    return null;
1167  }
1168
  /**
   * Release our reference to a cached block, then evict it from the cache. Used on error paths
   * (type or encoding mismatch) where the cached entry is known to be unusable.
   */
  private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) {
    block.release();
    cache.evictBlock(cacheKey);
  }
1173
1174  /**
1175   * @param cacheBlock Add block to cache, if found
1176   * @return block wrapped in a ByteBuffer, with header skipped
1177   */
1178  @Override
1179  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException {
1180    if (trailer.getMetaIndexCount() == 0) {
1181      return null; // there are no meta blocks
1182    }
1183    if (metaBlockIndexReader == null) {
1184      throw new IOException(path + " meta index not loaded");
1185    }
1186
1187    byte[] mbname = Bytes.toBytes(metaBlockName);
1188    int block = metaBlockIndexReader.rootBlockContainingKey(mbname, 0, mbname.length);
1189    if (block == -1) {
1190      return null;
1191    }
1192    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);
1193
1194    // Per meta key from any given file, synchronize reads for said block. This
1195    // is OK to do for meta blocks because the meta block index is always
1196    // single-level.
1197    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
1198      // Check cache for block. If found return.
1199      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
1200      BlockCacheKey cacheKey =
1201        new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
1202
1203      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
1204      HFileBlock cachedBlock =
1205        getCachedBlock(cacheKey, cacheBlock, false, true, BlockType.META, null);
1206      if (cachedBlock != null) {
1207        assert cachedBlock.isUnpacked() : "Packed block leak.";
1208        // Return a distinct 'shallow copy' of the block,
1209        // so pos does not get messed by the scanner
1210        return cachedBlock;
1211      }
1212      // Cache Miss, please load.
1213
1214      HFileBlock compressedBlock =
1215        fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
1216      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1217      if (compressedBlock != uncompressedBlock) {
1218        compressedBlock.release();
1219      }
1220
1221      // Cache the block
1222      if (cacheBlock) {
1223        cacheConf.getBlockCache().ifPresent(
1224          cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
1225      }
1226      return uncompressedBlock;
1227    }
1228  }
1229
1230  /**
1231   * Whether we use heap or not depends on our intent to cache the block. We want to avoid
1232   * allocating to off-heap if we intend to cache into the on-heap L1 cache. Otherwise, it's more
1233   * efficient to allocate to off-heap since we can control GC ourselves for those. So our decision
1234   * here breaks down as follows: <br>
1235   * If block cache is disabled, don't use heap. If we're not using the CombinedBlockCache, use heap
1236   * unless caching is disabled for the request. Otherwise, only use heap if caching is enabled and
1237   * the expected block type is not DATA (which goes to off-heap L2 in combined cache).
1238   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
1239   *      boolean, boolean)
1240   */
1241  private boolean shouldUseHeap(BlockType expectedBlockType, boolean cacheBlock) {
1242    if (!cacheConf.getBlockCache().isPresent()) {
1243      return false;
1244    }
1245
1246    // we only cache a block if cacheBlock is true and caching-on-read is enabled in CacheConfig
1247    // we can really only check for that if have an expectedBlockType
1248    if (expectedBlockType != null) {
1249      cacheBlock &= cacheConf.shouldCacheBlockOnRead(expectedBlockType.getCategory());
1250    }
1251
1252    if (!cacheConf.isCombinedBlockCache()) {
1253      // Block to cache in LruBlockCache must be an heap one, if caching enabled. So just allocate
1254      // block memory from heap for saving an extra off-heap to heap copying in that case.
1255      return cacheBlock;
1256    }
1257
1258    return cacheBlock && expectedBlockType != null && !expectedBlockType.isData();
1259  }
1260
  @Override
  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding) throws IOException {
    // Convenience overload: delegates with cacheOnly=false (read for use, not just for caching).
    return readBlock(dataBlockOffset, onDiskBlockSize, cacheBlock, pread, isCompaction,
      updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding, false);
  }
1268
  /**
   * Reads a block at the given offset, consulting the block cache first and optionally
   * double-checking under a per-offset lock before falling back to a filesystem read.
   * @param dataBlockOffset           file offset of the block; must lie before the trailer
   * @param onDiskBlockSize           on-disk size hint, or -1 if unknown
   * @param cacheBlock                whether a loaded block should be inserted into the cache
   * @param pread                     use positional read rather than seek+read
   * @param isCompaction              whether this read serves a compaction
   * @param updateCacheMetrics        whether cache hit/miss metrics should be updated
   * @param expectedBlockType         expected block type, or null to skip validation
   * @param expectedDataBlockEncoding expected data block encoding, or null to skip the check
   * @param cacheOnly                 when true, the block is being read only to populate the cache
   * @return the (unpacked, unless cacheOnly+compressed-caching) block; caller owns the reference
   */
  @Override
  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly)
    throws IOException {
    if (dataBlockIndexReader == null) {
      throw new IOException(path + " block index not loaded");
    }
    long trailerOffset = trailer.getLoadOnOpenDataOffset();
    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
      throw new IOException("Requested block is out of range: " + dataBlockOffset
        + ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset()
        + ", trailer.getLoadOnOpenDataOffset: " + trailerOffset + ", path=" + path);
    }
    // For any given block from any given file, synchronize reads for said
    // block.
    // Without a cache, this synchronizing is needless overhead, but really
    // the other choice is to duplicate work (which the cache would prevent you
    // from doing).

    BlockCacheKey cacheKey =
      new BlockCacheKey(path, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType);
    Attributes attributes = Attributes.of(BLOCK_CACHE_KEY_KEY, cacheKey.toString());

    boolean useLock = false;
    IdLock.Entry lockEntry = null;
    final Span span = Span.current();
    try {
      // Loop: first pass checks the cache lock-free; on a miss (when configured to) we take the
      // per-offset lock and re-check before doing the filesystem read.
      while (true) {
        // Check cache for block. If found return.
        if (cacheConf.shouldReadBlockFromCache(expectedBlockType) && !cacheOnly) {
          if (useLock) {
            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
          }
          // Try and get the block from the block cache. If the useLock variable is true then this
          // is the second time through the loop and it should not be counted as a block cache miss.
          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics,
            expectedBlockType, expectedDataBlockEncoding);
          if (cachedBlock != null) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("Block for file {} is coming from Cache {}",
                Bytes.toString(cachedBlock.getHFileContext().getTableName()), cachedBlock);
            }
            span.addEvent("block cache hit", attributes);
            assert cachedBlock.isUnpacked() : "Packed block leak.";
            if (cachedBlock.getBlockType().isData()) {
              if (updateCacheMetrics) {
                HFile.DATABLOCK_READ_COUNT.increment();
              }
              // Validate encoding type for data blocks. We include encoding
              // type in the cache key, and we expect it to match on a cache hit.
              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
                // Remember to release the block when in exceptional path.
                cacheConf.getBlockCache().ifPresent(cache -> {
                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
                });
                throw new IOException("Cached block under key " + cacheKey + " "
                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
                  + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
              }
            }
            // Cache-hit. Return!
            return cachedBlock;
          }

          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
            // check cache again with lock
            useLock = true;
            continue;
          }
          // Carry on, please load.
        }

        span.addEvent("block cache miss", attributes);
        // Load block from filesystem.
        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
          !isCompaction, shouldUseHeap(expectedBlockType, cacheBlock));
        try {
          validateBlockType(hfileBlock, expectedBlockType);
        } catch (IOException e) {
          hfileBlock.release();
          throw e;
        }
        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
        final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category);
        final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category);

        // Don't need the unpacked block back and we're storing the block in the cache compressed
        if (cacheOnly && cacheCompressed && cacheOnRead) {
          HFileBlock blockNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, hfileBlock);
          cacheConf.getBlockCache().ifPresent(cache -> {
            LOG.debug("Skipping decompression of block {} in prefetch", cacheKey);
            // Cache the block if necessary
            if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
              cache.cacheBlock(cacheKey, blockNoChecksum, cacheConf.isInMemory(), cacheOnly);
            }
          });

          if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
            HFile.DATABLOCK_READ_COUNT.increment();
          }
          return blockNoChecksum;
        }
        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
        HFileBlock unpackedNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, unpacked);
        // Cache the block if necessary
        cacheConf.getBlockCache().ifPresent(cache -> {
          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
            // Using the wait on cache during compaction and prefetching.
            cache.cacheBlock(cacheKey,
              cacheCompressed
                ? BlockCacheUtil.getBlockForCaching(cacheConf, hfileBlock)
                : unpackedNoChecksum,
              cacheConf.isInMemory(), cacheOnly);
          }
        });
        if (unpacked != hfileBlock) {
          // End of life here if hfileBlock is an independent block.
          hfileBlock.release();
        }
        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
          HFile.DATABLOCK_READ_COUNT.increment();
        }

        return unpackedNoChecksum;
      }
    } finally {
      if (lockEntry != null) {
        offsetLock.releaseLockEntry(lockEntry);
      }
    }
  }
1401
  @Override
  public boolean hasMVCCInfo() {
    // MVCC info is usable only if memstore timestamps were written AND should be decoded.
    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
  }
1406
1407  /**
1408   * Compares the actual type of a block retrieved from cache or disk with its expected type and
1409   * throws an exception in case of a mismatch. Expected block type of {@link BlockType#DATA} is
1410   * considered to match the actual block type [@link {@link BlockType#ENCODED_DATA} as well.
1411   * @param block             a block retrieved from cache or disk
1412   * @param expectedBlockType the expected block type, or null to skip the check
1413   */
1414  private void validateBlockType(HFileBlock block, BlockType expectedBlockType) throws IOException {
1415    if (expectedBlockType == null) {
1416      return;
1417    }
1418    BlockType actualBlockType = block.getBlockType();
1419    if (expectedBlockType.isData() && actualBlockType.isData()) {
1420      // We consider DATA to match ENCODED_DATA for the purpose of this
1421      // verification.
1422      return;
1423    }
1424    if (actualBlockType != expectedBlockType) {
1425      throw new IOException("Expected block type " + expectedBlockType + ", " + "but got "
1426        + actualBlockType + ": " + block + ", path=" + path);
1427    }
1428  }
1429
1430  /**
1431   * @return Last key as cell in the file. May be null if file has no entries. Note that this is not
1432   *         the last row key, but it is the Cell representation of the last key
1433   */
1434  @Override
1435  public Optional<Cell> getLastKey() {
1436    return dataBlockIndexReader.isEmpty()
1437      ? Optional.empty()
1438      : Optional.of(fileInfo.getLastKeyCell());
1439  }
1440
1441  /**
1442   * @return Midkey for this file. We work with block boundaries only so returned midkey is an
1443   *         approximation only.
1444   */
1445  @Override
1446  public Optional<Cell> midKey() throws IOException {
1447    return Optional.ofNullable(dataBlockIndexReader.midkey(this));
1448  }
1449
  @Override
  public void close() throws IOException {
    // Delegates to close(boolean); whether to evict cached blocks comes from the cache config.
    close(cacheConf.shouldEvictOnClose());
  }
1454
  @Override
  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
    // The encoder decides which encoding applies in cache; it may differ for compactions.
    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
  }
1459
  /** For testing: exposes the underlying filesystem block reader, bypassing the block cache. */
  @Override
  public HFileBlock.FSReader getUncachedBlockReader() {
    return fsBlockReader;
  }
1465
1466  /**
1467   * Scanner that operates on encoded data blocks.
1468   */
1469  protected static class EncodedScanner extends HFileScannerImpl {
1470    private final HFileBlockDecodingContext decodingCtx;
1471    private final DataBlockEncoder.EncodedSeeker seeker;
1472    private final DataBlockEncoder dataBlockEncoder;
1473
1474    public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, boolean pread,
1475      boolean isCompaction, HFileContext meta, Configuration conf) {
1476      super(reader, cacheBlocks, pread, isCompaction);
1477      DataBlockEncoding encoding = reader.getDataBlockEncoding();
1478      dataBlockEncoder = encoding.getEncoder();
1479      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(conf, meta);
1480      seeker = dataBlockEncoder.createSeeker(decodingCtx);
1481    }
1482
1483    @Override
1484    public boolean isSeeked() {
1485      return curBlock != null;
1486    }
1487
1488    @Override
1489    public void setNonSeekedState() {
1490      reset();
1491    }
1492
1493    /**
1494     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
1495     * key/value pair.
1496     * @param newBlock the block to make current, and read by {@link HFileReaderImpl#readBlock},
1497     *                 it's a totally new block with new allocated {@link ByteBuff}, so if no
1498     *                 further reference to this block, we should release it carefully.
1499     */
1500    @Override
1501    protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
1502      try {
1503        // sanity checks
1504        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
1505          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
1506        }
1507        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
1508        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
1509          String encoderCls = dataBlockEncoder.getClass().getName();
1510          throw new CorruptHFileException(
1511            "Encoder " + encoderCls + " doesn't support data block encoding "
1512              + DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ",path=" + reader.getPath());
1513        }
1514        updateCurrBlockRef(newBlock);
1515        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
1516        seeker.setCurrentBuffer(encodedBuffer);
1517      } finally {
1518        releaseIfNotCurBlock(newBlock);
1519      }
1520      // Reset the next indexed key
1521      this.nextIndexedKey = null;
1522    }
1523
1524    private ByteBuff getEncodedBuffer(HFileBlock newBlock) {
1525      ByteBuff origBlock = newBlock.getBufferReadOnly();
1526      int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE;
1527      origBlock.position(pos);
1528      origBlock
1529        .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
1530      return origBlock.slice();
1531    }
1532
1533    @Override
1534    protected boolean processFirstDataBlock() throws IOException {
1535      seeker.rewind();
1536      return true;
1537    }
1538
1539    @Override
1540    public boolean next() throws IOException {
1541      boolean isValid = seeker.next();
1542      if (!isValid) {
1543        HFileBlock newBlock = readNextDataBlock();
1544        isValid = newBlock != null;
1545        if (isValid) {
1546          updateCurrentBlock(newBlock);
1547        } else {
1548          setNonSeekedState();
1549        }
1550      }
1551      return isValid;
1552    }
1553
1554    @Override
1555    public Cell getKey() {
1556      assertValidSeek();
1557      return seeker.getKey();
1558    }
1559
1560    @Override
1561    public ByteBuffer getValue() {
1562      assertValidSeek();
1563      return seeker.getValueShallowCopy();
1564    }
1565
1566    @Override
1567    public Cell getCell() {
1568      if (this.curBlock == null) {
1569        return null;
1570      }
1571      return seeker.getCell();
1572    }
1573
1574    private void assertValidSeek() {
1575      if (this.curBlock == null) {
1576        throw new NotSeekedException(reader.getPath());
1577      }
1578    }
1579
1580    @Override
1581    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1582      return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock));
1583    }
1584
1585    @Override
1586    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
1587      Cell key, boolean seekBefore) throws IOException {
1588      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
1589        updateCurrentBlock(seekToBlock);
1590      } else if (rewind) {
1591        seeker.rewind();
1592      }
1593      this.nextIndexedKey = nextIndexedKey;
1594      return seeker.seekToKeyInBlock(key, seekBefore);
1595    }
1596
1597    @Override
1598    public int compareKey(CellComparator comparator, Cell key) {
1599      return seeker.compareKey(comparator, key);
1600    }
1601  }
1602
1603  /**
1604   * Returns a buffer with the Bloom filter metadata. The caller takes ownership of the buffer.
1605   */
1606  @Override
1607  public DataInput getGeneralBloomFilterMetadata() throws IOException {
1608    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
1609  }
1610
/**
 * Returns a buffer with the delete-family Bloom filter metadata, or null if the file has none.
 * The caller takes ownership of the buffer.
 */
@Override
public DataInput getDeleteBloomFilterMetadata() throws IOException {
  return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
}
1615
1616  private DataInput getBloomFilterMetadata(BlockType blockType) throws IOException {
1617    if (
1618      blockType != BlockType.GENERAL_BLOOM_META && blockType != BlockType.DELETE_FAMILY_BLOOM_META
1619    ) {
1620      throw new RuntimeException(
1621        "Block Type: " + blockType.toString() + " is not supported, path=" + path);
1622    }
1623
1624    for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) {
1625      if (b.getBlockType() == blockType) {
1626        return b.getByteStream();
1627      }
1628    }
1629    return null;
1630  }
1631
/** Always true for this reader: the file info block is loaded in the constructor. */
public boolean isFileInfoLoaded() {
  return true; // We load file info in constructor in version 2.
}
1635
/** Returns the {@link HFileContext} (compression, encoding, block size, etc.) of this file. */
@Override
public HFileContext getFileContext() {
  return hfileContext;
}
1640
1641  /**
1642   * Returns false if block prefetching was requested for this file and has not completed, true
1643   * otherwise
1644   */
1645  @Override
1646  public boolean prefetchComplete() {
1647    return PrefetchExecutor.isCompleted(path);
1648  }
1649
1650  /**
1651   * Returns true if block prefetching was started after waiting for specified delay, false
1652   * otherwise
1653   */
1654  @Override
1655  public boolean prefetchStarted() {
1656    return PrefetchExecutor.isPrefetchStarted();
1657  }
1658
1659  /**
1660   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1661   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is nothing to clean up
1662   * in a Scanner. Letting go of your references to the scanner is sufficient. NOTE: Do not use this
1663   * overload of getScanner for compactions. See
1664   * {@link #getScanner(Configuration, boolean, boolean, boolean)}
1665   * @param conf        Store configuration.
1666   * @param cacheBlocks True if we should cache blocks read in by this scanner.
1667   * @param pread       Use positional read rather than seek+read if true (pread is better for
1668   *                    random reads, seek+read is better scanning).
1669   * @return Scanner on this file.
1670   */
1671  @Override
1672  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread) {
1673    return getScanner(conf, cacheBlocks, pread, false);
1674  }
1675
1676  /**
1677   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1678   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is nothing to clean up
1679   * in a Scanner. Letting go of your references to the scanner is sufficient.
1680   * @param conf         Store configuration.
1681   * @param cacheBlocks  True if we should cache blocks read in by this scanner.
1682   * @param pread        Use positional read rather than seek+read if true (pread is better for
1683   *                     random reads, seek+read is better scanning).
1684   * @param isCompaction is scanner being used for a compaction?
1685   * @return Scanner on this file.
1686   */
1687  @Override
1688  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread,
1689    final boolean isCompaction) {
1690    if (dataBlockEncoder.useEncodedScanner()) {
1691      return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext, conf);
1692    }
1693    return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
1694  }
1695
/** Returns the major version of the HFile format this reader handles (version 3). */
public int getMajorVersion() {
  return 3;
}
1699
/** Releases any buffers held by the underlying filesystem input stream. */
@Override
public void unbufferStream() {
  fsBlockReader.unbufferStream();
}
1704}