001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import io.opentelemetry.api.trace.Span;
021import io.opentelemetry.context.Scope;
022import java.io.DataInput;
023import java.io.IOException;
024import java.nio.ByteBuffer;
025import java.util.ArrayList;
026import java.util.Optional;
027import org.apache.hadoop.conf.Configurable;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.fs.Path;
030import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
031import org.apache.hadoop.hbase.Cell;
032import org.apache.hadoop.hbase.CellComparator;
033import org.apache.hadoop.hbase.CellUtil;
034import org.apache.hadoop.hbase.HConstants;
035import org.apache.hadoop.hbase.KeyValue;
036import org.apache.hadoop.hbase.PrivateCellUtil;
037import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue;
038import org.apache.hadoop.hbase.SizeCachedKeyValue;
039import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue;
040import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
041import org.apache.hadoop.hbase.io.compress.Compression;
042import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
043import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
044import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
045import org.apache.hadoop.hbase.nio.ByteBuff;
046import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
047import org.apache.hadoop.hbase.trace.TraceUtil;
048import org.apache.hadoop.hbase.util.ByteBufferUtils;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.IdLock;
051import org.apache.hadoop.hbase.util.ObjectIntPair;
052import org.apache.hadoop.io.WritableUtils;
053import org.apache.yetus.audience.InterfaceAudience;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057/**
058 * Implementation that can handle all hfile versions of {@link HFile.Reader}.
059 */
060@InterfaceAudience.Private
061@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
062public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
  // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into
  // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against
  // the MaxInlineLevel limit because too many tiers were involved in reading from an hfile. It was
  // also hard to navigate the source code with so many classes participating in a read.
  private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);

  /** Data block index reader keeping the root data index in memory */
  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;

  /** Meta block index reader -- always single level */
  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;

  /** File trailer; taken from {@code fileInfo.getTrailer()} in the constructor. */
  protected FixedFileTrailer trailer;

  /** Captured from {@code context.isPrimaryReplicaReader()} in the constructor. */
  private final boolean primaryReplicaReader;

  /**
   * What kind of data block encoding should be used while reading, writing, and handling cache.
   */
  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;

  /** Block cache configuration. */
  protected final CacheConfig cacheConf;

  /** Reader context supplied at construction; the file path and file size are read from it. */
  protected ReaderContext context;

  /** HFile info supplied at construction; source of the trailer, file context and index readers. */
  protected final HFileInfo fileInfo;

  /** Path of file */
  protected final Path path;

  /** File name to be used for block names */
  protected final String name;

  /** Configuration supplied at construction; handed to the block reader with the encoder. */
  private Configuration conf;

  /** File context; taken from {@code fileInfo.getHFileContext()} in the constructor. */
  protected HFileContext hfileContext;

  /** Filesystem-level block reader. */
  protected HFileBlock.FSReader fsBlockReader;

  /**
   * A "sparse lock" implementation allowing to lock on a particular block identified by offset. The
   * purpose of this is to avoid two clients loading the same block, and have all but one client
   * wait to get the block from the cache.
   */
  private IdLock offsetLock = new IdLock();

  /** Minimum minor version supported by this HFile format */
  static final int MIN_MINOR_VERSION = 0;

  /** Maximum minor version supported by this HFile format */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
  static final int MAX_MINOR_VERSION = 3;

  /** Minor versions starting with this number have faked index key */
  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
121
122  /**
123   * Opens a HFile.
124   * @param context   Reader context info
125   * @param fileInfo  HFile info
126   * @param cacheConf Cache configuration.
127   * @param conf      Configuration
128   */
129  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
130  public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
131    Configuration conf) throws IOException {
132    this.cacheConf = cacheConf;
133    this.context = context;
134    this.path = context.getFilePath();
135    this.name = path.getName();
136    this.conf = conf;
137    this.primaryReplicaReader = context.isPrimaryReplicaReader();
138    this.fileInfo = fileInfo;
139    this.trailer = fileInfo.getTrailer();
140    this.hfileContext = fileInfo.getHFileContext();
141    this.fsBlockReader =
142      new HFileBlock.FSReaderImpl(context, hfileContext, cacheConf.getByteBuffAllocator(), conf);
143    this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
144    fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
145    dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
146    metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
147  }
148
149  @SuppressWarnings("serial")
150  public static class BlockIndexNotLoadedException extends IllegalStateException {
151    public BlockIndexNotLoadedException(Path path) {
152      // Add a message in case anyone relies on it as opposed to class name.
153      super(path + " block index not loaded");
154    }
155  }
156
157  private Optional<String> toStringFirstKey() {
158    return getFirstKey().map(CellUtil::getCellKeyAsString);
159  }
160
161  private Optional<String> toStringLastKey() {
162    return getLastKey().map(CellUtil::getCellKeyAsString);
163  }
164
165  @Override
166  public String toString() {
167    return "reader=" + path.toString()
168      + (!isFileInfoLoaded()
169        ? ""
170        : ", compression=" + trailer.getCompressionCodec().getName() + ", cacheConf=" + cacheConf
171          + ", firstKey=" + toStringFirstKey() + ", lastKey=" + toStringLastKey())
172      + ", avgKeyLen=" + fileInfo.getAvgKeyLen() + ", avgValueLen=" + fileInfo.getAvgValueLen()
173      + ", entries=" + trailer.getEntryCount() + ", length=" + context.getFileSize();
174  }
175
176  @Override
177  public long length() {
178    return context.getFileSize();
179  }
180
181  /**
182   * @return the first key in the file. May be null if file has no entries. Note that this is not
183   *         the first row key, but rather the byte form of the first KeyValue.
184   */
185  @Override
186  public Optional<Cell> getFirstKey() {
187    if (dataBlockIndexReader == null) {
188      throw new BlockIndexNotLoadedException(path);
189    }
190    return dataBlockIndexReader.isEmpty()
191      ? Optional.empty()
192      : Optional.of(dataBlockIndexReader.getRootBlockKey(0));
193  }
194
195  /**
196   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
197   * eliminate {@link KeyValue} here.
198   * @return the first row key, or null if the file is empty.
199   */
200  @Override
201  public Optional<byte[]> getFirstRowKey() {
202    // We have to copy the row part to form the row key alone
203    return getFirstKey().map(CellUtil::cloneRow);
204  }
205
206  /**
207   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
208   * eliminate {@link KeyValue} here.
209   * @return the last row key, or null if the file is empty.
210   */
211  @Override
212  public Optional<byte[]> getLastRowKey() {
213    // We have to copy the row part to form the row key alone
214    return getLastKey().map(CellUtil::cloneRow);
215  }
216
217  /** @return number of KV entries in this HFile */
218  @Override
219  public long getEntries() {
220    return trailer.getEntryCount();
221  }
222
223  /** @return comparator */
224  @Override
225  public CellComparator getComparator() {
226    return this.hfileContext.getCellComparator();
227  }
228
229  public Compression.Algorithm getCompressionAlgorithm() {
230    return trailer.getCompressionCodec();
231  }
232
233  /**
234   * @return the total heap size of data and meta block indexes in bytes. Does not take into account
235   *         non-root blocks of a multilevel data index.
236   */
237  @Override
238  public long indexSize() {
239    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
240      + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() : 0);
241  }
242
243  @Override
244  public String getName() {
245    return name;
246  }
247
248  @Override
249  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
250    this.dataBlockEncoder = dataBlockEncoder;
251    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
252  }
253
254  @Override
255  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
256    this.dataBlockIndexReader = reader;
257  }
258
259  @Override
260  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
261    return dataBlockIndexReader;
262  }
263
264  @Override
265  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
266    this.metaBlockIndexReader = reader;
267  }
268
269  @Override
270  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
271    return metaBlockIndexReader;
272  }
273
274  @Override
275  public FixedFileTrailer getTrailer() {
276    return trailer;
277  }
278
279  @Override
280  public ReaderContext getContext() {
281    return this.context;
282  }
283
284  @Override
285  public HFileInfo getHFileInfo() {
286    return this.fileInfo;
287  }
288
289  @Override
290  public boolean isPrimaryReplicaReader() {
291    return primaryReplicaReader;
292  }
293
294  /**
295   * An exception thrown when an operation requiring a scanner to be seeked is invoked on a scanner
296   * that is not seeked.
297   */
298  @SuppressWarnings("serial")
299  public static class NotSeekedException extends IllegalStateException {
300    public NotSeekedException(Path path) {
301      super(path + " not seeked to a key/value");
302    }
303  }
304
305  protected static class HFileScannerImpl implements HFileScanner {
    // Buffer over the block being scanned; isSeeked() reports true once this is non-null.
    private ByteBuff blockBuffer;
    // Constructor-supplied read options, passed through to the block-loading calls.
    protected final boolean cacheBlocks;
    protected final boolean pread;
    protected final boolean isCompaction;
    // Lengths and mvcc value decoded for the cell at the current position of blockBuffer.
    private int currKeyLen;
    private int currValueLen;
    private int currMemstoreTSLen;
    private long currMemstoreTS;
    // The reader this scanner was created for.
    protected final HFile.Reader reader;
    private int currTagsLen;
    private short rowLen;
    // buffer backed keyonlyKV
    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
    // A pair reused in blockSeek() so that we don't create a lot of garbage objects
    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();

    /**
     * The next indexed key is to keep track of the indexed key of the next data block. If the
     * nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the current data block is the
     * last data block. If the nextIndexedKey is null, it means the nextIndexedKey has not been
     * loaded yet.
     */
    protected Cell nextIndexedKey;
    // Current block being used. NOTICE: DON't release curBlock separately except in shipped() or
    // close() methods. Because the shipped() or close() will do the release finally, even if any
    // exception occur the curBlock will be released by the close() method (see
    // RegionScannerImpl#handleException). Call the releaseIfNotCurBlock() to release the
    // unreferenced block please.
    protected HFileBlock curBlock;
    // Previous blocks that were used in the course of the read
    protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();
337
338    public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
339      final boolean pread, final boolean isCompaction) {
340      this.reader = reader;
341      this.cacheBlocks = cacheBlocks;
342      this.pread = pread;
343      this.isCompaction = isCompaction;
344    }
345
346    void updateCurrBlockRef(HFileBlock block) {
347      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
348        return;
349      }
350      if (this.curBlock != null && this.curBlock.isSharedMem()) {
351        prevBlocks.add(this.curBlock);
352      }
353      this.curBlock = block;
354    }
355
356    void reset() {
357      // We don't have to keep ref to heap block
358      if (this.curBlock != null && this.curBlock.isSharedMem()) {
359        this.prevBlocks.add(this.curBlock);
360      }
361      this.curBlock = null;
362    }
363
364    private void returnBlocks(boolean returnAll) {
365      this.prevBlocks.forEach(HFileBlock::release);
366      this.prevBlocks.clear();
367      if (returnAll && this.curBlock != null) {
368        this.curBlock.release();
369        this.curBlock = null;
370      }
371    }
372
373    @Override
374    public boolean isSeeked() {
375      return blockBuffer != null;
376    }
377
378    @Override
379    public String toString() {
380      return "HFileScanner for reader " + String.valueOf(getReader());
381    }
382
383    protected void assertSeeked() {
384      if (!isSeeked()) {
385        throw new NotSeekedException(reader.getPath());
386      }
387    }
388
389    @Override
390    public HFile.Reader getReader() {
391      return reader;
392    }
393
394    // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile
395    // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous
396    // array/buffer. How many bytes we should wrap to make the KV is what this method returns.
397    private int getKVBufSize() {
398      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
399      if (currTagsLen > 0) {
400        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
401      }
402      return kvBufSize;
403    }
404
405    @Override
406    public void close() {
407      if (!pread) {
408        // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393
409        reader.unbufferStream();
410      }
411      this.returnBlocks(true);
412    }
413
414    // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current
415    // HFile block's buffer so as to position to the next cell.
416    private int getCurCellSerializedSize() {
417      int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + currMemstoreTSLen;
418      if (this.reader.getFileContext().isIncludesTags()) {
419        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
420      }
421      return curCellSize;
422    }
423
    /**
     * Decodes the length fields of the cell found at the current position of {@code blockBuffer}
     * into {@code currKeyLen}, {@code currValueLen}, {@code rowLen}, {@code currTagsLen} (only when
     * the file context includes tags) and, through {@link #readMvccVersion(int)}, the mvcc fields.
     * All reads use the buffer's "after position" accessors; this method itself never calls
     * {@code skip} or {@code moveBack}.
     */
    protected void readKeyValueLen() {
      // This is a hot method. We go out of our way to make this method short so it can be
      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
      // because it is faster than going via range-checked ByteBuffer methods or going through a
      // byte buffer array a byte at a time.
      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
      // Trying to imitate what was done - need to profile if this is better or
      // earlier way is better by doing mark and reset?
      // But ensure that you read long instead of two ints
      long ll = blockBuffer.getLongAfterPosition(0);
      // Read top half as an int of key length and bottom int as value length
      this.currKeyLen = (int) (ll >> Integer.SIZE);
      // NOTE(review): presumably the mask only has bits in the upper word, so the int cast keeps
      // the low 32 bits unchanged -- confirm against Bytes.MASK_FOR_LOWER_INT_IN_LONG.
      this.currValueLen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
      checkKeyValueLen();
      // The row length short is read right after the 8 bytes of key/value lengths.
      this.rowLen = blockBuffer.getShortAfterPosition(Bytes.SIZEOF_LONG);
      // Offset (relative to the current position) just past the length fields, key and value
      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
      if (reader.getFileContext().isIncludesTags()) {
        // Tags length is a short.
        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
        checkTagsLen();
        p += (Bytes.SIZEOF_SHORT + currTagsLen);
      }
      // p now points at where the mvcc vint would start.
      readMvccVersion(p);
    }
450
451    private final void checkTagsLen() {
452      if (checkLen(this.currTagsLen)) {
453        throw new IllegalStateException(
454          "Invalid currTagsLen " + this.currTagsLen + ". Block offset: " + curBlock.getOffset()
455            + ", block length: " + this.blockBuffer.limit() + ", position: "
456            + this.blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
457      }
458    }
459
460    /**
461     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
462     */
463    protected void readMvccVersion(final int offsetFromPos) {
464      // See if we even need to decode mvcc.
465      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
466        return;
467      }
468      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
469        currMemstoreTS = 0;
470        currMemstoreTSLen = 1;
471        return;
472      }
473      _readMvccVersion(offsetFromPos);
474    }
475
    /**
     * Actually do the mvcc read. Does no checks. Decodes a variable-length integer starting at
     * {@code offsetFromPos} and stores the value in {@code currMemstoreTS} and the number of bytes
     * it occupies in {@code currMemstoreTSLen}.
     * @param offsetFromPos offset of the first vint byte, relative to the current buffer position
     */
    private void _readMvccVersion(int offsetFromPos) {
      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
      // previous if one-byte vint, we'd redo the vint call to find int size.
      // Also the method is kept small so can be inlined.
      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
      int len = WritableUtils.decodeVIntSize(firstByte);
      if (len == 1) {
        // Single-byte encoding: the first byte is the value itself.
        this.currMemstoreTS = firstByte;
      } else {
        // The remaining len - 1 bytes are accumulated high byte first, reading the widest
        // primitive that still fits (int, then short, then single bytes).
        int remaining = len - 1;
        long i = 0;
        offsetFromPos++;
        if (remaining >= Bytes.SIZEOF_INT) {
          // The int read has to be converted to an unsigned long, hence the & op
          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
          remaining -= Bytes.SIZEOF_INT;
          offsetFromPos += Bytes.SIZEOF_INT;
        }
        if (remaining >= Bytes.SIZEOF_SHORT) {
          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
          i = i << 16;
          // Mask so the short is treated as unsigned when merged in.
          i = i | (s & 0xFFFF);
          remaining -= Bytes.SIZEOF_SHORT;
          offsetFromPos += Bytes.SIZEOF_SHORT;
        }
        for (int idx = 0; idx < remaining; idx++) {
          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
          i = i << 8;
          i = i | (b & 0xFF);
        }
        // Negative vints are stored complemented, as signalled by the first byte.
        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
      }
      this.currMemstoreTSLen = len;
    }
513
    /**
     * Within a loaded block, seek looking for the last key that is smaller than the key we are
     * interested in (or equal to it, when {@code seekBefore} is false). A note on the seekBefore:
     * if you have seekBefore = true, AND the first key in the block = key, then an
     * {@link IllegalStateException} is thrown. The caller has to check for that case and load the
     * previous block as appropriate.
     * @param key        the key to find
     * @param seekBefore find the key before the given key in case of exact match.
     * @return 0 in case of an exact key match, 1 in case of an inexact match,
     *         {@link HConstants#INDEX_KEY_MAGIC} (-2 according to the original documentation) in
     *         case of an inexact match and furthermore, the input key less than the first key of
     *         current block(e.g. using a faked index key)
     * @throws IllegalStateException if a decoded key, value or tags length fails its check, or if
     *                               {@code seekBefore} is set and the very first key matches
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      int klen, vlen, tlen = 0;
      // Serialized size of the previously visited cell; -1 until one cell has been skipped.
      int lastKeyValueSize = -1;
      int offsetFromPos;
      do {
        offsetFromPos = 0;
        // Better to ensure that we use the BB Utils here
        // Key length is the upper int and value length the lower int of one long.
        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
        klen = (int) (ll >> Integer.SIZE);
        vlen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException(
            "Invalid klen " + klen + " or vlen " + vlen + ". Block offset: " + curBlock.getOffset()
              + ", block length: " + blockBuffer.limit() + ", position: " + blockBuffer.position()
              + " (without header)." + " path=" + reader.getPath());
        }
        offsetFromPos += Bytes.SIZEOF_LONG;
        this.rowLen = blockBuffer.getShortAfterPosition(offsetFromPos);
        // Wrap the key bytes (no copy intended; reuses 'pair') and compare against the target.
        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen, rowLen);
        int comp =
          PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
        offsetFromPos += klen + vlen;
        if (this.reader.getFileContext().isIncludesTags()) {
          // Read short as unsigned, high byte first
          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
            ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
          if (checkLen(tlen)) {
            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
          }
          // add the two bytes read for the tags.
          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
        }
        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
          // Directly read the mvcc based on current position
          readMvccVersion(offsetFromPos);
        }
        if (comp == 0) {
          // The cell at the current position has exactly the requested key.
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              throw new IllegalStateException("blockSeek with seekBefore "
                + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
                + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
                + curBlock.getOnDiskSizeWithHeader() + ", path=" + reader.getPath());
            }
            // Step back onto the previous cell and re-read its lengths.
            blockBuffer.moveBack(lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          currKeyLen = klen;
          currValueLen = vlen;
          currTagsLen = tlen;
          return 0; // indicate exact match
        } else if (comp < 0) {
          // The requested key sorts before the cell at the current position: settle on the
          // previous cell if one was visited, otherwise stay on this (first visited) cell.
          if (lastKeyValueSize > 0) {
            blockBuffer.moveBack(lastKeyValueSize);
          }
          readKeyValueLen();
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
            // No cell was skipped and we are at the start of the buffer, so the requested key
            // is smaller than the first key of this block.
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }
        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
        // include tag length also if tags included with KV
        if (reader.getFileContext().isIncludesTags()) {
          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
        }
        blockBuffer.skip(lastKeyValueSize);
      } while (blockBuffer.hasRemaining());

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.moveBack(lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }
605
606    @Override
607    public Cell getNextIndexedKey() {
608      return nextIndexedKey;
609    }
610
611    @Override
612    public int seekTo(Cell key) throws IOException {
613      return seekTo(key, true);
614    }
615
616    @Override
617    public int reseekTo(Cell key) throws IOException {
618      int compared;
619      if (isSeeked()) {
620        compared = compareKey(reader.getComparator(), key);
621        if (compared < 1) {
622          // If the required key is less than or equal to current key, then
623          // don't do anything.
624          return compared;
625        } else {
626          // The comparison with no_next_index_key has to be checked
627          if (
628            this.nextIndexedKey != null && (this.nextIndexedKey
629                == KeyValueScanner.NO_NEXT_INDEXED_KEY
630              || PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey)
631                  < 0)
632          ) {
633            // The reader shall continue to scan the current data block instead
634            // of querying the
635            // block index as long as it knows the target key is strictly
636            // smaller than
637            // the next indexed key or the current data block is the last data
638            // block.
639            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key, false);
640          }
641        }
642      }
643      // Don't rewind on a reseek operation, because reseek implies that we are
644      // always going forward in the file.
645      return seekTo(key, false);
646    }
647
648    /**
649     * An internal API function. Seek to the given key, optionally rewinding to the first key of the
650     * block before doing the seek.
651     * @param key    - a cell representing the key that we need to fetch
652     * @param rewind whether to rewind to the first key of the block before doing the seek. If this
653     *               is false, we are assuming we never go back, otherwise the result is undefined.
654     * @return -1 if the key is earlier than the first key of the file, 0 if we are at the given
655     *         key, 1 if we are past the given key -2 if the key is earlier than the first key of
656     *         the file while using a faked index key
657     */
658    public int seekTo(Cell key, boolean rewind) throws IOException {
659      HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
660      BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
661        cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader);
662      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
663        // This happens if the key e.g. falls before the beginning of the file.
664        return -1;
665      }
666      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
667        blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
668    }
669
    /**
     * Positions the scanner on the cell just before the given key.
     * @param key the key to seek before
     * @return false when the block index yields no block for the key, or when the key is at or
     *         before the first key of a block that has no previous block; true otherwise
     */
    @Override
    public boolean seekBefore(Cell key) throws IOException {
      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
        cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction), reader);
      if (seekToBlock == null) {
        return false;
      }
      Cell firstKey = getFirstKeyCellInBlock(seekToBlock);
      if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) {
        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
        // The key we are interested in
        if (previousBlockOffset == -1) {
          // we have a 'problem', the key we want is the first of the file.
          releaseIfNotCurBlock(seekToBlock);
          return false;
        }

        // The first key in the current block 'seekToBlock' is greater than or equal to the given
        // seekBefore key, so the cell before the key lives in the previous block. Return the
        // current block before reading the previous one.
        releaseIfNotCurBlock(seekToBlock);
        // It is important that we compute and pass onDiskSize to the block
        // reader so that it does not have to read the header separately to
        // figure out the size. Currently, we do not have a way to do this
        // correctly in the general case however.
        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
        int prevBlockSize = -1;
        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
        // TODO shortcut: seek forward in this block to the last key of the
        // block.
      }
      // NOTE(review): firstKey is passed in the slot reseekTo() uses for the next indexed key;
      // when the previous block was loaded above, it is the first key of the following block.
      loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true);
      return true;
    }
705
706    /**
707     * The curBlock will be released by shipping or close method, so only need to consider releasing
708     * the block, which was read from HFile before and not referenced by curBlock.
709     */
710    protected void releaseIfNotCurBlock(HFileBlock block) {
711      if (curBlock != block) {
712        block.release();
713      }
714    }
715
    /**
     * Scans blocks in the "scanned" section of the {@link HFile} until the next data block is
     * found. Starts from the current block and repeatedly reads the block that follows on disk,
     * releasing every non-data block it passes over.
     * @return the next data block, or null if there is no current block or the block being
     *         examined starts at or beyond the last data block offset recorded in the trailer
     * @throws IOException if a block with a negative offset is encountered, or from reading
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH",
        justification = "Yeah, unnecessary null check; could do w/ clean up")
    protected HFileBlock readNextDataBlock() throws IOException {
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (curBlock == null) {
        return null;
      }
      HFileBlock block = this.curBlock;
      do {
        if (block.getOffset() >= lastDataBlockOffset) {
          // Already at (or past) the last data block: nothing further to read.
          releaseIfNotCurBlock(block);
          return null;
        }
        if (block.getOffset() < 0) {
          releaseIfNotCurBlock(block);
          throw new IOException("Invalid block offset: " + block + ", path=" + reader.getPath());
        }
        // We are reading the next block without block type validation, because
        // it might turn out to be a non-data block.
        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
          getEffectiveDataBlockEncoding());
        // NOTE(review): the null check below is not repeated in the loop condition, so a null
        // block would still fail there; the suppression above calls the check unnecessary.
        if (block != null && !block.getBlockType().isData()) {
          // Whatever block we read we will be returning it unless
          // it is a datablock. Just in case the blocks are non data blocks
          block.release();
        }
      } while (!block.getBlockType().isData());
      return block;
    }
751
752    public DataBlockEncoding getEffectiveDataBlockEncoding() {
753      return this.reader.getEffectiveEncodingInCache(isCompaction);
754    }
755
756    @Override
757    public Cell getCell() {
758      if (!isSeeked()) {
759        return null;
760      }
761
762      Cell ret;
763      int cellBufSize = getKVBufSize();
764      long seqId = 0L;
765      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
766        seqId = currMemstoreTS;
767      }
768      if (blockBuffer.hasArray()) {
769        // TODO : reduce the varieties of KV here. Check if based on a boolean
770        // we can handle the 'no tags' case.
771        if (currTagsLen > 0) {
772          ret = new SizeCachedKeyValue(blockBuffer.array(),
773            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
774            rowLen);
775        } else {
776          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
777            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
778            rowLen);
779        }
780      } else {
781        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
782        if (buf.isDirect()) {
783          ret = currTagsLen > 0
784            ? new SizeCachedByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId, currKeyLen,
785              rowLen)
786            : new SizeCachedNoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
787              currKeyLen, rowLen);
788        } else {
789          if (currTagsLen > 0) {
790            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
791              cellBufSize, seqId, currKeyLen, rowLen);
792          } else {
793            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
794              cellBufSize, seqId, currKeyLen, rowLen);
795          }
796        }
797      }
798      return ret;
799    }
800
801    @Override
802    public Cell getKey() {
803      assertSeeked();
804      // Create a new object so that this getKey is cached as firstKey, lastKey
805      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
806      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair);
807      ByteBuffer keyBuf = keyPair.getFirst();
808      if (keyBuf.hasArray()) {
809        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(),
810          keyBuf.arrayOffset() + keyPair.getSecond(), currKeyLen);
811      } else {
812        // Better to do a copy here instead of holding on to this BB so that
813        // we could release the blocks referring to this key. This key is specifically used
814        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
815        // every time. So holding onto the BB (incase of DBB) is not advised here.
816        byte[] key = new byte[currKeyLen];
817        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
818        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
819      }
820    }
821
822    @Override
823    public ByteBuffer getValue() {
824      assertSeeked();
825      // Okie to create new Pair. Not used in hot path
826      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
827      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
828        currValueLen, valuePair);
829      ByteBuffer valBuf = valuePair.getFirst().duplicate();
830      valBuf.position(valuePair.getSecond());
831      valBuf.limit(currValueLen + valuePair.getSecond());
832      return valBuf.slice();
833    }
834
835    protected void setNonSeekedState() {
836      reset();
837      blockBuffer = null;
838      currKeyLen = 0;
839      currValueLen = 0;
840      currMemstoreTS = 0;
841      currMemstoreTSLen = 0;
842      currTagsLen = 0;
843    }
844
845    /**
846     * Set the position on current backing blockBuffer.
847     */
848    private void positionThisBlockBuffer() {
849      try {
850        blockBuffer.skip(getCurCellSerializedSize());
851      } catch (IllegalArgumentException e) {
852        LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen
853          + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit()
854          + "; currBlock currBlockOffset = " + this.curBlock.getOffset() + "; path="
855          + reader.getPath());
856        throw e;
857      }
858    }
859
860    /**
861     * Set our selves up for the next 'next' invocation, set up next block.
862     * @return True is more to read else false if at the end.
863     */
864    private boolean positionForNextBlock() throws IOException {
865      // Methods are small so they get inlined because they are 'hot'.
866      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
867      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
868        setNonSeekedState();
869        return false;
870      }
871      return isNextBlock();
872    }
873
874    private boolean isNextBlock() throws IOException {
875      // Methods are small so they get inlined because they are 'hot'.
876      HFileBlock nextBlock = readNextDataBlock();
877      if (nextBlock == null) {
878        setNonSeekedState();
879        return false;
880      }
881      updateCurrentBlock(nextBlock);
882      return true;
883    }
884
885    private final boolean _next() throws IOException {
886      // Small method so can be inlined. It is a hot one.
887      if (blockBuffer.remaining() <= 0) {
888        return positionForNextBlock();
889      }
890
891      // We are still in the same block.
892      readKeyValueLen();
893      return true;
894    }
895
896    /**
897     * Go to the next key/value in the block section. Loads the next block if necessary. If
898     * successful, {@link #getKey()} and {@link #getValue()} can be called.
899     * @return true if successfully navigated to the next key/value
900     */
901    @Override
902    public boolean next() throws IOException {
903      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
904      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
905      assertSeeked();
906      positionThisBlockBuffer();
907      return _next();
908    }
909
910    /**
911     * Positions this scanner at the start of the file.
912     * @return false if empty file; i.e. a call to next would return false and the current key and
913     *         value are undefined.
914     */
915    @Override
916    public boolean seekTo() throws IOException {
917      if (reader == null) {
918        return false;
919      }
920
921      if (reader.getTrailer().getEntryCount() == 0) {
922        // No data blocks.
923        return false;
924      }
925
926      long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
927      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
928        return processFirstDataBlock();
929      }
930
931      readAndUpdateNewBlock(firstDataBlockOffset);
932      return true;
933    }
934
935    protected boolean processFirstDataBlock() throws IOException {
936      blockBuffer.rewind();
937      readKeyValueLen();
938      return true;
939    }
940
941    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
942      HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
943        isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
944      if (newBlock.getOffset() < 0) {
945        releaseIfNotCurBlock(newBlock);
946        throw new IOException(
947          "Invalid offset=" + newBlock.getOffset() + ", path=" + reader.getPath());
948      }
949      updateCurrentBlock(newBlock);
950    }
951
952    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
953      Cell key, boolean seekBefore) throws IOException {
954      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
955        updateCurrentBlock(seekToBlock);
956      } else if (rewind) {
957        blockBuffer.rewind();
958      }
959      // Update the nextIndexedKey
960      this.nextIndexedKey = nextIndexedKey;
961      return blockSeek(key, seekBefore);
962    }
963
964    /**
965     * @return True if v &lt;= 0 or v &gt; current block buffer limit.
966     */
967    protected final boolean checkKeyLen(final int v) {
968      return v <= 0 || v > this.blockBuffer.limit();
969    }
970
971    /**
972     * @return True if v &lt; 0 or v &gt; current block buffer limit.
973     */
974    protected final boolean checkLen(final int v) {
975      return v < 0 || v > this.blockBuffer.limit();
976    }
977
978    /**
979     * Check key and value lengths are wholesome.
980     */
981    protected final void checkKeyValueLen() {
982      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
983        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
984          + " or currValueLen " + this.currValueLen + ". Block offset: " + this.curBlock.getOffset()
985          + ", block length: " + this.blockBuffer.limit() + ", position: "
986          + this.blockBuffer.position() + " (without header)." + ", path=" + reader.getPath());
987      }
988    }
989
990    /**
991     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
992     * key/value pair.
993     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}, it's a totally new block
994     *                 with new allocated {@link ByteBuff}, so if no further reference to this
995     *                 block, we should release it carefully.
996     */
997    protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
998      try {
999        if (newBlock.getBlockType() != BlockType.DATA) {
1000          throw new IllegalStateException(
1001            "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
1002              + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
1003              + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
1004        }
1005        updateCurrBlockRef(newBlock);
1006        blockBuffer = newBlock.getBufferWithoutHeader();
1007        readKeyValueLen();
1008      } finally {
1009        releaseIfNotCurBlock(newBlock);
1010      }
1011      // Reset the next indexed key
1012      this.nextIndexedKey = null;
1013    }
1014
1015    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1016      ByteBuff buffer = curBlock.getBufferWithoutHeader();
1017      // It is safe to manipulate this buffer because we own the buffer object.
1018      buffer.rewind();
1019      int klen = buffer.getInt();
1020      buffer.skip(Bytes.SIZEOF_INT);// Skip value len part
1021      ByteBuffer keyBuff = buffer.asSubByteBuffer(klen);
1022      if (keyBuff.hasArray()) {
1023        return new KeyValue.KeyOnlyKeyValue(keyBuff.array(),
1024          keyBuff.arrayOffset() + keyBuff.position(), klen);
1025      } else {
1026        return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen);
1027      }
1028    }
1029
1030    @Override
1031    public String getKeyString() {
1032      return CellUtil.toString(getKey(), false);
1033    }
1034
1035    @Override
1036    public String getValueString() {
1037      return ByteBufferUtils.toStringBinary(getValue());
1038    }
1039
1040    public int compareKey(CellComparator comparator, Cell key) {
1041      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair);
1042      this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen, rowLen);
1043      return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv);
1044    }
1045
1046    @Override
1047    public void shipped() throws IOException {
1048      this.returnBlocks(false);
1049    }
1050  }
1051
1052  @Override
1053  public Path getPath() {
1054    return path;
1055  }
1056
1057  @Override
1058  public DataBlockEncoding getDataBlockEncoding() {
1059    return dataBlockEncoder.getDataBlockEncoding();
1060  }
1061
1062  @Override
1063  public Configuration getConf() {
1064    return conf;
1065  }
1066
1067  @Override
1068  public void setConf(Configuration conf) {
1069    this.conf = conf;
1070  }
1071
1072  /** Minor versions in HFile starting with this number have hbase checksums */
1073  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
1074  /** In HFile minor version that does not support checksums */
1075  public static final int MINOR_VERSION_NO_CHECKSUM = 0;
1076
1077  /** HFile minor version that introduced pbuf filetrailer */
1078  public static final int PBUF_TRAILER_MINOR_VERSION = 2;
1079
1080  /**
1081   * The size of a (key length, value length) tuple that prefixes each entry in a data block.
1082   */
1083  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
1084
1085  /**
1086   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
1087   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
1088   */
1089  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
1090    boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
1091    DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1092    // Check cache for block. If found return.
1093    BlockCache cache = cacheConf.getBlockCache().orElse(null);
1094    if (cache != null) {
1095      HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
1096        updateCacheMetrics, expectedBlockType);
1097      if (cachedBlock != null) {
1098        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
1099          HFileBlock compressedBlock = cachedBlock;
1100          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1101          // In case of compressed block after unpacking we can release the compressed block
1102          if (compressedBlock != cachedBlock) {
1103            compressedBlock.release();
1104          }
1105        }
1106        try {
1107          validateBlockType(cachedBlock, expectedBlockType);
1108        } catch (IOException e) {
1109          returnAndEvictBlock(cache, cacheKey, cachedBlock);
1110          throw e;
1111        }
1112
1113        if (expectedDataBlockEncoding == null) {
1114          return cachedBlock;
1115        }
1116        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
1117        // Block types other than data blocks always have
1118        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
1119        // perform this check if cached block is a data block.
1120        if (
1121          cachedBlock.getBlockType().isData()
1122            && !actualDataBlockEncoding.equals(expectedDataBlockEncoding)
1123        ) {
1124          // This mismatch may happen if a Scanner, which is used for say a
1125          // compaction, tries to read an encoded block from the block cache.
1126          // The reverse might happen when an EncodedScanner tries to read
1127          // un-encoded blocks which were cached earlier.
1128          //
1129          // Because returning a data block with an implicit BlockType mismatch
1130          // will cause the requesting scanner to throw a disk read should be
1131          // forced here. This will potentially cause a significant number of
1132          // cache misses, so update so we should keep track of this as it might
1133          // justify the work on a CompoundScanner.
1134          if (
1135            !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE)
1136              && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)
1137          ) {
1138            // If the block is encoded but the encoding does not match the
1139            // expected encoding it is likely the encoding was changed but the
1140            // block was not yet evicted. Evictions on file close happen async
1141            // so blocks with the old encoding still linger in cache for some
1142            // period of time. This event should be rare as it only happens on
1143            // schema definition change.
1144            LOG.info(
1145              "Evicting cached block with key {} because data block encoding mismatch; "
1146                + "expected {}, actual {}, path={}",
1147              cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path);
1148            // This is an error scenario. so here we need to release the block.
1149            returnAndEvictBlock(cache, cacheKey, cachedBlock);
1150          }
1151          return null;
1152        }
1153        return cachedBlock;
1154      }
1155    }
1156    return null;
1157  }
1158
1159  private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) {
1160    block.release();
1161    cache.evictBlock(cacheKey);
1162  }
1163
1164  /**
1165   * @param cacheBlock Add block to cache, if found
1166   * @return block wrapped in a ByteBuffer, with header skipped
1167   */
1168  @Override
1169  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException {
1170    if (trailer.getMetaIndexCount() == 0) {
1171      return null; // there are no meta blocks
1172    }
1173    if (metaBlockIndexReader == null) {
1174      throw new IOException(path + " meta index not loaded");
1175    }
1176
1177    byte[] mbname = Bytes.toBytes(metaBlockName);
1178    int block = metaBlockIndexReader.rootBlockContainingKey(mbname, 0, mbname.length);
1179    if (block == -1) {
1180      return null;
1181    }
1182    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);
1183
1184    // Per meta key from any given file, synchronize reads for said block. This
1185    // is OK to do for meta blocks because the meta block index is always
1186    // single-level.
1187    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
1188      // Check cache for block. If found return.
1189      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
1190      BlockCacheKey cacheKey =
1191        new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
1192
1193      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
1194      HFileBlock cachedBlock =
1195        getCachedBlock(cacheKey, cacheBlock, false, true, true, BlockType.META, null);
1196      if (cachedBlock != null) {
1197        assert cachedBlock.isUnpacked() : "Packed block leak.";
1198        // Return a distinct 'shallow copy' of the block,
1199        // so pos does not get messed by the scanner
1200        return cachedBlock;
1201      }
1202      // Cache Miss, please load.
1203
1204      HFileBlock compressedBlock =
1205        fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
1206      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1207      if (compressedBlock != uncompressedBlock) {
1208        compressedBlock.release();
1209      }
1210
1211      // Cache the block
1212      if (cacheBlock) {
1213        cacheConf.getBlockCache().ifPresent(
1214          cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
1215      }
1216      return uncompressedBlock;
1217    }
1218  }
1219
1220  /**
1221   * If expected block is data block, we'll allocate the ByteBuff of block from
1222   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} and it's usually an off-heap one,
1223   * otherwise it will allocate from heap.
1224   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
1225   *      boolean, boolean)
1226   */
1227  private boolean shouldUseHeap(BlockType expectedBlockType) {
1228    if (!cacheConf.getBlockCache().isPresent()) {
1229      return false;
1230    } else if (!cacheConf.isCombinedBlockCache()) {
1231      // Block to cache in LruBlockCache must be an heap one. So just allocate block memory from
1232      // heap for saving an extra off-heap to heap copying.
1233      return true;
1234    }
1235    return expectedBlockType != null && !expectedBlockType.isData();
1236  }
1237
1238  @Override
1239  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
1240    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
1241    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1242    if (dataBlockIndexReader == null) {
1243      throw new IOException(path + " block index not loaded");
1244    }
1245    long trailerOffset = trailer.getLoadOnOpenDataOffset();
1246    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
1247      throw new IOException("Requested block is out of range: " + dataBlockOffset
1248        + ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset()
1249        + ", trailer.getLoadOnOpenDataOffset: " + trailerOffset + ", path=" + path);
1250    }
1251    // For any given block from any given file, synchronize reads for said
1252    // block.
1253    // Without a cache, this synchronizing is needless overhead, but really
1254    // the other choice is to duplicate work (which the cache would prevent you
1255    // from doing).
1256
1257    BlockCacheKey cacheKey =
1258      new BlockCacheKey(name, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType);
1259
1260    boolean useLock = false;
1261    IdLock.Entry lockEntry = null;
1262    Span span = TraceUtil.getGlobalTracer().spanBuilder("HFileReaderImpl.readBlock").startSpan();
1263    try (Scope traceScope = span.makeCurrent()) {
1264      while (true) {
1265        // Check cache for block. If found return.
1266        if (cacheConf.shouldReadBlockFromCache(expectedBlockType)) {
1267          if (useLock) {
1268            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
1269          }
1270          // Try and get the block from the block cache. If the useLock variable is true then this
1271          // is the second time through the loop and it should not be counted as a block cache miss.
1272          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, isCompaction,
1273            updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding);
1274          if (cachedBlock != null) {
1275            if (LOG.isTraceEnabled()) {
1276              LOG.trace("From Cache " + cachedBlock);
1277            }
1278            span.addEvent("blockCacheHit");
1279            assert cachedBlock.isUnpacked() : "Packed block leak.";
1280            if (cachedBlock.getBlockType().isData()) {
1281              if (updateCacheMetrics) {
1282                HFile.DATABLOCK_READ_COUNT.increment();
1283              }
1284              // Validate encoding type for data blocks. We include encoding
1285              // type in the cache key, and we expect it to match on a cache hit.
1286              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
1287                // Remember to release the block when in exceptional path.
1288                cacheConf.getBlockCache().ifPresent(cache -> {
1289                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
1290                });
1291                throw new IOException("Cached block under key " + cacheKey + " "
1292                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
1293                  + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
1294              }
1295            }
1296            // Cache-hit. Return!
1297            return cachedBlock;
1298          }
1299
1300          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
1301            // check cache again with lock
1302            useLock = true;
1303            continue;
1304          }
1305          // Carry on, please load.
1306        }
1307
1308        span.addEvent("blockCacheMiss");
1309        // Load block from filesystem.
1310        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
1311          !isCompaction, shouldUseHeap(expectedBlockType));
1312        validateBlockType(hfileBlock, expectedBlockType);
1313        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
1314        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
1315
1316        // Cache the block if necessary
1317        cacheConf.getBlockCache().ifPresent(cache -> {
1318          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
1319            cache.cacheBlock(cacheKey,
1320              cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
1321              cacheConf.isInMemory());
1322          }
1323        });
1324        if (unpacked != hfileBlock) {
1325          // End of life here if hfileBlock is an independent block.
1326          hfileBlock.release();
1327        }
1328        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
1329          HFile.DATABLOCK_READ_COUNT.increment();
1330        }
1331
1332        return unpacked;
1333      }
1334    } finally {
1335      if (lockEntry != null) {
1336        offsetLock.releaseLockEntry(lockEntry);
1337      }
1338      span.end();
1339    }
1340  }
1341
1342  @Override
1343  public boolean hasMVCCInfo() {
1344    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
1345  }
1346
1347  /**
1348   * Compares the actual type of a block retrieved from cache or disk with its expected type and
1349   * throws an exception in case of a mismatch. Expected block type of {@link BlockType#DATA} is
1350   * considered to match the actual block type [@link {@link BlockType#ENCODED_DATA} as well.
1351   * @param block             a block retrieved from cache or disk
1352   * @param expectedBlockType the expected block type, or null to skip the check
1353   */
1354  private void validateBlockType(HFileBlock block, BlockType expectedBlockType) throws IOException {
1355    if (expectedBlockType == null) {
1356      return;
1357    }
1358    BlockType actualBlockType = block.getBlockType();
1359    if (expectedBlockType.isData() && actualBlockType.isData()) {
1360      // We consider DATA to match ENCODED_DATA for the purpose of this
1361      // verification.
1362      return;
1363    }
1364    if (actualBlockType != expectedBlockType) {
1365      throw new IOException("Expected block type " + expectedBlockType + ", " + "but got "
1366        + actualBlockType + ": " + block + ", path=" + path);
1367    }
1368  }
1369
1370  /**
1371   * @return Last key as cell in the file. May be null if file has no entries. Note that this is not
1372   *         the last row key, but it is the Cell representation of the last key
1373   */
1374  @Override
1375  public Optional<Cell> getLastKey() {
1376    return dataBlockIndexReader.isEmpty()
1377      ? Optional.empty()
1378      : Optional.of(fileInfo.getLastKeyCell());
1379  }
1380
1381  /**
1382   * @return Midkey for this file. We work with block boundaries only so returned midkey is an
1383   *         approximation only.
1384   */
1385  @Override
1386  public Optional<Cell> midKey() throws IOException {
1387    return Optional.ofNullable(dataBlockIndexReader.midkey(this));
1388  }
1389
1390  @Override
1391  public void close() throws IOException {
1392    close(cacheConf.shouldEvictOnClose());
1393  }
1394
1395  @Override
1396  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
1397    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
1398  }
1399
1400  /** For testing */
1401  @Override
1402  public HFileBlock.FSReader getUncachedBlockReader() {
1403    return fsBlockReader;
1404  }
1405
1406  /**
1407   * Scanner that operates on encoded data blocks.
1408   */
1409  protected static class EncodedScanner extends HFileScannerImpl {
    // Decoding context created from dataBlockEncoder in the constructor and handed to the seeker.
    private final HFileBlockDecodingContext decodingCtx;
    // Seeker created by dataBlockEncoder; the scanner's next/getKey/getValue/getCell and in-block
    // seek operations delegate to it.
    private final DataBlockEncoder.EncodedSeeker seeker;
    // Encoder obtained from the reader's data block encoding in the constructor.
    private final DataBlockEncoder dataBlockEncoder;
1413
1414    public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, boolean pread,
1415      boolean isCompaction, HFileContext meta, Configuration conf) {
1416      super(reader, cacheBlocks, pread, isCompaction);
1417      DataBlockEncoding encoding = reader.getDataBlockEncoding();
1418      dataBlockEncoder = encoding.getEncoder();
1419      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(conf, meta);
1420      seeker = dataBlockEncoder.createSeeker(decodingCtx);
1421    }
1422
1423    @Override
1424    public boolean isSeeked() {
1425      return curBlock != null;
1426    }
1427
1428    @Override
1429    public void setNonSeekedState() {
1430      reset();
1431    }
1432
1433    /**
1434     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
1435     * key/value pair.
1436     * @param newBlock the block to make current, and read by {@link HFileReaderImpl#readBlock},
1437     *                 it's a totally new block with new allocated {@link ByteBuff}, so if no
1438     *                 further reference to this block, we should release it carefully.
1439     */
1440    @Override
1441    protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
1442      try {
1443        // sanity checks
1444        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
1445          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
1446        }
1447        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
1448        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
1449          String encoderCls = dataBlockEncoder.getClass().getName();
1450          throw new CorruptHFileException(
1451            "Encoder " + encoderCls + " doesn't support data block encoding "
1452              + DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ",path=" + reader.getPath());
1453        }
1454        updateCurrBlockRef(newBlock);
1455        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
1456        seeker.setCurrentBuffer(encodedBuffer);
1457      } finally {
1458        releaseIfNotCurBlock(newBlock);
1459      }
1460      // Reset the next indexed key
1461      this.nextIndexedKey = null;
1462    }
1463
1464    private ByteBuff getEncodedBuffer(HFileBlock newBlock) {
1465      ByteBuff origBlock = newBlock.getBufferReadOnly();
1466      int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE;
1467      origBlock.position(pos);
1468      origBlock
1469        .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
1470      return origBlock.slice();
1471    }
1472
1473    @Override
1474    protected boolean processFirstDataBlock() throws IOException {
1475      seeker.rewind();
1476      return true;
1477    }
1478
1479    @Override
1480    public boolean next() throws IOException {
1481      boolean isValid = seeker.next();
1482      if (!isValid) {
1483        HFileBlock newBlock = readNextDataBlock();
1484        isValid = newBlock != null;
1485        if (isValid) {
1486          updateCurrentBlock(newBlock);
1487        } else {
1488          setNonSeekedState();
1489        }
1490      }
1491      return isValid;
1492    }
1493
1494    @Override
1495    public Cell getKey() {
1496      assertValidSeek();
1497      return seeker.getKey();
1498    }
1499
1500    @Override
1501    public ByteBuffer getValue() {
1502      assertValidSeek();
1503      return seeker.getValueShallowCopy();
1504    }
1505
1506    @Override
1507    public Cell getCell() {
1508      if (this.curBlock == null) {
1509        return null;
1510      }
1511      return seeker.getCell();
1512    }
1513
1514    @Override
1515    public String getKeyString() {
1516      return CellUtil.toString(getKey(), true);
1517    }
1518
1519    @Override
1520    public String getValueString() {
1521      ByteBuffer valueBuffer = getValue();
1522      return ByteBufferUtils.toStringBinary(valueBuffer);
1523    }
1524
1525    private void assertValidSeek() {
1526      if (this.curBlock == null) {
1527        throw new NotSeekedException(reader.getPath());
1528      }
1529    }
1530
1531    @Override
1532    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1533      return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock));
1534    }
1535
1536    @Override
1537    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
1538      Cell key, boolean seekBefore) throws IOException {
1539      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
1540        updateCurrentBlock(seekToBlock);
1541      } else if (rewind) {
1542        seeker.rewind();
1543      }
1544      this.nextIndexedKey = nextIndexedKey;
1545      return seeker.seekToKeyInBlock(key, seekBefore);
1546    }
1547
1548    @Override
1549    public int compareKey(CellComparator comparator, Cell key) {
1550      return seeker.compareKey(comparator, key);
1551    }
1552  }
1553
1554  /**
1555   * Returns a buffer with the Bloom filter metadata. The caller takes ownership of the buffer.
1556   */
1557  @Override
1558  public DataInput getGeneralBloomFilterMetadata() throws IOException {
1559    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
1560  }
1561
1562  @Override
1563  public DataInput getDeleteBloomFilterMetadata() throws IOException {
1564    return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
1565  }
1566
1567  private DataInput getBloomFilterMetadata(BlockType blockType) throws IOException {
1568    if (
1569      blockType != BlockType.GENERAL_BLOOM_META && blockType != BlockType.DELETE_FAMILY_BLOOM_META
1570    ) {
1571      throw new RuntimeException(
1572        "Block Type: " + blockType.toString() + " is not supported, path=" + path);
1573    }
1574
1575    for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) {
1576      if (b.getBlockType() == blockType) {
1577        return b.getByteStream();
1578      }
1579    }
1580    return null;
1581  }
1582
1583  public boolean isFileInfoLoaded() {
1584    return true; // We load file info in constructor in version 2.
1585  }
1586
1587  @Override
1588  public HFileContext getFileContext() {
1589    return hfileContext;
1590  }
1591
1592  /**
1593   * Returns false if block prefetching was requested for this file and has not completed, true
1594   * otherwise
1595   */
1596  @Override
1597  public boolean prefetchComplete() {
1598    return PrefetchExecutor.isCompleted(path);
1599  }
1600
1601  /**
1602   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1603   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is nothing to clean up
1604   * in a Scanner. Letting go of your references to the scanner is sufficient. NOTE: Do not use this
1605   * overload of getScanner for compactions. See
1606   * {@link #getScanner(Configuration, boolean, boolean, boolean)}
1607   * @param conf        Store configuration.
1608   * @param cacheBlocks True if we should cache blocks read in by this scanner.
1609   * @param pread       Use positional read rather than seek+read if true (pread is better for
1610   *                    random reads, seek+read is better scanning).
1611   * @return Scanner on this file.
1612   */
1613  @Override
1614  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread) {
1615    return getScanner(conf, cacheBlocks, pread, false);
1616  }
1617
1618  /**
1619   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1620   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is nothing to clean up
1621   * in a Scanner. Letting go of your references to the scanner is sufficient. n * Store
1622   * configuration. n * True if we should cache blocks read in by this scanner. n * Use positional
1623   * read rather than seek+read if true (pread is better for random reads, seek+read is better
1624   * scanning). n * is scanner being used for a compaction?
1625   * @return Scanner on this file.
1626   */
1627  @Override
1628  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread,
1629    final boolean isCompaction) {
1630    if (dataBlockEncoder.useEncodedScanner()) {
1631      return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext, conf);
1632    }
1633    return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
1634  }
1635
1636  public int getMajorVersion() {
1637    return 3;
1638  }
1639
1640  @Override
1641  public void unbufferStream() {
1642    fsBlockReader.unbufferStream();
1643  }
1644}