001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.BLOCK_CACHE_KEY_KEY;
021
022import io.opentelemetry.api.common.Attributes;
023import io.opentelemetry.api.trace.Span;
024import java.io.DataInput;
025import java.io.IOException;
026import java.nio.ByteBuffer;
027import java.util.ArrayList;
028import java.util.Optional;
029import java.util.function.IntConsumer;
030import org.apache.hadoop.conf.Configurable;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
034import org.apache.hadoop.hbase.Cell;
035import org.apache.hadoop.hbase.CellComparator;
036import org.apache.hadoop.hbase.CellUtil;
037import org.apache.hadoop.hbase.ExtendedCell;
038import org.apache.hadoop.hbase.HConstants;
039import org.apache.hadoop.hbase.KeyValue;
040import org.apache.hadoop.hbase.PrivateCellUtil;
041import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue;
042import org.apache.hadoop.hbase.SizeCachedKeyValue;
043import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue;
044import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
045import org.apache.hadoop.hbase.io.compress.Compression;
046import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
047import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
048import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
049import org.apache.hadoop.hbase.nio.ByteBuff;
050import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
051import org.apache.hadoop.hbase.util.ByteBufferUtils;
052import org.apache.hadoop.hbase.util.Bytes;
053import org.apache.hadoop.hbase.util.IdLock;
054import org.apache.hadoop.hbase.util.ObjectIntPair;
055import org.apache.hadoop.io.WritableUtils;
056import org.apache.yetus.audience.InterfaceAudience;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060/**
061 * Implementation that can handle all hfile versions of {@link HFile.Reader}.
062 */
063@InterfaceAudience.Private
064@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
065public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
  // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into
  // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against
  // the MaxInlineLevel limit because too many tiers were involved in reading from an hfile. It
  // was also hard to navigate the source code with so many classes participating in a read.
070  private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);
071
072  /** Data block index reader keeping the root data index in memory */
073  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;
074
075  /** Meta block index reader -- always single level */
076  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;
077
078  protected FixedFileTrailer trailer;
079
080  private final boolean primaryReplicaReader;
081
082  /**
083   * What kind of data block encoding should be used while reading, writing, and handling cache.
084   */
085  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;
086
087  /** Block cache configuration. */
088  protected final CacheConfig cacheConf;
089
090  protected ReaderContext context;
091
092  protected final HFileInfo fileInfo;
093
094  /** Path of file */
095  protected final Path path;
096
097  /** File name to be used for block names */
098  protected final String name;
099
100  private Configuration conf;
101
102  protected HFileContext hfileContext;
103
104  /** Filesystem-level block reader. */
105  protected HFileBlock.FSReader fsBlockReader;
106
107  /**
108   * A "sparse lock" implementation allowing to lock on a particular block identified by offset. The
109   * purpose of this is to avoid two clients loading the same block, and have all but one client
110   * wait to get the block from the cache.
111   */
112  private IdLock offsetLock = new IdLock();
113
114  /** Minimum minor version supported by this HFile format */
115  static final int MIN_MINOR_VERSION = 0;
116
117  /** Maximum minor version supported by this HFile format */
118  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
119  // the file. This version can read Writables version 1.
120  static final int MAX_MINOR_VERSION = 3;
121
122  /** Minor versions starting with this number have faked index key */
123  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
124
125  /**
126   * Opens a HFile.
127   * @param context   Reader context info
128   * @param fileInfo  HFile info
129   * @param cacheConf Cache configuration.
130   * @param conf      Configuration
131   */
132  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
133  public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
134    Configuration conf) throws IOException {
135    this.cacheConf = cacheConf;
136    this.context = context;
137    this.path = context.getFilePath();
138    this.name = path.getName();
139    this.conf = conf;
140    this.primaryReplicaReader = context.isPrimaryReplicaReader();
141    this.fileInfo = fileInfo;
142    this.trailer = fileInfo.getTrailer();
143    this.hfileContext = fileInfo.getHFileContext();
144    this.fsBlockReader =
145      new HFileBlock.FSReaderImpl(context, hfileContext, cacheConf.getByteBuffAllocator(), conf);
146    this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
147    fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
148    dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
149    metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
150  }
151
152  @SuppressWarnings("serial")
153  public static class BlockIndexNotLoadedException extends IllegalStateException {
154    public BlockIndexNotLoadedException(Path path) {
155      // Add a message in case anyone relies on it as opposed to class name.
156      super(path + " block index not loaded");
157    }
158  }
159
160  public CacheConfig getCacheConf() {
161    return cacheConf;
162  }
163
164  private Optional<String> toStringFirstKey() {
165    return getFirstKey().map(CellUtil::getCellKeyAsString);
166  }
167
168  private Optional<String> toStringLastKey() {
169    return getLastKey().map(CellUtil::getCellKeyAsString);
170  }
171
172  @Override
173  public String toString() {
174    return "reader=" + path.toString()
175      + (!isFileInfoLoaded()
176        ? ""
177        : ", compression=" + trailer.getCompressionCodec().getName() + ", cacheConf=" + cacheConf
178          + ", firstKey=" + toStringFirstKey() + ", lastKey=" + toStringLastKey())
179      + ", avgKeyLen=" + fileInfo.getAvgKeyLen() + ", avgValueLen=" + fileInfo.getAvgValueLen()
180      + ", entries=" + trailer.getEntryCount() + ", length=" + context.getFileSize();
181  }
182
183  @Override
184  public long length() {
185    return context.getFileSize();
186  }
187
188  /**
189   * @return the first key in the file. May be null if file has no entries. Note that this is not
190   *         the first row key, but rather the byte form of the first KeyValue.
191   */
192  @Override
193  public Optional<ExtendedCell> getFirstKey() {
194    if (dataBlockIndexReader == null) {
195      throw new BlockIndexNotLoadedException(path);
196    }
197    return dataBlockIndexReader.isEmpty()
198      ? Optional.empty()
199      : Optional.of(dataBlockIndexReader.getRootBlockKey(0));
200  }
201
202  /**
203   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
204   * eliminate {@link KeyValue} here.
205   * @return the first row key, or null if the file is empty.
206   */
207  @Override
208  public Optional<byte[]> getFirstRowKey() {
209    // We have to copy the row part to form the row key alone
210    return getFirstKey().map(CellUtil::cloneRow);
211  }
212
213  /**
214   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
215   * eliminate {@link KeyValue} here.
216   * @return the last row key, or null if the file is empty.
217   */
218  @Override
219  public Optional<byte[]> getLastRowKey() {
220    // We have to copy the row part to form the row key alone
221    return getLastKey().map(CellUtil::cloneRow);
222  }
223
224  /** Returns number of KV entries in this HFile */
225  @Override
226  public long getEntries() {
227    return trailer.getEntryCount();
228  }
229
230  /** Returns comparator */
231  @Override
232  public CellComparator getComparator() {
233    return this.hfileContext.getCellComparator();
234  }
235
236  public Compression.Algorithm getCompressionAlgorithm() {
237    return trailer.getCompressionCodec();
238  }
239
240  /**
241   * @return the total heap size of data and meta block indexes in bytes. Does not take into account
242   *         non-root blocks of a multilevel data index.
243   */
244  @Override
245  public long indexSize() {
246    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
247      + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() : 0);
248  }
249
250  @Override
251  public String getName() {
252    return name;
253  }
254
255  @Override
256  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
257    this.dataBlockEncoder = dataBlockEncoder;
258    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
259  }
260
261  @Override
262  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
263    this.dataBlockIndexReader = reader;
264  }
265
266  @Override
267  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
268    return dataBlockIndexReader;
269  }
270
271  @Override
272  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
273    this.metaBlockIndexReader = reader;
274  }
275
276  @Override
277  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
278    return metaBlockIndexReader;
279  }
280
281  @Override
282  public FixedFileTrailer getTrailer() {
283    return trailer;
284  }
285
286  @Override
287  public ReaderContext getContext() {
288    return this.context;
289  }
290
291  @Override
292  public HFileInfo getHFileInfo() {
293    return this.fileInfo;
294  }
295
296  @Override
297  public boolean isPrimaryReplicaReader() {
298    return primaryReplicaReader;
299  }
300
301  /**
302   * An exception thrown when an operation requiring a scanner to be seeked is invoked on a scanner
303   * that is not seeked.
304   */
305  @SuppressWarnings("serial")
306  public static class NotSeekedException extends IllegalStateException {
307    public NotSeekedException(Path path) {
308      super(path + " not seeked to a key/value");
309    }
310  }
311
312  public static class HFileScannerImpl implements HFileScanner {
313    private ByteBuff blockBuffer;
314    protected final boolean cacheBlocks;
315    protected final boolean pread;
316    protected final boolean isCompaction;
317    private int currKeyLen;
318    private int currValueLen;
319    private int currMemstoreTSLen;
320    private long currMemstoreTS;
321    protected final HFile.Reader reader;
322    private int currTagsLen;
323    private short rowLen;
324    // buffer backed keyonlyKV
325    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
    // A pair reused in blockSeek() so that we don't create a lot of garbage objects
327    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();
328
    /**
     * The next indexed key keeps track of the indexed key of the next data block. If the
     * nextIndexedKey is KeyValueScanner.NO_NEXT_INDEXED_KEY, it means that the current data block
     * is the last data block. If the nextIndexedKey is null, it means the nextIndexedKey has not
     * been loaded yet.
     */
335    protected ExtendedCell nextIndexedKey;
336
    // Current block being used. NOTICE: Don't release curBlock separately except in the shipped()
    // or close() methods. Because shipped() or close() will do the release eventually, even if an
    // exception occurs curBlock will be released by the close() method (see
    // RegionScannerImpl#handleException). Call releaseIfNotCurBlock() to release a block that is
    // not referenced by curBlock.
342    protected HFileBlock curBlock;
343    // Whether we returned a result for curBlock's size in recordBlockSize().
344    // gets reset whenever curBlock is changed.
345    private boolean providedCurrentBlockSize = false;
346
347    public HFileBlock getCurBlock() {
348      return curBlock;
349    }
350
351    // Previous blocks that were used in the course of the read
352    protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();
353
354    public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
355      final boolean pread, final boolean isCompaction) {
356      this.reader = reader;
357      this.cacheBlocks = cacheBlocks;
358      this.pread = pread;
359      this.isCompaction = isCompaction;
360    }
361
362    void updateCurrBlockRef(HFileBlock block) {
363      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
364        return;
365      }
366      if (this.curBlock != null && this.curBlock.isSharedMem()) {
367        prevBlocks.add(this.curBlock);
368      }
369      this.curBlock = block;
370      this.providedCurrentBlockSize = false;
371    }
372
373    void reset() {
374      // We don't have to keep ref to heap block
375      if (this.curBlock != null && this.curBlock.isSharedMem()) {
376        this.prevBlocks.add(this.curBlock);
377      }
378      this.curBlock = null;
379    }
380
381    private void returnBlocks(boolean returnAll) {
382      this.prevBlocks.forEach(HFileBlock::release);
383      this.prevBlocks.clear();
384      if (returnAll && this.curBlock != null) {
385        this.curBlock.release();
386        this.curBlock = null;
387      }
388    }
389
390    @Override
391    public boolean isSeeked() {
392      return blockBuffer != null;
393    }
394
395    @Override
396    public String toString() {
397      return "HFileScanner for reader " + String.valueOf(getReader());
398    }
399
400    protected void assertSeeked() {
401      if (!isSeeked()) {
402        throw new NotSeekedException(reader.getPath());
403      }
404    }
405
406    @Override
407    public HFile.Reader getReader() {
408      return reader;
409    }
410
411    // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile
412    // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous
413    // array/buffer. How many bytes we should wrap to make the KV is what this method returns.
414    private int getKVBufSize() {
415      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
416      if (currTagsLen > 0) {
417        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
418      }
419      return kvBufSize;
420    }
421
422    @Override
423    public void close() {
424      if (!pread) {
425        // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393
426        reader.unbufferStream();
427      }
428      this.returnBlocks(true);
429    }
430
431    @Override
432    public void recordBlockSize(IntConsumer blockSizeConsumer) {
433      if (!providedCurrentBlockSize && curBlock != null) {
434        providedCurrentBlockSize = true;
435        blockSizeConsumer.accept(curBlock.getUncompressedSizeWithoutHeader());
436      }
437    }
438
439    // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current
440    // HFile block's buffer so as to position to the next cell.
441    private int getCurCellSerializedSize() {
442      int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + currMemstoreTSLen;
443      if (this.reader.getFileContext().isIncludesTags()) {
444        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
445      }
446      return curCellSize;
447    }
448
    /**
     * Decodes the lengths of the cell at the current buffer position into the scanner's
     * {@code currKeyLen}, {@code currValueLen}, {@code rowLen}, {@code currTagsLen} (only when the
     * file context includes tags) and, via {@link #readMvccVersion(int)}, the mvcc fields. All
     * reads are relative to the current position.
     */
    protected void readKeyValueLen() {
      // This is a hot method. We go out of our way to make this method short so it can be
      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
      // because it is faster than going via range-checked ByteBuffer methods or going through a
      // byte buffer array a byte at a time.
      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
      // Trying to imitate what was done - need to profile if this is better or
      // earlier way is better by doing mark and reset?
      // But ensure that you read long instead of two ints
      long ll = blockBuffer.getLongAfterPosition(0);
      // Read top half as an int of key length and bottom int as value length
      this.currKeyLen = (int) (ll >> Integer.SIZE);
      // NOTE(review): presumably the mask only has bits set in the upper 32 bits, so after the
      // XOR the int cast still yields the low 32 bits of ll unchanged - confirm against Bytes.
      this.currValueLen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
      // Validation of the two lengths is delegated to checkKeyValueLen(), defined elsewhere.
      checkKeyValueLen();
      // The row length is the short that directly follows the two length ints.
      this.rowLen = blockBuffer.getShortAfterPosition(Bytes.SIZEOF_LONG);
      // p is the offset, relative to the current position, of the first byte after the key and
      // value length fields, the key and the value.
      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
      if (reader.getFileContext().isIncludesTags()) {
        // Tags length is a short.
        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
        checkTagsLen();
        // Step over the tags-length short and the tags themselves.
        p += (Bytes.SIZEOF_SHORT + currTagsLen);
      }
      // Whatever follows at offset p is the mvcc, if the file carries one.
      readMvccVersion(p);
    }
475
476    private final void checkTagsLen() {
477      if (checkLen(this.currTagsLen)) {
478        throw new IllegalStateException(
479          "Invalid currTagsLen " + this.currTagsLen + ". Block offset: " + curBlock.getOffset()
480            + ", block length: " + this.blockBuffer.limit() + ", position: "
481            + this.blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
482      }
483    }
484
485    /**
486     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
487     */
488    protected void readMvccVersion(final int offsetFromPos) {
489      // See if we even need to decode mvcc.
490      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
491        return;
492      }
493      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
494        currMemstoreTS = 0;
495        currMemstoreTSLen = 1;
496        return;
497      }
498      _readMvccVersion(offsetFromPos);
499    }
500
    /**
     * Actually do the mvcc read. Does no checks. Decodes a variable-length integer located at
     * {@code offsetFromPos} (relative to the current buffer position) and stores the value in
     * {@code currMemstoreTS} and its encoded size in bytes in {@code currMemstoreTSLen}.
     * @param offsetFromPos offset of the first byte of the encoded mvcc, relative to the current
     *                      buffer position
     */
    private void _readMvccVersion(int offsetFromPos) {
      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
      // previous if one-byte vint, we'd redo the vint call to find int size.
      // Also the method is kept small so can be inlined.
      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
      // Total encoded size, including the first byte, as derived from that first byte.
      int len = WritableUtils.decodeVIntSize(firstByte);
      if (len == 1) {
        // Single-byte encoding: the first byte is the value itself.
        this.currMemstoreTS = firstByte;
      } else {
        // Multi-byte encoding: the bytes after the first one hold the magnitude, most
        // significant byte first. Consume them in the largest chunks available (int, then
        // short, then single bytes) and shift them into i.
        int remaining = len - 1;
        long i = 0;
        offsetFromPos++;
        if (remaining >= Bytes.SIZEOF_INT) {
          // The int read has to be converted to unsigned long so the & op
          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
          remaining -= Bytes.SIZEOF_INT;
          offsetFromPos += Bytes.SIZEOF_INT;
        }
        if (remaining >= Bytes.SIZEOF_SHORT) {
          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
          i = i << 16;
          // Mask so the short is treated as unsigned when merged in.
          i = i | (s & 0xFFFF);
          remaining -= Bytes.SIZEOF_SHORT;
          offsetFromPos += Bytes.SIZEOF_SHORT;
        }
        for (int idx = 0; idx < remaining; idx++) {
          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
          i = i << 8;
          // Mask so the byte is treated as unsigned when merged in.
          i = i | (b & 0xFF);
        }
        // A first byte that marks a negative vint means the stored magnitude is the bitwise
        // complement of the value.
        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
      }
      this.currMemstoreTSLen = len;
    }
538
    /**
     * Within a loaded block, seek looking for the last key that is smaller than (or equal to?) the
     * key we are interested in. A note on the seekBefore: if you have seekBefore = true, AND the
     * first key in the block = key, then you'll get thrown exceptions. The caller has to check for
     * that case and load the previous block as appropriate.
     * <p>
     * The scan walks the block cell by cell from the current buffer position, comparing the search
     * key against each cell's key (mvcc ignored), and leaves the buffer positioned on the cell it
     * settles on with the scanner's current-cell length fields filled in.
     * @param key        the key to find
     * @param seekBefore find the key before the given key in case of exact match.
     * @return 0 in case of an exact key match, 1 in case of an inexact match, -2 in case of an
     *         inexact match and furthermore, the input key less than the first key of current
     *         block(e.g. using a faked index key)
     * @throws IllegalStateException if a decoded key, value or tags length is rejected by the
     *                               length checks, or if seekBefore is requested and the exact
     *                               match is the first cell examined
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      int klen, vlen, tlen = 0;
      // Serialized size of the previously examined cell; -1 until one cell has been stepped over.
      int lastKeyValueSize = -1;
      int offsetFromPos;
      do {
        offsetFromPos = 0;
        // Better to ensure that we use the BB Utils here
        // One long holds both lengths: key length in the upper int, value length in the lower.
        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
        klen = (int) (ll >> Integer.SIZE);
        vlen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException(
            "Invalid klen " + klen + " or vlen " + vlen + ". Block offset: " + curBlock.getOffset()
              + ", block length: " + blockBuffer.limit() + ", position: " + blockBuffer.position()
              + " (without header)." + " path=" + reader.getPath());
        }
        offsetFromPos += Bytes.SIZEOF_LONG;
        // The row length short is the first thing in the key.
        this.rowLen = blockBuffer.getShortAfterPosition(offsetFromPos);
        // Expose the key bytes through the reusable pair and key-only cell, without copying them
        // into a new cell object, and compare against the search key.
        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen, rowLen);
        int comp =
          PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
        offsetFromPos += klen + vlen;
        if (this.reader.getFileContext().isIncludesTags()) {
          // Read short as unsigned, high byte first
          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
            ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
          if (checkLen(tlen)) {
            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
          }
          // add the two bytes read for the tags.
          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
        }
        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
          // Directly read the mvcc based on current position
          readMvccVersion(offsetFromPos);
        }
        if (comp == 0) {
          // The cell at the current position has exactly the requested key.
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              // No earlier cell was stepped over in this call, so there is nothing to go back to.
              throw new IllegalStateException("blockSeek with seekBefore "
                + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
                + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
                + curBlock.getOnDiskSizeWithHeader() + ", path=" + reader.getPath());
            }
            // Step back onto the previous cell and re-read its lengths.
            blockBuffer.moveBack(lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          // The lengths decoded above already describe the matching cell; publish them.
          currKeyLen = klen;
          currValueLen = vlen;
          currTagsLen = tlen;
          return 0; // indicate exact match
        } else if (comp < 0) {
          // The search key sorts before the cell at the current position: settle on the previous
          // cell if one was stepped over, otherwise stay on this one.
          if (lastKeyValueSize > 0) {
            blockBuffer.moveBack(lastKeyValueSize);
          }
          readKeyValueLen();
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
            // The very first cell examined, at buffer position 0, is already past the search key.
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }
        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
        // include tag length also if tags included with KV
        if (reader.getFileContext().isIncludesTags()) {
          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
        }
        // Advance to the next cell.
        blockBuffer.skip(lastKeyValueSize);
      } while (blockBuffer.hasRemaining());

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.moveBack(lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }
630
631    @Override
632    public ExtendedCell getNextIndexedKey() {
633      return nextIndexedKey;
634    }
635
636    @Override
637    public int seekTo(ExtendedCell key) throws IOException {
638      return seekTo(key, true);
639    }
640
641    @Override
642    public int reseekTo(ExtendedCell key) throws IOException {
643      int compared;
644      if (isSeeked()) {
645        compared = compareKey(reader.getComparator(), key);
646        if (compared < 1) {
647          // If the required key is less than or equal to current key, then
648          // don't do anything.
649          return compared;
650        } else {
651          // The comparison with no_next_index_key has to be checked
652          if (
653            this.nextIndexedKey != null && (this.nextIndexedKey
654                == KeyValueScanner.NO_NEXT_INDEXED_KEY
655              || PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey)
656                  < 0)
657          ) {
658            // The reader shall continue to scan the current data block instead
659            // of querying the
660            // block index as long as it knows the target key is strictly
661            // smaller than
662            // the next indexed key or the current data block is the last data
663            // block.
664            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key, false);
665          }
666        }
667      }
668      // Don't rewind on a reseek operation, because reseek implies that we are
669      // always going forward in the file.
670      return seekTo(key, false);
671    }
672
673    /**
674     * An internal API function. Seek to the given key, optionally rewinding to the first key of the
675     * block before doing the seek.
676     * @param key    - a cell representing the key that we need to fetch
677     * @param rewind whether to rewind to the first key of the block before doing the seek. If this
678     *               is false, we are assuming we never go back, otherwise the result is undefined.
679     * @return -1 if the key is earlier than the first key of the file, 0 if we are at the given
680     *         key, 1 if we are past the given key -2 if the key is earlier than the first key of
681     *         the file while using a faked index key
682     */
683    public int seekTo(ExtendedCell key, boolean rewind) throws IOException {
684      HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
685      BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
686        cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader);
687      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
688        // This happens if the key e.g. falls before the beginning of the file.
689        return -1;
690      }
691      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
692        blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
693    }
694
695    @Override
696    public boolean seekBefore(ExtendedCell key) throws IOException {
697      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
698        cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction), reader);
699      if (seekToBlock == null) {
700        return false;
701      }
702      ExtendedCell firstKey = getFirstKeyCellInBlock(seekToBlock);
703      if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) {
704        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
705        // The key we are interested in
706        if (previousBlockOffset == -1) {
707          // we have a 'problem', the key we want is the first of the file.
708          releaseIfNotCurBlock(seekToBlock);
709          return false;
710        }
711
712        // The first key in the current block 'seekToBlock' is greater than the given
713        // seekBefore key. We will go ahead by reading the next block that satisfies the
714        // given key. Return the current block before reading the next one.
715        releaseIfNotCurBlock(seekToBlock);
716        // It is important that we compute and pass onDiskSize to the block
717        // reader so that it does not have to read the header separately to
718        // figure out the size. Currently, we do not have a way to do this
719        // correctly in the general case however.
720        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
721        int prevBlockSize = -1;
722        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
723          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
724        // TODO shortcut: seek forward in this block to the last key of the
725        // block.
726      }
727      loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true);
728      return true;
729    }
730
731    /**
732     * The curBlock will be released by shipping or close method, so only need to consider releasing
733     * the block, which was read from HFile before and not referenced by curBlock.
734     */
735    protected void releaseIfNotCurBlock(HFileBlock block) {
736      if (curBlock != block) {
737        block.release();
738      }
739    }
740
741    /**
742     * Scans blocks in the "scanned" section of the {@link HFile} until the next data block is
743     * found.
744     * @return the next block, or null if there are no more data blocks
745     */
746    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH",
747        justification = "Yeah, unnecessary null check; could do w/ clean up")
748    protected HFileBlock readNextDataBlock() throws IOException {
749      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
750      if (curBlock == null) {
751        return null;
752      }
753      HFileBlock block = this.curBlock;
754      do {
755        if (block.getOffset() >= lastDataBlockOffset) {
756          releaseIfNotCurBlock(block);
757          return null;
758        }
759        if (block.getOffset() < 0) {
760          releaseIfNotCurBlock(block);
761          throw new IOException("Invalid block offset: " + block + ", path=" + reader.getPath());
762        }
763        // We are reading the next block without block type validation, because
764        // it might turn out to be a non-data block.
765        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
766          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
767          getEffectiveDataBlockEncoding());
768        if (block != null && !block.getBlockType().isData()) {
769          // Whatever block we read we will be returning it unless
770          // it is a datablock. Just in case the blocks are non data blocks
771          block.release();
772        }
773      } while (!block.getBlockType().isData());
774      return block;
775    }
776
777    public DataBlockEncoding getEffectiveDataBlockEncoding() {
778      return this.reader.getEffectiveEncodingInCache(isCompaction);
779    }
780
781    @Override
782    public ExtendedCell getCell() {
783      if (!isSeeked()) {
784        return null;
785      }
786
787      ExtendedCell ret;
788      int cellBufSize = getKVBufSize();
789      long seqId = 0L;
790      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
791        seqId = currMemstoreTS;
792      }
793      if (blockBuffer.hasArray()) {
794        // TODO : reduce the varieties of KV here. Check if based on a boolean
795        // we can handle the 'no tags' case.
796        if (currTagsLen > 0) {
797          ret = new SizeCachedKeyValue(blockBuffer.array(),
798            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
799            rowLen);
800        } else {
801          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
802            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
803            rowLen);
804        }
805      } else {
806        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
807        if (buf.isDirect()) {
808          ret = currTagsLen > 0
809            ? new SizeCachedByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId, currKeyLen,
810              rowLen)
811            : new SizeCachedNoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
812              currKeyLen, rowLen);
813        } else {
814          if (currTagsLen > 0) {
815            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
816              cellBufSize, seqId, currKeyLen, rowLen);
817          } else {
818            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
819              cellBufSize, seqId, currKeyLen, rowLen);
820          }
821        }
822      }
823      return ret;
824    }
825
826    @Override
827    public ExtendedCell getKey() {
828      assertSeeked();
829      // Create a new object so that this getKey is cached as firstKey, lastKey
830      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
831      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair);
832      ByteBuffer keyBuf = keyPair.getFirst();
833      if (keyBuf.hasArray()) {
834        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(),
835          keyBuf.arrayOffset() + keyPair.getSecond(), currKeyLen);
836      } else {
837        // Better to do a copy here instead of holding on to this BB so that
838        // we could release the blocks referring to this key. This key is specifically used
839        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
840        // every time. So holding onto the BB (incase of DBB) is not advised here.
841        byte[] key = new byte[currKeyLen];
842        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
843        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
844      }
845    }
846
847    @Override
848    public ByteBuffer getValue() {
849      assertSeeked();
850      // Okie to create new Pair. Not used in hot path
851      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
852      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
853        currValueLen, valuePair);
854      ByteBuffer valBuf = valuePair.getFirst().duplicate();
855      valBuf.position(valuePair.getSecond());
856      valBuf.limit(currValueLen + valuePair.getSecond());
857      return valBuf.slice();
858    }
859
860    protected void setNonSeekedState() {
861      reset();
862      blockBuffer = null;
863      currKeyLen = 0;
864      currValueLen = 0;
865      currMemstoreTS = 0;
866      currMemstoreTSLen = 0;
867      currTagsLen = 0;
868    }
869
870    /**
871     * Set the position on current backing blockBuffer.
872     */
873    private void positionThisBlockBuffer() {
874      try {
875        blockBuffer.skip(getCurCellSerializedSize());
876      } catch (IllegalArgumentException e) {
877        LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen
878          + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit()
879          + "; currBlock currBlockOffset = " + this.curBlock.getOffset() + "; path="
880          + reader.getPath());
881        throw e;
882      }
883    }
884
885    /**
886     * Set our selves up for the next 'next' invocation, set up next block.
887     * @return True is more to read else false if at the end.
888     */
889    private boolean positionForNextBlock() throws IOException {
890      // Methods are small so they get inlined because they are 'hot'.
891      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
892      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
893        setNonSeekedState();
894        return false;
895      }
896      return isNextBlock();
897    }
898
899    private boolean isNextBlock() throws IOException {
900      // Methods are small so they get inlined because they are 'hot'.
901      HFileBlock nextBlock = readNextDataBlock();
902      if (nextBlock == null) {
903        setNonSeekedState();
904        return false;
905      }
906      updateCurrentBlock(nextBlock);
907      return true;
908    }
909
910    private final boolean _next() throws IOException {
911      // Small method so can be inlined. It is a hot one.
912      if (blockBuffer.remaining() <= 0) {
913        return positionForNextBlock();
914      }
915
916      // We are still in the same block.
917      readKeyValueLen();
918      return true;
919    }
920
921    /**
922     * Go to the next key/value in the block section. Loads the next block if necessary. If
923     * successful, {@link #getKey()} and {@link #getValue()} can be called.
924     * @return true if successfully navigated to the next key/value
925     */
926    @Override
927    public boolean next() throws IOException {
928      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
929      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
930      assertSeeked();
931      positionThisBlockBuffer();
932      return _next();
933    }
934
935    /**
936     * Positions this scanner at the start of the file.
937     * @return false if empty file; i.e. a call to next would return false and the current key and
938     *         value are undefined.
939     */
940    @Override
941    public boolean seekTo() throws IOException {
942      if (reader == null) {
943        return false;
944      }
945
946      if (reader.getTrailer().getEntryCount() == 0) {
947        // No data blocks.
948        return false;
949      }
950
951      long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
952      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
953        return processFirstDataBlock();
954      }
955
956      readAndUpdateNewBlock(firstDataBlockOffset);
957      return true;
958    }
959
960    protected boolean processFirstDataBlock() throws IOException {
961      blockBuffer.rewind();
962      readKeyValueLen();
963      return true;
964    }
965
966    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
967      HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
968        isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
969      if (newBlock.getOffset() < 0) {
970        releaseIfNotCurBlock(newBlock);
971        throw new IOException(
972          "Invalid offset=" + newBlock.getOffset() + ", path=" + reader.getPath());
973      }
974      updateCurrentBlock(newBlock);
975    }
976
977    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, ExtendedCell nextIndexedKey,
978      boolean rewind, ExtendedCell key, boolean seekBefore) throws IOException {
979      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
980        updateCurrentBlock(seekToBlock);
981      } else if (rewind) {
982        blockBuffer.rewind();
983      }
984      // Update the nextIndexedKey
985      this.nextIndexedKey = nextIndexedKey;
986      return blockSeek(key, seekBefore);
987    }
988
989    /** Returns True if v &lt;= 0 or v &gt; current block buffer limit. */
990    protected final boolean checkKeyLen(final int v) {
991      return v <= 0 || v > this.blockBuffer.limit();
992    }
993
994    /** Returns True if v &lt; 0 or v &gt; current block buffer limit. */
995    protected final boolean checkLen(final int v) {
996      return v < 0 || v > this.blockBuffer.limit();
997    }
998
999    /**
1000     * Check key and value lengths are wholesome.
1001     */
1002    protected final void checkKeyValueLen() {
1003      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
1004        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
1005          + " or currValueLen " + this.currValueLen + ". Block offset: " + this.curBlock.getOffset()
1006          + ", block length: " + this.blockBuffer.limit() + ", position: "
1007          + this.blockBuffer.position() + " (without header)." + ", path=" + reader.getPath());
1008      }
1009    }
1010
1011    /**
1012     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
1013     * key/value pair.
1014     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}, it's a totally new block
1015     *                 with new allocated {@link ByteBuff}, so if no further reference to this
1016     *                 block, we should release it carefully.
1017     */
1018    protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
1019      try {
1020        if (newBlock.getBlockType() != BlockType.DATA) {
1021          throw new IllegalStateException(
1022            "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
1023              + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
1024              + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
1025        }
1026        updateCurrBlockRef(newBlock);
1027        blockBuffer = newBlock.getBufferWithoutHeader();
1028        readKeyValueLen();
1029      } finally {
1030        releaseIfNotCurBlock(newBlock);
1031      }
1032      // Reset the next indexed key
1033      this.nextIndexedKey = null;
1034    }
1035
1036    protected ExtendedCell getFirstKeyCellInBlock(HFileBlock curBlock) {
1037      ByteBuff buffer = curBlock.getBufferWithoutHeader();
1038      // It is safe to manipulate this buffer because we own the buffer object.
1039      buffer.rewind();
1040      int klen = buffer.getInt();
1041      buffer.skip(Bytes.SIZEOF_INT);// Skip value len part
1042      ByteBuffer keyBuff = buffer.asSubByteBuffer(klen);
1043      if (keyBuff.hasArray()) {
1044        return new KeyValue.KeyOnlyKeyValue(keyBuff.array(),
1045          keyBuff.arrayOffset() + keyBuff.position(), klen);
1046      } else {
1047        return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen);
1048      }
1049    }
1050
1051    public int compareKey(CellComparator comparator, ExtendedCell key) {
1052      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair);
1053      this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen, rowLen);
1054      return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv);
1055    }
1056
1057    @Override
1058    public void shipped() throws IOException {
1059      this.returnBlocks(false);
1060    }
1061  }
1062
1063  @Override
1064  public Path getPath() {
1065    return path;
1066  }
1067
1068  @Override
1069  public DataBlockEncoding getDataBlockEncoding() {
1070    return dataBlockEncoder.getDataBlockEncoding();
1071  }
1072
1073  @Override
1074  public Configuration getConf() {
1075    return conf;
1076  }
1077
1078  @Override
1079  public void setConf(Configuration conf) {
1080    this.conf = conf;
1081  }
1082
1083  /** Minor versions in HFile starting with this number have hbase checksums */
1084  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
1085  /** In HFile minor version that does not support checksums */
1086  public static final int MINOR_VERSION_NO_CHECKSUM = 0;
1087
1088  /** HFile minor version that introduced pbuf filetrailer */
1089  public static final int PBUF_TRAILER_MINOR_VERSION = 2;
1090
1091  /**
1092   * The size of a (key length, value length) tuple that prefixes each entry in a data block.
1093   */
1094  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
1095
1096  /**
1097   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
1098   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
1099   */
1100  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
1101    boolean updateCacheMetrics, BlockType expectedBlockType,
1102    DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1103    // Check cache for block. If found return.
1104    BlockCache cache = cacheConf.getBlockCache().orElse(null);
1105    if (cache != null) {
1106      HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
1107        updateCacheMetrics, expectedBlockType);
1108      if (cachedBlock != null) {
1109        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
1110          HFileBlock compressedBlock = cachedBlock;
1111          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1112          // In case of compressed block after unpacking we can release the compressed block
1113          if (compressedBlock != cachedBlock) {
1114            compressedBlock.release();
1115          }
1116        }
1117        try {
1118          validateBlockType(cachedBlock, expectedBlockType);
1119        } catch (IOException e) {
1120          returnAndEvictBlock(cache, cacheKey, cachedBlock);
1121          throw e;
1122        }
1123
1124        if (expectedDataBlockEncoding == null) {
1125          return cachedBlock;
1126        }
1127        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
1128        // Block types other than data blocks always have
1129        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
1130        // perform this check if cached block is a data block.
1131        if (
1132          cachedBlock.getBlockType().isData()
1133            && !actualDataBlockEncoding.equals(expectedDataBlockEncoding)
1134        ) {
1135          // This mismatch may happen if a Scanner, which is used for say a
1136          // compaction, tries to read an encoded block from the block cache.
1137          // The reverse might happen when an EncodedScanner tries to read
1138          // un-encoded blocks which were cached earlier.
1139          //
1140          // Because returning a data block with an implicit BlockType mismatch
1141          // will cause the requesting scanner to throw a disk read should be
1142          // forced here. This will potentially cause a significant number of
1143          // cache misses, so update so we should keep track of this as it might
1144          // justify the work on a CompoundScanner.
1145          if (
1146            !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE)
1147              && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)
1148          ) {
1149            // If the block is encoded but the encoding does not match the
1150            // expected encoding it is likely the encoding was changed but the
1151            // block was not yet evicted. Evictions on file close happen async
1152            // so blocks with the old encoding still linger in cache for some
1153            // period of time. This event should be rare as it only happens on
1154            // schema definition change.
1155            LOG.info(
1156              "Evicting cached block with key {} because data block encoding mismatch; "
1157                + "expected {}, actual {}, path={}",
1158              cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path);
1159            // This is an error scenario. so here we need to release the block.
1160            returnAndEvictBlock(cache, cacheKey, cachedBlock);
1161          }
1162          return null;
1163        }
1164        return cachedBlock;
1165      }
1166    }
1167    return null;
1168  }
1169
1170  private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) {
1171    block.release();
1172    cache.evictBlock(cacheKey);
1173  }
1174
1175  /**
1176   * @param cacheBlock Add block to cache, if found
1177   * @return block wrapped in a ByteBuffer, with header skipped
1178   */
1179  @Override
1180  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException {
1181    if (trailer.getMetaIndexCount() == 0) {
1182      return null; // there are no meta blocks
1183    }
1184    if (metaBlockIndexReader == null) {
1185      throw new IOException(path + " meta index not loaded");
1186    }
1187
1188    byte[] mbname = Bytes.toBytes(metaBlockName);
1189    int block = metaBlockIndexReader.rootBlockContainingKey(mbname, 0, mbname.length);
1190    if (block == -1) {
1191      return null;
1192    }
1193    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);
1194
1195    // Per meta key from any given file, synchronize reads for said block. This
1196    // is OK to do for meta blocks because the meta block index is always
1197    // single-level.
1198    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
1199      // Check cache for block. If found return.
1200      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
1201      BlockCacheKey cacheKey =
1202        new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
1203
1204      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
1205      HFileBlock cachedBlock =
1206        getCachedBlock(cacheKey, cacheBlock, false, true, BlockType.META, null);
1207      if (cachedBlock != null) {
1208        assert cachedBlock.isUnpacked() : "Packed block leak.";
1209        // Return a distinct 'shallow copy' of the block,
1210        // so pos does not get messed by the scanner
1211        return cachedBlock;
1212      }
1213      // Cache Miss, please load.
1214
1215      HFileBlock compressedBlock =
1216        fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
1217      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1218      if (compressedBlock != uncompressedBlock) {
1219        compressedBlock.release();
1220      }
1221
1222      // Cache the block
1223      if (cacheBlock) {
1224        cacheConf.getBlockCache().ifPresent(
1225          cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
1226      }
1227      return uncompressedBlock;
1228    }
1229  }
1230
1231  /**
1232   * Whether we use heap or not depends on our intent to cache the block. We want to avoid
1233   * allocating to off-heap if we intend to cache into the on-heap L1 cache. Otherwise, it's more
1234   * efficient to allocate to off-heap since we can control GC ourselves for those. So our decision
1235   * here breaks down as follows: <br>
1236   * If block cache is disabled, don't use heap. If we're not using the CombinedBlockCache, use heap
1237   * unless caching is disabled for the request. Otherwise, only use heap if caching is enabled and
1238   * the expected block type is not DATA (which goes to off-heap L2 in combined cache).
1239   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
1240   *      boolean, boolean)
1241   */
1242  private boolean shouldUseHeap(BlockType expectedBlockType, boolean cacheBlock) {
1243    if (!cacheConf.getBlockCache().isPresent()) {
1244      return false;
1245    }
1246
1247    // we only cache a block if cacheBlock is true and caching-on-read is enabled in CacheConfig
1248    // we can really only check for that if have an expectedBlockType
1249    if (expectedBlockType != null) {
1250      cacheBlock &= cacheConf.shouldCacheBlockOnRead(expectedBlockType.getCategory());
1251    }
1252
1253    if (!cacheConf.isCombinedBlockCache()) {
1254      // Block to cache in LruBlockCache must be an heap one, if caching enabled. So just allocate
1255      // block memory from heap for saving an extra off-heap to heap copying in that case.
1256      return cacheBlock;
1257    }
1258
1259    return cacheBlock && expectedBlockType != null && !expectedBlockType.isData();
1260  }
1261
1262  @Override
1263  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
1264    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
1265    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1266    return readBlock(dataBlockOffset, onDiskBlockSize, cacheBlock, pread, isCompaction,
1267      updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding, false);
1268  }
1269
1270  @Override
1271  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
1272    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
1273    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly)
1274    throws IOException {
1275    if (dataBlockIndexReader == null) {
1276      throw new IOException(path + " block index not loaded");
1277    }
1278    long trailerOffset = trailer.getLoadOnOpenDataOffset();
1279    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
1280      throw new IOException("Requested block is out of range: " + dataBlockOffset
1281        + ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset()
1282        + ", trailer.getLoadOnOpenDataOffset: " + trailerOffset + ", path=" + path);
1283    }
1284    // For any given block from any given file, synchronize reads for said
1285    // block.
1286    // Without a cache, this synchronizing is needless overhead, but really
1287    // the other choice is to duplicate work (which the cache would prevent you
1288    // from doing).
1289
1290    BlockCacheKey cacheKey =
1291      new BlockCacheKey(path, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType);
1292    Attributes attributes = Attributes.of(BLOCK_CACHE_KEY_KEY, cacheKey.toString());
1293
1294    boolean useLock = false;
1295    IdLock.Entry lockEntry = null;
1296    final Span span = Span.current();
1297    try {
1298      while (true) {
1299        // Check cache for block. If found return.
1300        if (cacheConf.shouldReadBlockFromCache(expectedBlockType) && !cacheOnly) {
1301          if (useLock) {
1302            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
1303          }
1304          // Try and get the block from the block cache. If the useLock variable is true then this
1305          // is the second time through the loop and it should not be counted as a block cache miss.
1306          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics,
1307            expectedBlockType, expectedDataBlockEncoding);
1308          if (cachedBlock != null) {
1309            if (LOG.isTraceEnabled()) {
1310              LOG.trace("Block for file {} is coming from Cache {}",
1311                Bytes.toString(cachedBlock.getHFileContext().getTableName()), cachedBlock);
1312            }
1313            span.addEvent("block cache hit", attributes);
1314            assert cachedBlock.isUnpacked() : "Packed block leak.";
1315            if (cachedBlock.getBlockType().isData()) {
1316              if (updateCacheMetrics) {
1317                HFile.DATABLOCK_READ_COUNT.increment();
1318              }
1319              // Validate encoding type for data blocks. We include encoding
1320              // type in the cache key, and we expect it to match on a cache hit.
1321              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
1322                // Remember to release the block when in exceptional path.
1323                cacheConf.getBlockCache().ifPresent(cache -> {
1324                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
1325                });
1326                throw new IOException("Cached block under key " + cacheKey + " "
1327                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
1328                  + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
1329              }
1330            }
1331            // Cache-hit. Return!
1332            return cachedBlock;
1333          }
1334
1335          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
1336            // check cache again with lock
1337            useLock = true;
1338            continue;
1339          }
1340          // Carry on, please load.
1341        }
1342
1343        span.addEvent("block cache miss", attributes);
1344        // Load block from filesystem.
1345        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
1346          !isCompaction, shouldUseHeap(expectedBlockType, cacheBlock));
1347        try {
1348          validateBlockType(hfileBlock, expectedBlockType);
1349        } catch (IOException e) {
1350          hfileBlock.release();
1351          throw e;
1352        }
1353        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
1354        final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category);
1355        final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category);
1356
1357        // Don't need the unpacked block back and we're storing the block in the cache compressed
1358        if (cacheOnly && cacheCompressed && cacheOnRead) {
1359          HFileBlock blockNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, hfileBlock);
1360          cacheConf.getBlockCache().ifPresent(cache -> {
1361            LOG.debug("Skipping decompression of block {} in prefetch", cacheKey);
1362            // Cache the block if necessary
1363            if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
1364              cache.cacheBlock(cacheKey, blockNoChecksum, cacheConf.isInMemory(), cacheOnly);
1365            }
1366          });
1367
1368          if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
1369            HFile.DATABLOCK_READ_COUNT.increment();
1370          }
1371          return blockNoChecksum;
1372        }
1373        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
1374        HFileBlock unpackedNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, unpacked);
1375        // Cache the block if necessary
1376        cacheConf.getBlockCache().ifPresent(cache -> {
1377          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
1378            // Using the wait on cache during compaction and prefetching.
1379            cache.cacheBlock(cacheKey,
1380              cacheCompressed
1381                ? BlockCacheUtil.getBlockForCaching(cacheConf, hfileBlock)
1382                : unpackedNoChecksum,
1383              cacheConf.isInMemory(), cacheOnly);
1384          }
1385        });
1386        if (unpacked != hfileBlock) {
1387          // End of life here if hfileBlock is an independent block.
1388          hfileBlock.release();
1389        }
1390        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
1391          HFile.DATABLOCK_READ_COUNT.increment();
1392        }
1393
1394        return unpackedNoChecksum;
1395      }
1396    } finally {
1397      if (lockEntry != null) {
1398        offsetLock.releaseLockEntry(lockEntry);
1399      }
1400    }
1401  }
1402
1403  @Override
1404  public boolean hasMVCCInfo() {
1405    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
1406  }
1407
1408  /**
1409   * Compares the actual type of a block retrieved from cache or disk with its expected type and
1410   * throws an exception in case of a mismatch. Expected block type of {@link BlockType#DATA} is
1411   * considered to match the actual block type [@link {@link BlockType#ENCODED_DATA} as well.
1412   * @param block             a block retrieved from cache or disk
1413   * @param expectedBlockType the expected block type, or null to skip the check
1414   */
1415  private void validateBlockType(HFileBlock block, BlockType expectedBlockType) throws IOException {
1416    if (expectedBlockType == null) {
1417      return;
1418    }
1419    BlockType actualBlockType = block.getBlockType();
1420    if (expectedBlockType.isData() && actualBlockType.isData()) {
1421      // We consider DATA to match ENCODED_DATA for the purpose of this
1422      // verification.
1423      return;
1424    }
1425    if (actualBlockType != expectedBlockType) {
1426      throw new IOException("Expected block type " + expectedBlockType + ", " + "but got "
1427        + actualBlockType + ": " + block + ", path=" + path);
1428    }
1429  }
1430
1431  /**
1432   * @return Last key as cell in the file. May be null if file has no entries. Note that this is not
1433   *         the last row key, but it is the Cell representation of the last key
1434   */
1435  @Override
1436  public Optional<ExtendedCell> getLastKey() {
1437    return dataBlockIndexReader.isEmpty()
1438      ? Optional.empty()
1439      : Optional.of(fileInfo.getLastKeyCell());
1440  }
1441
1442  /**
1443   * @return Midkey for this file. We work with block boundaries only so returned midkey is an
1444   *         approximation only.
1445   */
1446  @Override
1447  public Optional<ExtendedCell> midKey() throws IOException {
1448    return Optional.ofNullable(dataBlockIndexReader.midkey(this));
1449  }
1450
1451  @Override
1452  public void close() throws IOException {
1453    close(cacheConf.shouldEvictOnClose());
1454  }
1455
1456  @Override
1457  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
1458    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
1459  }
1460
1461  /** For testing */
1462  @Override
1463  public HFileBlock.FSReader getUncachedBlockReader() {
1464    return fsBlockReader;
1465  }
1466
1467  /**
1468   * Scanner that operates on encoded data blocks.
1469   */
1470  protected static class EncodedScanner extends HFileScannerImpl {
1471    private final HFileBlockDecodingContext decodingCtx;
1472    private final DataBlockEncoder.EncodedSeeker seeker;
1473    private final DataBlockEncoder dataBlockEncoder;
1474
1475    public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, boolean pread,
1476      boolean isCompaction, HFileContext meta, Configuration conf) {
1477      super(reader, cacheBlocks, pread, isCompaction);
1478      DataBlockEncoding encoding = reader.getDataBlockEncoding();
1479      dataBlockEncoder = encoding.getEncoder();
1480      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(conf, meta);
1481      seeker = dataBlockEncoder.createSeeker(decodingCtx);
1482    }
1483
1484    @Override
1485    public boolean isSeeked() {
1486      return curBlock != null;
1487    }
1488
1489    @Override
1490    public void setNonSeekedState() {
1491      reset();
1492    }
1493
1494    /**
1495     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
1496     * key/value pair.
1497     * @param newBlock the block to make current, and read by {@link HFileReaderImpl#readBlock},
1498     *                 it's a totally new block with new allocated {@link ByteBuff}, so if no
1499     *                 further reference to this block, we should release it carefully.
1500     */
1501    @Override
1502    protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
1503      try {
1504        // sanity checks
1505        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
1506          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
1507        }
1508        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
1509        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
1510          String encoderCls = dataBlockEncoder.getClass().getName();
1511          throw new CorruptHFileException(
1512            "Encoder " + encoderCls + " doesn't support data block encoding "
1513              + DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ",path=" + reader.getPath());
1514        }
1515        updateCurrBlockRef(newBlock);
1516        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
1517        seeker.setCurrentBuffer(encodedBuffer);
1518      } finally {
1519        releaseIfNotCurBlock(newBlock);
1520      }
1521      // Reset the next indexed key
1522      this.nextIndexedKey = null;
1523    }
1524
1525    private ByteBuff getEncodedBuffer(HFileBlock newBlock) {
1526      ByteBuff origBlock = newBlock.getBufferReadOnly();
1527      int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE;
1528      origBlock.position(pos);
1529      origBlock
1530        .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
1531      return origBlock.slice();
1532    }
1533
1534    @Override
1535    protected boolean processFirstDataBlock() throws IOException {
1536      seeker.rewind();
1537      return true;
1538    }
1539
1540    @Override
1541    public boolean next() throws IOException {
1542      boolean isValid = seeker.next();
1543      if (!isValid) {
1544        HFileBlock newBlock = readNextDataBlock();
1545        isValid = newBlock != null;
1546        if (isValid) {
1547          updateCurrentBlock(newBlock);
1548        } else {
1549          setNonSeekedState();
1550        }
1551      }
1552      return isValid;
1553    }
1554
1555    @Override
1556    public ExtendedCell getKey() {
1557      assertValidSeek();
1558      return seeker.getKey();
1559    }
1560
1561    @Override
1562    public ByteBuffer getValue() {
1563      assertValidSeek();
1564      return seeker.getValueShallowCopy();
1565    }
1566
1567    @Override
1568    public ExtendedCell getCell() {
1569      if (this.curBlock == null) {
1570        return null;
1571      }
1572      return seeker.getCell();
1573    }
1574
1575    private void assertValidSeek() {
1576      if (this.curBlock == null) {
1577        throw new NotSeekedException(reader.getPath());
1578      }
1579    }
1580
1581    @Override
1582    protected ExtendedCell getFirstKeyCellInBlock(HFileBlock curBlock) {
1583      return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock));
1584    }
1585
1586    @Override
1587    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, ExtendedCell nextIndexedKey,
1588      boolean rewind, ExtendedCell key, boolean seekBefore) throws IOException {
1589      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
1590        updateCurrentBlock(seekToBlock);
1591      } else if (rewind) {
1592        seeker.rewind();
1593      }
1594      this.nextIndexedKey = nextIndexedKey;
1595      return seeker.seekToKeyInBlock(key, seekBefore);
1596    }
1597
1598    @Override
1599    public int compareKey(CellComparator comparator, ExtendedCell key) {
1600      return seeker.compareKey(comparator, key);
1601    }
1602  }
1603
1604  /**
1605   * Returns a buffer with the Bloom filter metadata. The caller takes ownership of the buffer.
1606   */
1607  @Override
1608  public DataInput getGeneralBloomFilterMetadata() throws IOException {
1609    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
1610  }
1611
1612  @Override
1613  public DataInput getDeleteBloomFilterMetadata() throws IOException {
1614    return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
1615  }
1616
1617  private DataInput getBloomFilterMetadata(BlockType blockType) throws IOException {
1618    if (
1619      blockType != BlockType.GENERAL_BLOOM_META && blockType != BlockType.DELETE_FAMILY_BLOOM_META
1620    ) {
1621      throw new RuntimeException(
1622        "Block Type: " + blockType.toString() + " is not supported, path=" + path);
1623    }
1624
1625    for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) {
1626      if (b.getBlockType() == blockType) {
1627        return b.getByteStream();
1628      }
1629    }
1630    return null;
1631  }
1632
  /**
   * Reports whether the file info has been loaded. This implementation unconditionally returns
   * {@code true}.
   * @return always {@code true}
   */
  public boolean isFileInfoLoaded() {
    return true; // We load file info in constructor in version 2.
  }
1636
1637  @Override
1638  public HFileContext getFileContext() {
1639    return hfileContext;
1640  }
1641
1642  /**
1643   * Returns false if block prefetching was requested for this file and has not completed, true
1644   * otherwise
1645   */
1646  @Override
1647  public boolean prefetchComplete() {
1648    return PrefetchExecutor.isCompleted(path);
1649  }
1650
1651  /**
1652   * Returns true if block prefetching was started after waiting for specified delay, false
1653   * otherwise
1654   */
1655  @Override
1656  public boolean prefetchStarted() {
1657    return PrefetchExecutor.isPrefetchStarted();
1658  }
1659
1660  /**
1661   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1662   * {@link HFileScanner#seekTo(ExtendedCell)} to position an start the read. There is nothing to
1663   * clean up in a Scanner. Letting go of your references to the scanner is sufficient. NOTE: Do not
1664   * use this overload of getScanner for compactions. See
1665   * {@link #getScanner(Configuration, boolean, boolean, boolean)}
1666   * @param conf        Store configuration.
1667   * @param cacheBlocks True if we should cache blocks read in by this scanner.
1668   * @param pread       Use positional read rather than seek+read if true (pread is better for
1669   *                    random reads, seek+read is better scanning).
1670   * @return Scanner on this file.
1671   */
1672  @Override
1673  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread) {
1674    return getScanner(conf, cacheBlocks, pread, false);
1675  }
1676
1677  /**
1678   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1679   * {@link HFileScanner#seekTo(ExtendedCell)} to position an start the read. There is nothing to
1680   * clean up in a Scanner. Letting go of your references to the scanner is sufficient.
1681   * @param conf         Store configuration.
1682   * @param cacheBlocks  True if we should cache blocks read in by this scanner.
1683   * @param pread        Use positional read rather than seek+read if true (pread is better for
1684   *                     random reads, seek+read is better scanning).
1685   * @param isCompaction is scanner being used for a compaction?
1686   * @return Scanner on this file.
1687   */
1688  @Override
1689  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread,
1690    final boolean isCompaction) {
1691    if (dataBlockEncoder.useEncodedScanner()) {
1692      return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext, conf);
1693    }
1694    return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
1695  }
1696
  /**
   * Returns the major version number reported by this reader.
   * @return the constant {@code 3}
   */
  public int getMajorVersion() {
    return 3;
  }
1700
1701  @Override
1702  public void unbufferStream() {
1703    fsBlockReader.unbufferStream();
1704  }
1705}