001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.BLOCK_CACHE_KEY_KEY;
021
022import io.opentelemetry.api.common.Attributes;
023import io.opentelemetry.api.trace.Span;
024import java.io.DataInput;
025import java.io.IOException;
026import java.nio.ByteBuffer;
027import java.util.ArrayList;
028import java.util.Optional;
029import org.apache.hadoop.conf.Configurable;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
033import org.apache.hadoop.hbase.Cell;
034import org.apache.hadoop.hbase.CellComparator;
035import org.apache.hadoop.hbase.CellUtil;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.KeyValue;
038import org.apache.hadoop.hbase.PrivateCellUtil;
039import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue;
040import org.apache.hadoop.hbase.SizeCachedKeyValue;
041import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue;
042import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
043import org.apache.hadoop.hbase.io.compress.Compression;
044import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
045import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
046import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
047import org.apache.hadoop.hbase.nio.ByteBuff;
048import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
049import org.apache.hadoop.hbase.util.ByteBufferUtils;
050import org.apache.hadoop.hbase.util.Bytes;
051import org.apache.hadoop.hbase.util.IdLock;
052import org.apache.hadoop.hbase.util.ObjectIntPair;
053import org.apache.hadoop.io.WritableUtils;
054import org.apache.yetus.audience.InterfaceAudience;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057
058/**
059 * Implementation that can handle all hfile versions of {@link HFile.Reader}.
060 */
061@InterfaceAudience.Private
062@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
063public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
064  // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into
065  // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against
066  // the MaxInlineLevel limit because too many tiers involved reading from an hfile. Was also hard
067  // to navigate the source code when so many classes participating in read.
068  private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);
069
070  /** Data block index reader keeping the root data index in memory */
071  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;
072
073  /** Meta block index reader -- always single level */
074  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;
075
076  protected FixedFileTrailer trailer;
077
078  private final boolean primaryReplicaReader;
079
080  /**
081   * What kind of data block encoding should be used while reading, writing, and handling cache.
082   */
083  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;
084
085  /** Block cache configuration. */
086  protected final CacheConfig cacheConf;
087
088  protected ReaderContext context;
089
090  protected final HFileInfo fileInfo;
091
092  /** Path of file */
093  protected final Path path;
094
095  /** File name to be used for block names */
096  protected final String name;
097
098  private Configuration conf;
099
100  protected HFileContext hfileContext;
101
102  /** Filesystem-level block reader. */
103  protected HFileBlock.FSReader fsBlockReader;
104
105  /**
106   * A "sparse lock" implementation allowing to lock on a particular block identified by offset. The
107   * purpose of this is to avoid two clients loading the same block, and have all but one client
108   * wait to get the block from the cache.
109   */
110  private IdLock offsetLock = new IdLock();
111
112  /** Minimum minor version supported by this HFile format */
113  static final int MIN_MINOR_VERSION = 0;
114
115  /** Maximum minor version supported by this HFile format */
116  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
117  // the file. This version can read Writables version 1.
118  static final int MAX_MINOR_VERSION = 3;
119
120  /** Minor versions starting with this number have faked index key */
121  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
122
123  /**
124   * Opens a HFile.
125   * @param context   Reader context info
126   * @param fileInfo  HFile info
127   * @param cacheConf Cache configuration.
128   * @param conf      Configuration
129   */
130  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
131  public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
132    Configuration conf) throws IOException {
133    this.cacheConf = cacheConf;
134    this.context = context;
135    this.path = context.getFilePath();
136    this.name = path.getName();
137    this.conf = conf;
138    this.primaryReplicaReader = context.isPrimaryReplicaReader();
139    this.fileInfo = fileInfo;
140    this.trailer = fileInfo.getTrailer();
141    this.hfileContext = fileInfo.getHFileContext();
142    this.fsBlockReader =
143      new HFileBlock.FSReaderImpl(context, hfileContext, cacheConf.getByteBuffAllocator(), conf);
144    this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
145    fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
146    dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
147    metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
148  }
149
150  @SuppressWarnings("serial")
151  public static class BlockIndexNotLoadedException extends IllegalStateException {
152    public BlockIndexNotLoadedException(Path path) {
153      // Add a message in case anyone relies on it as opposed to class name.
154      super(path + " block index not loaded");
155    }
156  }
157
158  private Optional<String> toStringFirstKey() {
159    return getFirstKey().map(CellUtil::getCellKeyAsString);
160  }
161
162  private Optional<String> toStringLastKey() {
163    return getLastKey().map(CellUtil::getCellKeyAsString);
164  }
165
166  @Override
167  public String toString() {
168    return "reader=" + path.toString()
169      + (!isFileInfoLoaded()
170        ? ""
171        : ", compression=" + trailer.getCompressionCodec().getName() + ", cacheConf=" + cacheConf
172          + ", firstKey=" + toStringFirstKey() + ", lastKey=" + toStringLastKey())
173      + ", avgKeyLen=" + fileInfo.getAvgKeyLen() + ", avgValueLen=" + fileInfo.getAvgValueLen()
174      + ", entries=" + trailer.getEntryCount() + ", length=" + context.getFileSize();
175  }
176
177  @Override
178  public long length() {
179    return context.getFileSize();
180  }
181
182  /**
183   * @return the first key in the file. May be null if file has no entries. Note that this is not
184   *         the first row key, but rather the byte form of the first KeyValue.
185   */
186  @Override
187  public Optional<Cell> getFirstKey() {
188    if (dataBlockIndexReader == null) {
189      throw new BlockIndexNotLoadedException(path);
190    }
191    return dataBlockIndexReader.isEmpty()
192      ? Optional.empty()
193      : Optional.of(dataBlockIndexReader.getRootBlockKey(0));
194  }
195
196  /**
197   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
198   * eliminate {@link KeyValue} here.
199   * @return the first row key, or null if the file is empty.
200   */
201  @Override
202  public Optional<byte[]> getFirstRowKey() {
203    // We have to copy the row part to form the row key alone
204    return getFirstKey().map(CellUtil::cloneRow);
205  }
206
207  /**
208   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
209   * eliminate {@link KeyValue} here.
210   * @return the last row key, or null if the file is empty.
211   */
212  @Override
213  public Optional<byte[]> getLastRowKey() {
214    // We have to copy the row part to form the row key alone
215    return getLastKey().map(CellUtil::cloneRow);
216  }
217
218  /** Returns number of KV entries in this HFile */
219  @Override
220  public long getEntries() {
221    return trailer.getEntryCount();
222  }
223
224  /** Returns comparator */
225  @Override
226  public CellComparator getComparator() {
227    return this.hfileContext.getCellComparator();
228  }
229
230  public Compression.Algorithm getCompressionAlgorithm() {
231    return trailer.getCompressionCodec();
232  }
233
234  /**
235   * @return the total heap size of data and meta block indexes in bytes. Does not take into account
236   *         non-root blocks of a multilevel data index.
237   */
238  @Override
239  public long indexSize() {
240    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
241      + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() : 0);
242  }
243
244  @Override
245  public String getName() {
246    return name;
247  }
248
249  @Override
250  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
251    this.dataBlockEncoder = dataBlockEncoder;
252    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
253  }
254
255  @Override
256  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
257    this.dataBlockIndexReader = reader;
258  }
259
260  @Override
261  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
262    return dataBlockIndexReader;
263  }
264
265  @Override
266  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
267    this.metaBlockIndexReader = reader;
268  }
269
270  @Override
271  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
272    return metaBlockIndexReader;
273  }
274
275  @Override
276  public FixedFileTrailer getTrailer() {
277    return trailer;
278  }
279
280  @Override
281  public ReaderContext getContext() {
282    return this.context;
283  }
284
285  @Override
286  public HFileInfo getHFileInfo() {
287    return this.fileInfo;
288  }
289
290  @Override
291  public boolean isPrimaryReplicaReader() {
292    return primaryReplicaReader;
293  }
294
295  /**
296   * An exception thrown when an operation requiring a scanner to be seeked is invoked on a scanner
297   * that is not seeked.
298   */
299  @SuppressWarnings("serial")
300  public static class NotSeekedException extends IllegalStateException {
301    public NotSeekedException(Path path) {
302      super(path + " not seeked to a key/value");
303    }
304  }
305
306  protected static class HFileScannerImpl implements HFileScanner {
307    private ByteBuff blockBuffer;
308    protected final boolean cacheBlocks;
309    protected final boolean pread;
310    protected final boolean isCompaction;
311    private int currKeyLen;
312    private int currValueLen;
313    private int currMemstoreTSLen;
314    private long currMemstoreTS;
315    protected final HFile.Reader reader;
316    private int currTagsLen;
317    private short rowLen;
318    // buffer backed keyonlyKV
319    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
320    // A pair for reusing in blockSeek() so that we don't garbage lot of objects
321    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();
322
323    /**
324     * The next indexed key is to keep track of the indexed key of the next data block. If the
325     * nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the current data block is the
326     * last data block. If the nextIndexedKey is null, it means the nextIndexedKey has not been
327     * loaded yet.
328     */
329    protected Cell nextIndexedKey;
330    // Current block being used. NOTICE: DON't release curBlock separately except in shipped() or
331    // close() methods. Because the shipped() or close() will do the release finally, even if any
332    // exception occur the curBlock will be released by the close() method (see
333    // RegionScannerImpl#handleException). Call the releaseIfNotCurBlock() to release the
334    // unreferenced block please.
335    protected HFileBlock curBlock;
336    // Previous blocks that were used in the course of the read
337    protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();
338
339    public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
340      final boolean pread, final boolean isCompaction) {
341      this.reader = reader;
342      this.cacheBlocks = cacheBlocks;
343      this.pread = pread;
344      this.isCompaction = isCompaction;
345    }
346
347    void updateCurrBlockRef(HFileBlock block) {
348      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
349        return;
350      }
351      if (this.curBlock != null && this.curBlock.isSharedMem()) {
352        prevBlocks.add(this.curBlock);
353      }
354      this.curBlock = block;
355    }
356
357    void reset() {
358      // We don't have to keep ref to heap block
359      if (this.curBlock != null && this.curBlock.isSharedMem()) {
360        this.prevBlocks.add(this.curBlock);
361      }
362      this.curBlock = null;
363    }
364
365    private void returnBlocks(boolean returnAll) {
366      this.prevBlocks.forEach(HFileBlock::release);
367      this.prevBlocks.clear();
368      if (returnAll && this.curBlock != null) {
369        this.curBlock.release();
370        this.curBlock = null;
371      }
372    }
373
374    @Override
375    public boolean isSeeked() {
376      return blockBuffer != null;
377    }
378
379    @Override
380    public String toString() {
381      return "HFileScanner for reader " + String.valueOf(getReader());
382    }
383
384    protected void assertSeeked() {
385      if (!isSeeked()) {
386        throw new NotSeekedException(reader.getPath());
387      }
388    }
389
390    @Override
391    public HFile.Reader getReader() {
392      return reader;
393    }
394
395    // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile
396    // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous
397    // array/buffer. How many bytes we should wrap to make the KV is what this method returns.
398    private int getKVBufSize() {
399      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
400      if (currTagsLen > 0) {
401        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
402      }
403      return kvBufSize;
404    }
405
406    @Override
407    public void close() {
408      if (!pread) {
409        // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393
410        reader.unbufferStream();
411      }
412      this.returnBlocks(true);
413    }
414
415    // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current
416    // HFile block's buffer so as to position to the next cell.
417    private int getCurCellSerializedSize() {
418      int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + currMemstoreTSLen;
419      if (this.reader.getFileContext().isIncludesTags()) {
420        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
421      }
422      return curCellSize;
423    }
424
425    protected void readKeyValueLen() {
426      // This is a hot method. We go out of our way to make this method short so it can be
427      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
428      // because it is faster than going via range-checked ByteBuffer methods or going through a
429      // byte buffer array a byte at a time.
430      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
431      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
432      // Trying to imitate what was done - need to profile if this is better or
433      // earlier way is better by doing mark and reset?
434      // But ensure that you read long instead of two ints
435      long ll = blockBuffer.getLongAfterPosition(0);
436      // Read top half as an int of key length and bottom int as value length
437      this.currKeyLen = (int) (ll >> Integer.SIZE);
438      this.currValueLen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
439      checkKeyValueLen();
440      this.rowLen = blockBuffer.getShortAfterPosition(Bytes.SIZEOF_LONG);
441      // Move position past the key and value lengths and then beyond the key and value
442      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
443      if (reader.getFileContext().isIncludesTags()) {
444        // Tags length is a short.
445        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
446        checkTagsLen();
447        p += (Bytes.SIZEOF_SHORT + currTagsLen);
448      }
449      readMvccVersion(p);
450    }
451
452    private final void checkTagsLen() {
453      if (checkLen(this.currTagsLen)) {
454        throw new IllegalStateException(
455          "Invalid currTagsLen " + this.currTagsLen + ". Block offset: " + curBlock.getOffset()
456            + ", block length: " + this.blockBuffer.limit() + ", position: "
457            + this.blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
458      }
459    }
460
461    /**
462     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
463     */
464    protected void readMvccVersion(final int offsetFromPos) {
465      // See if we even need to decode mvcc.
466      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
467        return;
468      }
469      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
470        currMemstoreTS = 0;
471        currMemstoreTSLen = 1;
472        return;
473      }
474      _readMvccVersion(offsetFromPos);
475    }
476
477    /**
478     * Actually do the mvcc read. Does no checks.
479     */
480    private void _readMvccVersion(int offsetFromPos) {
481      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
482      // previous if one-byte vint, we'd redo the vint call to find int size.
483      // Also the method is kept small so can be inlined.
484      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
485      int len = WritableUtils.decodeVIntSize(firstByte);
486      if (len == 1) {
487        this.currMemstoreTS = firstByte;
488      } else {
489        int remaining = len - 1;
490        long i = 0;
491        offsetFromPos++;
492        if (remaining >= Bytes.SIZEOF_INT) {
493          // The int read has to be converted to unsigned long so the & op
494          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
495          remaining -= Bytes.SIZEOF_INT;
496          offsetFromPos += Bytes.SIZEOF_INT;
497        }
498        if (remaining >= Bytes.SIZEOF_SHORT) {
499          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
500          i = i << 16;
501          i = i | (s & 0xFFFF);
502          remaining -= Bytes.SIZEOF_SHORT;
503          offsetFromPos += Bytes.SIZEOF_SHORT;
504        }
505        for (int idx = 0; idx < remaining; idx++) {
506          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
507          i = i << 8;
508          i = i | (b & 0xFF);
509        }
510        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
511      }
512      this.currMemstoreTSLen = len;
513    }
514
515    /**
516     * Within a loaded block, seek looking for the last key that is smaller than (or equal to?) the
517     * key we are interested in. A note on the seekBefore: if you have seekBefore = true, AND the
518     * first key in the block = key, then you'll get thrown exceptions. The caller has to check for
519     * that case and load the previous block as appropriate. the key to find find the key before the
520     * given key in case of exact match.
521     * @return 0 in case of an exact key match, 1 in case of an inexact match, -2 in case of an
522     *         inexact match and furthermore, the input key less than the first key of current
523     *         block(e.g. using a faked index key)
524     */
525    protected int blockSeek(Cell key, boolean seekBefore) {
526      int klen, vlen, tlen = 0;
527      int lastKeyValueSize = -1;
528      int offsetFromPos;
529      do {
530        offsetFromPos = 0;
531        // Better to ensure that we use the BB Utils here
532        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
533        klen = (int) (ll >> Integer.SIZE);
534        vlen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
535        if (checkKeyLen(klen) || checkLen(vlen)) {
536          throw new IllegalStateException(
537            "Invalid klen " + klen + " or vlen " + vlen + ". Block offset: " + curBlock.getOffset()
538              + ", block length: " + blockBuffer.limit() + ", position: " + blockBuffer.position()
539              + " (without header)." + " path=" + reader.getPath());
540        }
541        offsetFromPos += Bytes.SIZEOF_LONG;
542        this.rowLen = blockBuffer.getShortAfterPosition(offsetFromPos);
543        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
544        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen, rowLen);
545        int comp =
546          PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
547        offsetFromPos += klen + vlen;
548        if (this.reader.getFileContext().isIncludesTags()) {
549          // Read short as unsigned, high byte first
550          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
551            ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
552          if (checkLen(tlen)) {
553            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
554              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
555              + blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
556          }
557          // add the two bytes read for the tags.
558          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
559        }
560        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
561          // Directly read the mvcc based on current position
562          readMvccVersion(offsetFromPos);
563        }
564        if (comp == 0) {
565          if (seekBefore) {
566            if (lastKeyValueSize < 0) {
567              throw new IllegalStateException("blockSeek with seekBefore "
568                + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
569                + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
570                + curBlock.getOnDiskSizeWithHeader() + ", path=" + reader.getPath());
571            }
572            blockBuffer.moveBack(lastKeyValueSize);
573            readKeyValueLen();
574            return 1; // non exact match.
575          }
576          currKeyLen = klen;
577          currValueLen = vlen;
578          currTagsLen = tlen;
579          return 0; // indicate exact match
580        } else if (comp < 0) {
581          if (lastKeyValueSize > 0) {
582            blockBuffer.moveBack(lastKeyValueSize);
583          }
584          readKeyValueLen();
585          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
586            return HConstants.INDEX_KEY_MAGIC;
587          }
588          return 1;
589        }
590        // The size of this key/value tuple, including key/value length fields.
591        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
592        // include tag length also if tags included with KV
593        if (reader.getFileContext().isIncludesTags()) {
594          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
595        }
596        blockBuffer.skip(lastKeyValueSize);
597      } while (blockBuffer.hasRemaining());
598
599      // Seek to the last key we successfully read. This will happen if this is
600      // the last key/value pair in the file, in which case the following call
601      // to next() has to return false.
602      blockBuffer.moveBack(lastKeyValueSize);
603      readKeyValueLen();
604      return 1; // didn't exactly find it.
605    }
606
607    @Override
608    public Cell getNextIndexedKey() {
609      return nextIndexedKey;
610    }
611
612    @Override
613    public int seekTo(Cell key) throws IOException {
614      return seekTo(key, true);
615    }
616
617    @Override
618    public int reseekTo(Cell key) throws IOException {
619      int compared;
620      if (isSeeked()) {
621        compared = compareKey(reader.getComparator(), key);
622        if (compared < 1) {
623          // If the required key is less than or equal to current key, then
624          // don't do anything.
625          return compared;
626        } else {
627          // The comparison with no_next_index_key has to be checked
628          if (
629            this.nextIndexedKey != null && (this.nextIndexedKey
630                == KeyValueScanner.NO_NEXT_INDEXED_KEY
631              || PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey)
632                  < 0)
633          ) {
634            // The reader shall continue to scan the current data block instead
635            // of querying the
636            // block index as long as it knows the target key is strictly
637            // smaller than
638            // the next indexed key or the current data block is the last data
639            // block.
640            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key, false);
641          }
642        }
643      }
644      // Don't rewind on a reseek operation, because reseek implies that we are
645      // always going forward in the file.
646      return seekTo(key, false);
647    }
648
649    /**
650     * An internal API function. Seek to the given key, optionally rewinding to the first key of the
651     * block before doing the seek.
652     * @param key    - a cell representing the key that we need to fetch
653     * @param rewind whether to rewind to the first key of the block before doing the seek. If this
654     *               is false, we are assuming we never go back, otherwise the result is undefined.
655     * @return -1 if the key is earlier than the first key of the file, 0 if we are at the given
656     *         key, 1 if we are past the given key -2 if the key is earlier than the first key of
657     *         the file while using a faked index key
658     */
659    public int seekTo(Cell key, boolean rewind) throws IOException {
660      HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
661      BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
662        cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader);
663      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
664        // This happens if the key e.g. falls before the beginning of the file.
665        return -1;
666      }
667      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
668        blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
669    }
670
671    @Override
672    public boolean seekBefore(Cell key) throws IOException {
673      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
674        cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction), reader);
675      if (seekToBlock == null) {
676        return false;
677      }
678      Cell firstKey = getFirstKeyCellInBlock(seekToBlock);
679      if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) {
680        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
681        // The key we are interested in
682        if (previousBlockOffset == -1) {
683          // we have a 'problem', the key we want is the first of the file.
684          releaseIfNotCurBlock(seekToBlock);
685          return false;
686        }
687
688        // The first key in the current block 'seekToBlock' is greater than the given
689        // seekBefore key. We will go ahead by reading the next block that satisfies the
690        // given key. Return the current block before reading the next one.
691        releaseIfNotCurBlock(seekToBlock);
692        // It is important that we compute and pass onDiskSize to the block
693        // reader so that it does not have to read the header separately to
694        // figure out the size. Currently, we do not have a way to do this
695        // correctly in the general case however.
696        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
697        int prevBlockSize = -1;
698        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
699          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
700        // TODO shortcut: seek forward in this block to the last key of the
701        // block.
702      }
703      loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true);
704      return true;
705    }
706
707    /**
708     * The curBlock will be released by shipping or close method, so only need to consider releasing
709     * the block, which was read from HFile before and not referenced by curBlock.
710     */
711    protected void releaseIfNotCurBlock(HFileBlock block) {
712      if (curBlock != block) {
713        block.release();
714      }
715    }
716
717    /**
718     * Scans blocks in the "scanned" section of the {@link HFile} until the next data block is
719     * found.
720     * @return the next block, or null if there are no more data blocks
721     */
722    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH",
723        justification = "Yeah, unnecessary null check; could do w/ clean up")
724    protected HFileBlock readNextDataBlock() throws IOException {
725      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
726      if (curBlock == null) {
727        return null;
728      }
729      HFileBlock block = this.curBlock;
730      do {
731        if (block.getOffset() >= lastDataBlockOffset) {
732          releaseIfNotCurBlock(block);
733          return null;
734        }
735        if (block.getOffset() < 0) {
736          releaseIfNotCurBlock(block);
737          throw new IOException("Invalid block offset: " + block + ", path=" + reader.getPath());
738        }
739        // We are reading the next block without block type validation, because
740        // it might turn out to be a non-data block.
741        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
742          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
743          getEffectiveDataBlockEncoding());
744        if (block != null && !block.getBlockType().isData()) {
745          // Whatever block we read we will be returning it unless
746          // it is a datablock. Just in case the blocks are non data blocks
747          block.release();
748        }
749      } while (!block.getBlockType().isData());
750      return block;
751    }
752
753    public DataBlockEncoding getEffectiveDataBlockEncoding() {
754      return this.reader.getEffectiveEncodingInCache(isCompaction);
755    }
756
757    @Override
758    public Cell getCell() {
759      if (!isSeeked()) {
760        return null;
761      }
762
763      Cell ret;
764      int cellBufSize = getKVBufSize();
765      long seqId = 0L;
766      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
767        seqId = currMemstoreTS;
768      }
769      if (blockBuffer.hasArray()) {
770        // TODO : reduce the varieties of KV here. Check if based on a boolean
771        // we can handle the 'no tags' case.
772        if (currTagsLen > 0) {
773          ret = new SizeCachedKeyValue(blockBuffer.array(),
774            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
775            rowLen);
776        } else {
777          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
778            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
779            rowLen);
780        }
781      } else {
782        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
783        if (buf.isDirect()) {
784          ret = currTagsLen > 0
785            ? new SizeCachedByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId, currKeyLen,
786              rowLen)
787            : new SizeCachedNoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
788              currKeyLen, rowLen);
789        } else {
790          if (currTagsLen > 0) {
791            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
792              cellBufSize, seqId, currKeyLen, rowLen);
793          } else {
794            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
795              cellBufSize, seqId, currKeyLen, rowLen);
796          }
797        }
798      }
799      return ret;
800    }
801
802    @Override
803    public Cell getKey() {
804      assertSeeked();
805      // Create a new object so that this getKey is cached as firstKey, lastKey
806      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
807      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair);
808      ByteBuffer keyBuf = keyPair.getFirst();
809      if (keyBuf.hasArray()) {
810        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(),
811          keyBuf.arrayOffset() + keyPair.getSecond(), currKeyLen);
812      } else {
813        // Better to do a copy here instead of holding on to this BB so that
814        // we could release the blocks referring to this key. This key is specifically used
815        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
816        // every time. So holding onto the BB (incase of DBB) is not advised here.
817        byte[] key = new byte[currKeyLen];
818        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
819        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
820      }
821    }
822
823    @Override
824    public ByteBuffer getValue() {
825      assertSeeked();
826      // Okie to create new Pair. Not used in hot path
827      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
828      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
829        currValueLen, valuePair);
830      ByteBuffer valBuf = valuePair.getFirst().duplicate();
831      valBuf.position(valuePair.getSecond());
832      valBuf.limit(currValueLen + valuePair.getSecond());
833      return valBuf.slice();
834    }
835
836    protected void setNonSeekedState() {
837      reset();
838      blockBuffer = null;
839      currKeyLen = 0;
840      currValueLen = 0;
841      currMemstoreTS = 0;
842      currMemstoreTSLen = 0;
843      currTagsLen = 0;
844    }
845
846    /**
847     * Set the position on current backing blockBuffer.
848     */
849    private void positionThisBlockBuffer() {
850      try {
851        blockBuffer.skip(getCurCellSerializedSize());
852      } catch (IllegalArgumentException e) {
853        LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen
854          + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit()
855          + "; currBlock currBlockOffset = " + this.curBlock.getOffset() + "; path="
856          + reader.getPath());
857        throw e;
858      }
859    }
860
861    /**
862     * Set our selves up for the next 'next' invocation, set up next block.
863     * @return True is more to read else false if at the end.
864     */
865    private boolean positionForNextBlock() throws IOException {
866      // Methods are small so they get inlined because they are 'hot'.
867      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
868      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
869        setNonSeekedState();
870        return false;
871      }
872      return isNextBlock();
873    }
874
875    private boolean isNextBlock() throws IOException {
876      // Methods are small so they get inlined because they are 'hot'.
877      HFileBlock nextBlock = readNextDataBlock();
878      if (nextBlock == null) {
879        setNonSeekedState();
880        return false;
881      }
882      updateCurrentBlock(nextBlock);
883      return true;
884    }
885
886    private final boolean _next() throws IOException {
887      // Small method so can be inlined. It is a hot one.
888      if (blockBuffer.remaining() <= 0) {
889        return positionForNextBlock();
890      }
891
892      // We are still in the same block.
893      readKeyValueLen();
894      return true;
895    }
896
897    /**
898     * Go to the next key/value in the block section. Loads the next block if necessary. If
899     * successful, {@link #getKey()} and {@link #getValue()} can be called.
900     * @return true if successfully navigated to the next key/value
901     */
902    @Override
903    public boolean next() throws IOException {
904      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
905      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
906      assertSeeked();
907      positionThisBlockBuffer();
908      return _next();
909    }
910
911    /**
912     * Positions this scanner at the start of the file.
913     * @return false if empty file; i.e. a call to next would return false and the current key and
914     *         value are undefined.
915     */
916    @Override
917    public boolean seekTo() throws IOException {
918      if (reader == null) {
919        return false;
920      }
921
922      if (reader.getTrailer().getEntryCount() == 0) {
923        // No data blocks.
924        return false;
925      }
926
927      long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
928      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
929        return processFirstDataBlock();
930      }
931
932      readAndUpdateNewBlock(firstDataBlockOffset);
933      return true;
934    }
935
936    protected boolean processFirstDataBlock() throws IOException {
937      blockBuffer.rewind();
938      readKeyValueLen();
939      return true;
940    }
941
942    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
943      HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
944        isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
945      if (newBlock.getOffset() < 0) {
946        releaseIfNotCurBlock(newBlock);
947        throw new IOException(
948          "Invalid offset=" + newBlock.getOffset() + ", path=" + reader.getPath());
949      }
950      updateCurrentBlock(newBlock);
951    }
952
953    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
954      Cell key, boolean seekBefore) throws IOException {
955      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
956        updateCurrentBlock(seekToBlock);
957      } else if (rewind) {
958        blockBuffer.rewind();
959      }
960      // Update the nextIndexedKey
961      this.nextIndexedKey = nextIndexedKey;
962      return blockSeek(key, seekBefore);
963    }
964
965    /** Returns True if v &lt;= 0 or v &gt; current block buffer limit. */
966    protected final boolean checkKeyLen(final int v) {
967      return v <= 0 || v > this.blockBuffer.limit();
968    }
969
970    /** Returns True if v &lt; 0 or v &gt; current block buffer limit. */
971    protected final boolean checkLen(final int v) {
972      return v < 0 || v > this.blockBuffer.limit();
973    }
974
975    /**
976     * Check key and value lengths are wholesome.
977     */
978    protected final void checkKeyValueLen() {
979      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
980        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
981          + " or currValueLen " + this.currValueLen + ". Block offset: " + this.curBlock.getOffset()
982          + ", block length: " + this.blockBuffer.limit() + ", position: "
983          + this.blockBuffer.position() + " (without header)." + ", path=" + reader.getPath());
984      }
985    }
986
987    /**
988     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
989     * key/value pair.
990     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}, it's a totally new block
991     *                 with new allocated {@link ByteBuff}, so if no further reference to this
992     *                 block, we should release it carefully.
993     */
994    protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
995      try {
996        if (newBlock.getBlockType() != BlockType.DATA) {
997          throw new IllegalStateException(
998            "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
999              + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
1000              + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
1001        }
1002        updateCurrBlockRef(newBlock);
1003        blockBuffer = newBlock.getBufferWithoutHeader();
1004        readKeyValueLen();
1005      } finally {
1006        releaseIfNotCurBlock(newBlock);
1007      }
1008      // Reset the next indexed key
1009      this.nextIndexedKey = null;
1010    }
1011
1012    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1013      ByteBuff buffer = curBlock.getBufferWithoutHeader();
1014      // It is safe to manipulate this buffer because we own the buffer object.
1015      buffer.rewind();
1016      int klen = buffer.getInt();
1017      buffer.skip(Bytes.SIZEOF_INT);// Skip value len part
1018      ByteBuffer keyBuff = buffer.asSubByteBuffer(klen);
1019      if (keyBuff.hasArray()) {
1020        return new KeyValue.KeyOnlyKeyValue(keyBuff.array(),
1021          keyBuff.arrayOffset() + keyBuff.position(), klen);
1022      } else {
1023        return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen);
1024      }
1025    }
1026
1027    @Override
1028    public String getKeyString() {
1029      return CellUtil.toString(getKey(), false);
1030    }
1031
1032    @Override
1033    public String getValueString() {
1034      return ByteBufferUtils.toStringBinary(getValue());
1035    }
1036
1037    public int compareKey(CellComparator comparator, Cell key) {
1038      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair);
1039      this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen, rowLen);
1040      return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv);
1041    }
1042
1043    @Override
1044    public void shipped() throws IOException {
1045      this.returnBlocks(false);
1046    }
1047  }
1048
1049  @Override
1050  public Path getPath() {
1051    return path;
1052  }
1053
1054  @Override
1055  public DataBlockEncoding getDataBlockEncoding() {
1056    return dataBlockEncoder.getDataBlockEncoding();
1057  }
1058
1059  @Override
1060  public Configuration getConf() {
1061    return conf;
1062  }
1063
1064  @Override
1065  public void setConf(Configuration conf) {
1066    this.conf = conf;
1067  }
1068
1069  /** Minor versions in HFile starting with this number have hbase checksums */
1070  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
1071  /** In HFile minor version that does not support checksums */
1072  public static final int MINOR_VERSION_NO_CHECKSUM = 0;
1073
1074  /** HFile minor version that introduced pbuf filetrailer */
1075  public static final int PBUF_TRAILER_MINOR_VERSION = 2;
1076
1077  /**
1078   * The size of a (key length, value length) tuple that prefixes each entry in a data block.
1079   */
1080  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
1081
1082  /**
1083   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
1084   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
1085   */
1086  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
1087    boolean updateCacheMetrics, BlockType expectedBlockType,
1088    DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1089    // Check cache for block. If found return.
1090    BlockCache cache = cacheConf.getBlockCache().orElse(null);
1091    if (cache != null) {
1092      HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
1093        updateCacheMetrics, expectedBlockType);
1094      if (cachedBlock != null) {
1095        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
1096          HFileBlock compressedBlock = cachedBlock;
1097          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1098          // In case of compressed block after unpacking we can release the compressed block
1099          if (compressedBlock != cachedBlock) {
1100            compressedBlock.release();
1101          }
1102        }
1103        try {
1104          validateBlockType(cachedBlock, expectedBlockType);
1105        } catch (IOException e) {
1106          returnAndEvictBlock(cache, cacheKey, cachedBlock);
1107          throw e;
1108        }
1109
1110        if (expectedDataBlockEncoding == null) {
1111          return cachedBlock;
1112        }
1113        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
1114        // Block types other than data blocks always have
1115        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
1116        // perform this check if cached block is a data block.
1117        if (
1118          cachedBlock.getBlockType().isData()
1119            && !actualDataBlockEncoding.equals(expectedDataBlockEncoding)
1120        ) {
1121          // This mismatch may happen if a Scanner, which is used for say a
1122          // compaction, tries to read an encoded block from the block cache.
1123          // The reverse might happen when an EncodedScanner tries to read
1124          // un-encoded blocks which were cached earlier.
1125          //
1126          // Because returning a data block with an implicit BlockType mismatch
1127          // will cause the requesting scanner to throw a disk read should be
1128          // forced here. This will potentially cause a significant number of
1129          // cache misses, so update so we should keep track of this as it might
1130          // justify the work on a CompoundScanner.
1131          if (
1132            !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE)
1133              && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)
1134          ) {
1135            // If the block is encoded but the encoding does not match the
1136            // expected encoding it is likely the encoding was changed but the
1137            // block was not yet evicted. Evictions on file close happen async
1138            // so blocks with the old encoding still linger in cache for some
1139            // period of time. This event should be rare as it only happens on
1140            // schema definition change.
1141            LOG.info(
1142              "Evicting cached block with key {} because data block encoding mismatch; "
1143                + "expected {}, actual {}, path={}",
1144              cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path);
1145            // This is an error scenario. so here we need to release the block.
1146            returnAndEvictBlock(cache, cacheKey, cachedBlock);
1147          }
1148          return null;
1149        }
1150        return cachedBlock;
1151      }
1152    }
1153    return null;
1154  }
1155
1156  private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) {
1157    block.release();
1158    cache.evictBlock(cacheKey);
1159  }
1160
1161  /**
1162   * @param cacheBlock Add block to cache, if found
1163   * @return block wrapped in a ByteBuffer, with header skipped
1164   */
1165  @Override
1166  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException {
1167    if (trailer.getMetaIndexCount() == 0) {
1168      return null; // there are no meta blocks
1169    }
1170    if (metaBlockIndexReader == null) {
1171      throw new IOException(path + " meta index not loaded");
1172    }
1173
1174    byte[] mbname = Bytes.toBytes(metaBlockName);
1175    int block = metaBlockIndexReader.rootBlockContainingKey(mbname, 0, mbname.length);
1176    if (block == -1) {
1177      return null;
1178    }
1179    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);
1180
1181    // Per meta key from any given file, synchronize reads for said block. This
1182    // is OK to do for meta blocks because the meta block index is always
1183    // single-level.
1184    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
1185      // Check cache for block. If found return.
1186      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
1187      BlockCacheKey cacheKey =
1188        new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
1189
1190      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
1191      HFileBlock cachedBlock =
1192        getCachedBlock(cacheKey, cacheBlock, false, true, BlockType.META, null);
1193      if (cachedBlock != null) {
1194        assert cachedBlock.isUnpacked() : "Packed block leak.";
1195        // Return a distinct 'shallow copy' of the block,
1196        // so pos does not get messed by the scanner
1197        return cachedBlock;
1198      }
1199      // Cache Miss, please load.
1200
1201      HFileBlock compressedBlock =
1202        fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
1203      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1204      if (compressedBlock != uncompressedBlock) {
1205        compressedBlock.release();
1206      }
1207
1208      // Cache the block
1209      if (cacheBlock) {
1210        cacheConf.getBlockCache().ifPresent(
1211          cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
1212      }
1213      return uncompressedBlock;
1214    }
1215  }
1216
1217  /**
1218   * If expected block is data block, we'll allocate the ByteBuff of block from
1219   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} and it's usually an off-heap one,
1220   * otherwise it will allocate from heap.
1221   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
1222   *      boolean, boolean)
1223   */
1224  private boolean shouldUseHeap(BlockType expectedBlockType) {
1225    if (!cacheConf.getBlockCache().isPresent()) {
1226      return false;
1227    } else if (!cacheConf.isCombinedBlockCache()) {
1228      // Block to cache in LruBlockCache must be an heap one. So just allocate block memory from
1229      // heap for saving an extra off-heap to heap copying.
1230      return true;
1231    }
1232    return expectedBlockType != null && !expectedBlockType.isData();
1233  }
1234
1235  @Override
1236  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
1237    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
1238    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1239    return readBlock(dataBlockOffset, onDiskBlockSize, cacheBlock, pread, isCompaction,
1240      updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding, false);
1241  }
1242
1243  @Override
1244  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
1245    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
1246    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly)
1247    throws IOException {
1248    if (dataBlockIndexReader == null) {
1249      throw new IOException(path + " block index not loaded");
1250    }
1251    long trailerOffset = trailer.getLoadOnOpenDataOffset();
1252    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
1253      throw new IOException("Requested block is out of range: " + dataBlockOffset
1254        + ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset()
1255        + ", trailer.getLoadOnOpenDataOffset: " + trailerOffset + ", path=" + path);
1256    }
1257    // For any given block from any given file, synchronize reads for said
1258    // block.
1259    // Without a cache, this synchronizing is needless overhead, but really
1260    // the other choice is to duplicate work (which the cache would prevent you
1261    // from doing).
1262
1263    BlockCacheKey cacheKey =
1264      new BlockCacheKey(name, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType);
1265    Attributes attributes = Attributes.of(BLOCK_CACHE_KEY_KEY, cacheKey.toString());
1266
1267    boolean useLock = false;
1268    IdLock.Entry lockEntry = null;
1269    final Span span = Span.current();
1270    try {
1271      while (true) {
1272        // Check cache for block. If found return.
1273        if (cacheConf.shouldReadBlockFromCache(expectedBlockType) && !cacheOnly) {
1274          if (useLock) {
1275            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
1276          }
1277          // Try and get the block from the block cache. If the useLock variable is true then this
1278          // is the second time through the loop and it should not be counted as a block cache miss.
1279          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics,
1280            expectedBlockType, expectedDataBlockEncoding);
1281          if (cachedBlock != null) {
1282            if (LOG.isTraceEnabled()) {
1283              LOG.trace("Block for file {} is coming from Cache {}",
1284                Bytes.toString(cachedBlock.getHFileContext().getTableName()), cachedBlock);
1285            }
1286            span.addEvent("block cache hit", attributes);
1287            assert cachedBlock.isUnpacked() : "Packed block leak.";
1288            if (cachedBlock.getBlockType().isData()) {
1289              if (updateCacheMetrics) {
1290                HFile.DATABLOCK_READ_COUNT.increment();
1291              }
1292              // Validate encoding type for data blocks. We include encoding
1293              // type in the cache key, and we expect it to match on a cache hit.
1294              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
1295                // Remember to release the block when in exceptional path.
1296                cacheConf.getBlockCache().ifPresent(cache -> {
1297                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
1298                });
1299                throw new IOException("Cached block under key " + cacheKey + " "
1300                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
1301                  + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
1302              }
1303            }
1304            // Cache-hit. Return!
1305            return cachedBlock;
1306          }
1307
1308          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
1309            // check cache again with lock
1310            useLock = true;
1311            continue;
1312          }
1313          // Carry on, please load.
1314        }
1315
1316        span.addEvent("block cache miss", attributes);
1317        // Load block from filesystem.
1318        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
1319          !isCompaction, shouldUseHeap(expectedBlockType));
1320        validateBlockType(hfileBlock, expectedBlockType);
1321        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
1322        final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category);
1323        final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category);
1324
1325        // Don't need the unpacked block back and we're storing the block in the cache compressed
1326        if (cacheOnly && cacheCompressed && cacheOnRead) {
1327          LOG.debug("Skipping decompression of block in prefetch");
1328          // Cache the block if necessary
1329          cacheConf.getBlockCache().ifPresent(cache -> {
1330            if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
1331              cache.cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory(), cacheOnly);
1332            }
1333          });
1334
1335          if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
1336            HFile.DATABLOCK_READ_COUNT.increment();
1337          }
1338          return hfileBlock;
1339        }
1340        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
1341        // Cache the block if necessary
1342        cacheConf.getBlockCache().ifPresent(cache -> {
1343          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
1344            // Using the wait on cache during compaction and prefetching.
1345            cache.cacheBlock(cacheKey, cacheCompressed ? hfileBlock : unpacked,
1346              cacheConf.isInMemory(), cacheOnly);
1347          }
1348        });
1349        if (unpacked != hfileBlock) {
1350          // End of life here if hfileBlock is an independent block.
1351          hfileBlock.release();
1352        }
1353        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
1354          HFile.DATABLOCK_READ_COUNT.increment();
1355        }
1356
1357        return unpacked;
1358      }
1359    } finally {
1360      if (lockEntry != null) {
1361        offsetLock.releaseLockEntry(lockEntry);
1362      }
1363    }
1364  }
1365
1366  @Override
1367  public boolean hasMVCCInfo() {
1368    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
1369  }
1370
1371  /**
1372   * Compares the actual type of a block retrieved from cache or disk with its expected type and
1373   * throws an exception in case of a mismatch. Expected block type of {@link BlockType#DATA} is
1374   * considered to match the actual block type [@link {@link BlockType#ENCODED_DATA} as well.
1375   * @param block             a block retrieved from cache or disk
1376   * @param expectedBlockType the expected block type, or null to skip the check
1377   */
1378  private void validateBlockType(HFileBlock block, BlockType expectedBlockType) throws IOException {
1379    if (expectedBlockType == null) {
1380      return;
1381    }
1382    BlockType actualBlockType = block.getBlockType();
1383    if (expectedBlockType.isData() && actualBlockType.isData()) {
1384      // We consider DATA to match ENCODED_DATA for the purpose of this
1385      // verification.
1386      return;
1387    }
1388    if (actualBlockType != expectedBlockType) {
1389      throw new IOException("Expected block type " + expectedBlockType + ", " + "but got "
1390        + actualBlockType + ": " + block + ", path=" + path);
1391    }
1392  }
1393
1394  /**
1395   * @return Last key as cell in the file. May be null if file has no entries. Note that this is not
1396   *         the last row key, but it is the Cell representation of the last key
1397   */
1398  @Override
1399  public Optional<Cell> getLastKey() {
1400    return dataBlockIndexReader.isEmpty()
1401      ? Optional.empty()
1402      : Optional.of(fileInfo.getLastKeyCell());
1403  }
1404
1405  /**
1406   * @return Midkey for this file. We work with block boundaries only so returned midkey is an
1407   *         approximation only.
1408   */
1409  @Override
1410  public Optional<Cell> midKey() throws IOException {
1411    return Optional.ofNullable(dataBlockIndexReader.midkey(this));
1412  }
1413
1414  @Override
1415  public void close() throws IOException {
1416    close(cacheConf.shouldEvictOnClose());
1417  }
1418
1419  @Override
1420  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
1421    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
1422  }
1423
1424  /** For testing */
1425  @Override
1426  public HFileBlock.FSReader getUncachedBlockReader() {
1427    return fsBlockReader;
1428  }
1429
1430  /**
1431   * Scanner that operates on encoded data blocks.
1432   */
1433  protected static class EncodedScanner extends HFileScannerImpl {
1434    private final HFileBlockDecodingContext decodingCtx;
1435    private final DataBlockEncoder.EncodedSeeker seeker;
1436    private final DataBlockEncoder dataBlockEncoder;
1437
1438    public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, boolean pread,
1439      boolean isCompaction, HFileContext meta, Configuration conf) {
1440      super(reader, cacheBlocks, pread, isCompaction);
1441      DataBlockEncoding encoding = reader.getDataBlockEncoding();
1442      dataBlockEncoder = encoding.getEncoder();
1443      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(conf, meta);
1444      seeker = dataBlockEncoder.createSeeker(decodingCtx);
1445    }
1446
1447    @Override
1448    public boolean isSeeked() {
1449      return curBlock != null;
1450    }
1451
1452    @Override
1453    public void setNonSeekedState() {
1454      reset();
1455    }
1456
1457    /**
1458     * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first
1459     * key/value pair.
1460     * @param newBlock the block to make current, and read by {@link HFileReaderImpl#readBlock},
1461     *                 it's a totally new block with new allocated {@link ByteBuff}, so if no
1462     *                 further reference to this block, we should release it carefully.
1463     */
1464    @Override
1465    protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
1466      try {
1467        // sanity checks
1468        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
1469          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
1470        }
1471        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
1472        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
1473          String encoderCls = dataBlockEncoder.getClass().getName();
1474          throw new CorruptHFileException(
1475            "Encoder " + encoderCls + " doesn't support data block encoding "
1476              + DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ",path=" + reader.getPath());
1477        }
1478        updateCurrBlockRef(newBlock);
1479        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
1480        seeker.setCurrentBuffer(encodedBuffer);
1481      } finally {
1482        releaseIfNotCurBlock(newBlock);
1483      }
1484      // Reset the next indexed key
1485      this.nextIndexedKey = null;
1486    }
1487
1488    private ByteBuff getEncodedBuffer(HFileBlock newBlock) {
1489      ByteBuff origBlock = newBlock.getBufferReadOnly();
1490      int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE;
1491      origBlock.position(pos);
1492      origBlock
1493        .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
1494      return origBlock.slice();
1495    }
1496
1497    @Override
1498    protected boolean processFirstDataBlock() throws IOException {
1499      seeker.rewind();
1500      return true;
1501    }
1502
1503    @Override
1504    public boolean next() throws IOException {
1505      boolean isValid = seeker.next();
1506      if (!isValid) {
1507        HFileBlock newBlock = readNextDataBlock();
1508        isValid = newBlock != null;
1509        if (isValid) {
1510          updateCurrentBlock(newBlock);
1511        } else {
1512          setNonSeekedState();
1513        }
1514      }
1515      return isValid;
1516    }
1517
1518    @Override
1519    public Cell getKey() {
1520      assertValidSeek();
1521      return seeker.getKey();
1522    }
1523
1524    @Override
1525    public ByteBuffer getValue() {
1526      assertValidSeek();
1527      return seeker.getValueShallowCopy();
1528    }
1529
1530    @Override
1531    public Cell getCell() {
1532      if (this.curBlock == null) {
1533        return null;
1534      }
1535      return seeker.getCell();
1536    }
1537
1538    @Override
1539    public String getKeyString() {
1540      return CellUtil.toString(getKey(), false);
1541    }
1542
1543    @Override
1544    public String getValueString() {
1545      ByteBuffer valueBuffer = getValue();
1546      return ByteBufferUtils.toStringBinary(valueBuffer);
1547    }
1548
1549    private void assertValidSeek() {
1550      if (this.curBlock == null) {
1551        throw new NotSeekedException(reader.getPath());
1552      }
1553    }
1554
1555    @Override
1556    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1557      return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock));
1558    }
1559
1560    @Override
1561    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
1562      Cell key, boolean seekBefore) throws IOException {
1563      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
1564        updateCurrentBlock(seekToBlock);
1565      } else if (rewind) {
1566        seeker.rewind();
1567      }
1568      this.nextIndexedKey = nextIndexedKey;
1569      return seeker.seekToKeyInBlock(key, seekBefore);
1570    }
1571
1572    @Override
1573    public int compareKey(CellComparator comparator, Cell key) {
1574      return seeker.compareKey(comparator, key);
1575    }
1576  }
1577
1578  /**
1579   * Returns a buffer with the Bloom filter metadata. The caller takes ownership of the buffer.
1580   */
1581  @Override
1582  public DataInput getGeneralBloomFilterMetadata() throws IOException {
1583    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
1584  }
1585
1586  @Override
1587  public DataInput getDeleteBloomFilterMetadata() throws IOException {
1588    return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
1589  }
1590
1591  private DataInput getBloomFilterMetadata(BlockType blockType) throws IOException {
1592    if (
1593      blockType != BlockType.GENERAL_BLOOM_META && blockType != BlockType.DELETE_FAMILY_BLOOM_META
1594    ) {
1595      throw new RuntimeException(
1596        "Block Type: " + blockType.toString() + " is not supported, path=" + path);
1597    }
1598
1599    for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) {
1600      if (b.getBlockType() == blockType) {
1601        return b.getByteStream();
1602      }
1603    }
1604    return null;
1605  }
1606
1607  public boolean isFileInfoLoaded() {
1608    return true; // We load file info in constructor in version 2.
1609  }
1610
1611  @Override
1612  public HFileContext getFileContext() {
1613    return hfileContext;
1614  }
1615
1616  /**
1617   * Returns false if block prefetching was requested for this file and has not completed, true
1618   * otherwise
1619   */
1620  @Override
1621  public boolean prefetchComplete() {
1622    return PrefetchExecutor.isCompleted(path);
1623  }
1624
1625  /**
1626   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1627   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is nothing to clean up
1628   * in a Scanner. Letting go of your references to the scanner is sufficient. NOTE: Do not use this
1629   * overload of getScanner for compactions. See
1630   * {@link #getScanner(Configuration, boolean, boolean, boolean)}
1631   * @param conf        Store configuration.
1632   * @param cacheBlocks True if we should cache blocks read in by this scanner.
1633   * @param pread       Use positional read rather than seek+read if true (pread is better for
1634   *                    random reads, seek+read is better scanning).
1635   * @return Scanner on this file.
1636   */
1637  @Override
1638  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread) {
1639    return getScanner(conf, cacheBlocks, pread, false);
1640  }
1641
1642  /**
1643   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1644   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is nothing to clean up
1645   * in a Scanner. Letting go of your references to the scanner is sufficient.
1646   * @param conf         Store configuration.
1647   * @param cacheBlocks  True if we should cache blocks read in by this scanner.
1648   * @param pread        Use positional read rather than seek+read if true (pread is better for
1649   *                     random reads, seek+read is better scanning).
1650   * @param isCompaction is scanner being used for a compaction?
1651   * @return Scanner on this file.
1652   */
1653  @Override
1654  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread,
1655    final boolean isCompaction) {
1656    if (dataBlockEncoder.useEncodedScanner()) {
1657      return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext, conf);
1658    }
1659    return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
1660  }
1661
1662  public int getMajorVersion() {
1663    return 3;
1664  }
1665
1666  @Override
1667  public void unbufferStream() {
1668    fsBlockReader.unbufferStream();
1669  }
1670}