001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.io.DataInput;
021import java.io.IOException;
022import java.nio.ByteBuffer;
023import java.util.ArrayList;
024import java.util.Optional;
025import org.apache.hadoop.conf.Configurable;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellComparator;
031import org.apache.hadoop.hbase.CellUtil;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.KeyValue;
034import org.apache.hadoop.hbase.PrivateCellUtil;
035import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue;
036import org.apache.hadoop.hbase.SizeCachedKeyValue;
037import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue;
038import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
039import org.apache.hadoop.hbase.io.compress.Compression;
040import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
041import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
042import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
043import org.apache.hadoop.hbase.nio.ByteBuff;
044import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
045import org.apache.hadoop.hbase.trace.TraceUtil;
046import org.apache.hadoop.hbase.util.ByteBufferUtils;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.IdLock;
049import org.apache.hadoop.hbase.util.ObjectIntPair;
050import org.apache.hadoop.io.WritableUtils;
051import org.apache.htrace.core.TraceScope;
052import org.apache.yetus.audience.InterfaceAudience;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056/**
057 * Implementation that can handle all hfile versions of {@link HFile.Reader}.
058 */
059@InterfaceAudience.Private
060@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
061public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
062  // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into
063  // one file.  Ditto for all the HFileReader.ScannerV? implementations. I was running up against
064  // the MaxInlineLevel limit because too many tiers involved reading from an hfile. Was also hard
065  // to navigate the source code when so many classes participating in read.
  private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);

  /** Data block index reader keeping the root data index in memory */
  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;

  /** Meta block index reader -- always single level */
  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;

  /** File trailer; the constructor takes it from {@link #fileInfo}. */
  protected FixedFileTrailer trailer;

  /** Primary-replica flag; the constructor copies it from the reader context. */
  private final boolean primaryReplicaReader;

  /**
   * What kind of data block encoding should be used while reading, writing,
   * and handling cache.
   */
  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;

  /** Block cache configuration. */
  protected final CacheConfig cacheConf;

  /** Reader context; source of the file path, the file size and the primary-replica flag. */
  protected ReaderContext context;

  /** HFile info; source of the trailer, the file context and both block index readers. */
  protected final HFileInfo fileInfo;

  /** Path of file */
  protected final Path path;

  /** File name to be used for block names */
  protected final String name;

  /** Configuration handed to the constructor. */
  private Configuration conf;

  /** File context; the constructor takes it from {@link #fileInfo}. */
  protected HFileContext hfileContext;

  /** Filesystem-level block reader. */
  protected HFileBlock.FSReader fsBlockReader;

  /**
   * A "sparse lock" implementation allowing to lock on a particular block
   * identified by offset. The purpose of this is to avoid two clients loading
   * the same block, and have all but one client wait to get the block from the
   * cache.
   */
  private IdLock offsetLock = new IdLock();

  /** Minimum minor version supported by this HFile format */
  static final int MIN_MINOR_VERSION = 0;

  /** Maximum minor version supported by this HFile format */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
  static final int MAX_MINOR_VERSION = 3;

  /** Minor versions starting with this number have faked index key */
  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
122
123  /**
124   * Opens a HFile.
125   * @param context Reader context info
126   * @param fileInfo HFile info
127   * @param cacheConf Cache configuration.
128   * @param conf Configuration
129   */
130  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
131  public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
132      Configuration conf) throws IOException {
133    this.cacheConf = cacheConf;
134    this.context = context;
135    this.path = context.getFilePath();
136    this.name = path.getName();
137    this.conf = conf;
138    this.primaryReplicaReader = context.isPrimaryReplicaReader();
139    this.fileInfo = fileInfo;
140    this.trailer = fileInfo.getTrailer();
141    this.hfileContext = fileInfo.getHFileContext();
142    this.fsBlockReader = new HFileBlock.FSReaderImpl(context, hfileContext,
143        cacheConf.getByteBuffAllocator());
144    this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
145    fsBlockReader.setDataBlockEncoder(dataBlockEncoder);
146    dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
147    metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
148  }
149
150  @SuppressWarnings("serial")
151  public static class BlockIndexNotLoadedException extends IllegalStateException {
152    public BlockIndexNotLoadedException(Path path) {
153      // Add a message in case anyone relies on it as opposed to class name.
154      super(path + " block index not loaded");
155    }
156  }
157
158  private Optional<String> toStringFirstKey() {
159    return getFirstKey().map(CellUtil::getCellKeyAsString);
160  }
161
162  private Optional<String> toStringLastKey() {
163    return getLastKey().map(CellUtil::getCellKeyAsString);
164  }
165
166  @Override
167  public String toString() {
168    return "reader=" + path.toString() +
169        (!isFileInfoLoaded()? "":
170          ", compression=" + trailer.getCompressionCodec().getName() +
171          ", cacheConf=" + cacheConf +
172          ", firstKey=" + toStringFirstKey() +
173          ", lastKey=" + toStringLastKey()) +
174          ", avgKeyLen=" + fileInfo.getAvgKeyLen() +
175          ", avgValueLen=" + fileInfo.getAvgValueLen() +
176          ", entries=" + trailer.getEntryCount() +
177          ", length=" + context.getFileSize();
178  }
179
180  @Override
181  public long length() {
182    return context.getFileSize();
183  }
184
185  /**
186   * @return the first key in the file. May be null if file has no entries. Note
187   *         that this is not the first row key, but rather the byte form of the
188   *         first KeyValue.
189   */
190  @Override
191  public Optional<Cell> getFirstKey() {
192    if (dataBlockIndexReader == null) {
193      throw new BlockIndexNotLoadedException(path);
194    }
195    return dataBlockIndexReader.isEmpty() ? Optional.empty()
196        : Optional.of(dataBlockIndexReader.getRootBlockKey(0));
197  }
198
199  /**
200   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's
201   * patch goes in to eliminate {@link KeyValue} here.
202   *
203   * @return the first row key, or null if the file is empty.
204   */
205  @Override
206  public Optional<byte[]> getFirstRowKey() {
207    // We have to copy the row part to form the row key alone
208    return getFirstKey().map(CellUtil::cloneRow);
209  }
210
211  /**
212   * TODO left from {@link HFile} version 1: move this to StoreFile after
213   * Ryan's patch goes in to eliminate {@link KeyValue} here.
214   *
215   * @return the last row key, or null if the file is empty.
216   */
217  @Override
218  public Optional<byte[]> getLastRowKey() {
219    // We have to copy the row part to form the row key alone
220    return getLastKey().map(CellUtil::cloneRow);
221  }
222
223  /** @return number of KV entries in this HFile */
224  @Override
225  public long getEntries() {
226    return trailer.getEntryCount();
227  }
228
229  /** @return comparator */
230  @Override
231  public CellComparator getComparator() {
232    return this.hfileContext.getCellComparator();
233  }
234
235  public Compression.Algorithm getCompressionAlgorithm() {
236    return trailer.getCompressionCodec();
237  }
238
239  /**
240   * @return the total heap size of data and meta block indexes in bytes. Does
241   *         not take into account non-root blocks of a multilevel data index.
242   */
243  @Override
244  public long indexSize() {
245    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
246        + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize()
247            : 0);
248  }
249
250  @Override
251  public String getName() {
252    return name;
253  }
254
255  @Override
256  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
257    this.dataBlockEncoder = dataBlockEncoder;
258    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder);
259  }
260
261  @Override
262  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
263    this.dataBlockIndexReader = reader;
264  }
265
266  @Override
267  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
268    return dataBlockIndexReader;
269  }
270
271  @Override
272  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
273    this.metaBlockIndexReader = reader;
274  }
275
276  @Override
277  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
278    return metaBlockIndexReader;
279  }
280
281  @Override
282  public FixedFileTrailer getTrailer() {
283    return trailer;
284  }
285
286  @Override
287  public ReaderContext getContext() {
288    return this.context;
289  }
290
291  @Override
292  public HFileInfo getHFileInfo() {
293    return this.fileInfo;
294  }
295
296  @Override
297  public boolean isPrimaryReplicaReader() {
298    return primaryReplicaReader;
299  }
300
301  /**
302   * An exception thrown when an operation requiring a scanner to be seeked
303   * is invoked on a scanner that is not seeked.
304   */
305  @SuppressWarnings("serial")
306  public static class NotSeekedException extends IllegalStateException {
307    public NotSeekedException(Path path) {
308      super(path + " not seeked to a key/value");
309    }
310  }
311
312  protected static class HFileScannerImpl implements HFileScanner {
    // Buffer of the block the scanner is positioned in; null means "not seeked" (see isSeeked()).
    private ByteBuff blockBuffer;
    // Scan options fixed at construction time.
    protected final boolean cacheBlocks;
    protected final boolean pread;
    protected final boolean isCompaction;
    // Lengths and mvcc value decoded for the cell at the current buffer position.
    private int currKeyLen;
    private int currValueLen;
    private int currMemstoreTSLen;
    private long currMemstoreTS;
    // The reader this scanner reads through.
    protected final HFile.Reader reader;
    private int currTagsLen;
    private short rowLen;
    // Reusable buffer-backed key-only KeyValue; blockSeek() re-points it at each key it compares.
    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
    // A pair reused in blockSeek() so that we don't create a lot of garbage objects
    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();

    /**
     * The next indexed key is to keep track of the indexed key of the next data block.
     * If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the
     * current data block is the last data block.
     *
     * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
     */
    protected Cell nextIndexedKey;
    // Current block being used. NOTICE: DON'T release curBlock separately except in shipped() or
    // close() methods. Because the shipped() or close() will do the release finally, even if any
    // exception occur the curBlock will be released by the close() method (see
    // RegionScannerImpl#handleException). Call the releaseIfNotCurBlock() to release the
    // unreferenced block please.
    protected HFileBlock curBlock;
    // Previous blocks that were used in the course of the read
    protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();
345
346    public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
347        final boolean pread, final boolean isCompaction) {
348      this.reader = reader;
349      this.cacheBlocks = cacheBlocks;
350      this.pread = pread;
351      this.isCompaction = isCompaction;
352    }
353
354    void updateCurrBlockRef(HFileBlock block) {
355      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
356        return;
357      }
358      if (this.curBlock != null && this.curBlock.isSharedMem()) {
359        prevBlocks.add(this.curBlock);
360      }
361      this.curBlock = block;
362    }
363
364    void reset() {
365      // We don't have to keep ref to heap block
366      if (this.curBlock != null && this.curBlock.isSharedMem()) {
367        this.prevBlocks.add(this.curBlock);
368      }
369      this.curBlock = null;
370    }
371
372    private void returnBlocks(boolean returnAll) {
373      this.prevBlocks.forEach(HFileBlock::release);
374      this.prevBlocks.clear();
375      if (returnAll && this.curBlock != null) {
376        this.curBlock.release();
377        this.curBlock = null;
378      }
379    }
380
381    @Override
382    public boolean isSeeked(){
383      return blockBuffer != null;
384    }
385
386    @Override
387    public String toString() {
388      return "HFileScanner for reader " + String.valueOf(getReader());
389    }
390
391    protected void assertSeeked() {
392      if (!isSeeked()) {
393        throw new NotSeekedException(reader.getPath());
394      }
395    }
396
397    @Override
398    public HFile.Reader getReader() {
399      return reader;
400    }
401
402    // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile
403    // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous
404    // array/buffer. How many bytes we should wrap to make the KV is what this method returns.
405    private int getKVBufSize() {
406      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
407      if (currTagsLen > 0) {
408        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
409      }
410      return kvBufSize;
411    }
412
413    @Override
414    public void close() {
415      if (!pread) {
416        // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393
417        reader.unbufferStream();
418      }
419      this.returnBlocks(true);
420    }
421
422    // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current
423    // HFile block's buffer so as to position to the next cell.
424    private int getCurCellSerializedSize() {
425      int curCellSize =  KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
426          + currMemstoreTSLen;
427      if (this.reader.getFileContext().isIncludesTags()) {
428        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
429      }
430      return curCellSize;
431    }
432
433    protected void readKeyValueLen() {
434      // This is a hot method. We go out of our way to make this method short so it can be
435      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
436      // because it is faster than going via range-checked ByteBuffer methods or going through a
437      // byte buffer array a byte at a time.
438      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
439      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
440      // Trying to imitate what was done - need to profile if this is better or
441      // earlier way is better by doing mark and reset?
442      // But ensure that you read long instead of two ints
443      long ll = blockBuffer.getLongAfterPosition(0);
444      // Read top half as an int of key length and bottom int as value length
445      this.currKeyLen = (int)(ll >> Integer.SIZE);
446      this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
447      checkKeyValueLen();
448      this.rowLen = blockBuffer.getShortAfterPosition(Bytes.SIZEOF_LONG);
449      // Move position past the key and value lengths and then beyond the key and value
450      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
451      if (reader.getFileContext().isIncludesTags()) {
452        // Tags length is a short.
453        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
454        checkTagsLen();
455        p += (Bytes.SIZEOF_SHORT + currTagsLen);
456      }
457      readMvccVersion(p);
458    }
459
460    private final void checkTagsLen() {
461      if (checkLen(this.currTagsLen)) {
462        throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen +
463          ". Block offset: " + curBlock.getOffset() + ", block length: " +
464            this.blockBuffer.limit() +
465          ", position: " + this.blockBuffer.position() + " (without header)." +
466          " path=" + reader.getPath());
467      }
468    }
469
470    /**
471     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
472     */
473    protected void readMvccVersion(final int offsetFromPos) {
474      // See if we even need to decode mvcc.
475      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
476        return;
477      }
478      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
479        currMemstoreTS = 0;
480        currMemstoreTSLen = 1;
481        return;
482      }
483      _readMvccVersion(offsetFromPos);
484    }
485
486    /**
487     * Actually do the mvcc read. Does no checks.
488     */
489    private void _readMvccVersion(int offsetFromPos) {
490      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
491      // previous if one-byte vint, we'd redo the vint call to find int size.
492      // Also the method is kept small so can be inlined.
493      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
494      int len = WritableUtils.decodeVIntSize(firstByte);
495      if (len == 1) {
496        this.currMemstoreTS = firstByte;
497      } else {
498        int remaining = len -1;
499        long i = 0;
500        offsetFromPos++;
501        if (remaining >= Bytes.SIZEOF_INT) {
502          // The int read has to be converted to unsigned long so the & op
503          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
504          remaining -= Bytes.SIZEOF_INT;
505          offsetFromPos += Bytes.SIZEOF_INT;
506        }
507        if (remaining >= Bytes.SIZEOF_SHORT) {
508          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
509          i = i << 16;
510          i = i | (s & 0xFFFF);
511          remaining -= Bytes.SIZEOF_SHORT;
512          offsetFromPos += Bytes.SIZEOF_SHORT;
513        }
514        for (int idx = 0; idx < remaining; idx++) {
515          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
516          i = i << 8;
517          i = i | (b & 0xFF);
518        }
519        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
520      }
521      this.currMemstoreTSLen = len;
522    }
523
    /**
     * Within a loaded block, seek looking for the last key that is smaller than
     * (or equal to?) the key we are interested in. The scan starts at the current
     * position of the block buffer and walks forward one cell at a time.
     * A note on the seekBefore: if you have seekBefore = true, AND the first
     * key examined in the block = key, then an IllegalStateException is thrown. The
     * caller has to check for that case and load the previous block as appropriate.
     * @param key
     *          the key to find
     * @param seekBefore
     *          find the key before the given key in case of exact match.
     * @return 0 in case of an exact key match, 1 in case of an inexact match,
     *         {@link HConstants#INDEX_KEY_MAGIC} (described as -2 by the earlier version of
     *         this comment) in case of an inexact match and furthermore, the input key
     *         less than the first key of current block(e.g. using a faked index
     *         key)
     * @throws IllegalStateException if a key, value or tags length fails its check, or if
     *           seekBefore is requested and the match is the first cell examined
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      int klen, vlen, tlen = 0;
      // Serialized size of the previously visited cell; -1 until one cell has been stepped over.
      int lastKeyValueSize = -1;
      int offsetFromPos;
      do {
        offsetFromPos = 0;
        // Better to ensure that we use the BB Utils here
        // Key length is the top int of the long, value length the bottom int.
        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
        klen = (int)(ll >> Integer.SIZE);
        vlen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException("Invalid klen " + klen + " or vlen "
              + vlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)."
              + " path=" + reader.getPath());
        }
        offsetFromPos += Bytes.SIZEOF_LONG;
        this.rowLen = blockBuffer.getShortAfterPosition(offsetFromPos);
        // Point the reusable key-only KV at this cell's key bytes and compare against it.
        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen, rowLen);
        int comp =
            PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
        offsetFromPos += klen + vlen;
        if (this.reader.getFileContext().isIncludesTags()) {
          // Read short as unsigned, high byte first
          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
              ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
          if (checkLen(tlen)) {
            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
                + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
                + blockBuffer.position() + " (without header)."
                + " path=" + reader.getPath());
          }
          // add the two bytes read for the tags.
          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
        }
        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
          // Directly read the mvcc based on current position
          readMvccVersion(offsetFromPos);
        }
        if (comp == 0) {
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              throw new IllegalStateException("blockSeek with seekBefore "
                  + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
                  + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
                  + curBlock.getOnDiskSizeWithHeader()
                  + ", path=" + reader.getPath());
            }
            // Step back onto the previous cell and re-decode its lengths.
            blockBuffer.moveBack(lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          // Exact match: the lengths decoded above already describe the current cell.
          currKeyLen = klen;
          currValueLen = vlen;
          currTagsLen = tlen;
          return 0; // indicate exact match
        } else if (comp < 0) {
          // The sought key sorts before this cell: settle on the previous cell when there is one.
          if (lastKeyValueSize > 0) {
            blockBuffer.moveBack(lastKeyValueSize);
          }
          readKeyValueLen();
          // No cell was stepped over and the buffer is at position 0: the sought key is
          // smaller than the first key of the block.
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }
        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
        // include tag length also if tags included with KV
        if (reader.getFileContext().isIncludesTags()) {
          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
        }
        blockBuffer.skip(lastKeyValueSize);
      } while (blockBuffer.hasRemaining());

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.moveBack(lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }
623
624    @Override
625    public Cell getNextIndexedKey() {
626      return nextIndexedKey;
627    }
628
629    @Override
630    public int seekTo(Cell key) throws IOException {
631      return seekTo(key, true);
632    }
633
634    @Override
635    public int reseekTo(Cell key) throws IOException {
636      int compared;
637      if (isSeeked()) {
638        compared = compareKey(reader.getComparator(), key);
639        if (compared < 1) {
640          // If the required key is less than or equal to current key, then
641          // don't do anything.
642          return compared;
643        } else {
644          // The comparison with no_next_index_key has to be checked
645          if (this.nextIndexedKey != null &&
646              (this.nextIndexedKey == KeyValueScanner.NO_NEXT_INDEXED_KEY || PrivateCellUtil
647                  .compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey) < 0)) {
648            // The reader shall continue to scan the current data block instead
649            // of querying the
650            // block index as long as it knows the target key is strictly
651            // smaller than
652            // the next indexed key or the current data block is the last data
653            // block.
654            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key,
655                false);
656          }
657        }
658      }
659      // Don't rewind on a reseek operation, because reseek implies that we are
660      // always going forward in the file.
661      return seekTo(key, false);
662    }
663
664    /**
665     * An internal API function. Seek to the given key, optionally rewinding to
666     * the first key of the block before doing the seek.
667     *
668     * @param key - a cell representing the key that we need to fetch
669     * @param rewind whether to rewind to the first key of the block before
670     *        doing the seek. If this is false, we are assuming we never go
671     *        back, otherwise the result is undefined.
672     * @return -1 if the key is earlier than the first key of the file,
673     *         0 if we are at the given key, 1 if we are past the given key
674     *         -2 if the key is earlier than the first key of the file while
675     *         using a faked index key
676     */
677    public int seekTo(Cell key, boolean rewind) throws IOException {
678      HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
679      BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
680          cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader);
681      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
682        // This happens if the key e.g. falls before the beginning of the file.
683        return -1;
684      }
685      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
686        blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
687    }
688
    /**
     * Positions the scanner on the cell before the given key.
     * <p>
     * The data block index is asked for the block that should hold {@code key}. If that block's
     * first key is greater than or equal to {@code key}, the block at its recorded previous-block
     * offset is read instead. The chosen block is then handed to {@code loadBlockAndSeekToKey}
     * with its last argument (presumably the seek-before flag -- confirm against that method)
     * set to true.
     * @param key the key to position before
     * @return false if the index finds no block for the key, or if the key is not after the
     *         first key of a block that has no previous block; true otherwise
     */
    @Override
    public boolean seekBefore(Cell key) throws IOException {
      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
          cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction),
          reader);
      if (seekToBlock == null) {
        return false;
      }
      Cell firstKey = getFirstKeyCellInBlock(seekToBlock);
      if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) {
        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
        // The key we are interested in
        if (previousBlockOffset == -1) {
          // we have a 'problem', the key we want is the first of the file.
          releaseIfNotCurBlock(seekToBlock);
          return false;
        }

        // The first key in the current block 'seekToBlock' is greater than or equal to the given
        // seekBefore key, so the cell we want lives in the previous block. Return the current
        // block before reading the previous one.
        releaseIfNotCurBlock(seekToBlock);
        // It is important that we compute and pass onDiskSize to the block
        // reader so that it does not have to read the header separately to
        // figure out the size. Currently, we do not have a way to do this
        // correctly in the general case however.
        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
        int prevBlockSize = -1;
        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
        // TODO shortcut: seek forward in this block to the last key of the
        // block.
      }
      // NOTE(review): firstKey is always the first key of the block originally returned by the
      // index, even when the previous block was loaded above; it is passed in the argument
      // position where seekTo(Cell, boolean) passes the next indexed key.
      loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true);
      return true;
    }
725
726    /**
727     * The curBlock will be released by shipping or close method, so only need to consider releasing
728     * the block, which was read from HFile before and not referenced by curBlock.
729     */
730    protected void releaseIfNotCurBlock(HFileBlock block) {
731      if (curBlock != block) {
732        block.release();
733      }
734    }
735
    /**
     * Scans blocks in the "scanned" section of the {@link HFile} until the next
     * data block is found. Starting from the current block, each following block is read at
     * {@code offset + onDiskSizeWithHeader} of the block before it; non-data blocks are released
     * and skipped.
     *
     * @return the next block, or null if there are no more data blocks (there is no current
     *         block, or the block being stepped from sits at or beyond the trailer's last data
     *         block offset)
     * @throws IOException if a block with a negative offset is encountered, or from the read
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
        justification="Yeah, unnecessary null check; could do w/ clean up")
    protected HFileBlock readNextDataBlock() throws IOException {
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (curBlock == null) {
        return null;
      }
      HFileBlock block = this.curBlock;
      do {
        // NOTE(review): on iterations after the first, 'block' is a non-data block that was
        // already released at the bottom of the loop; if either exit below is taken then,
        // releaseIfNotCurBlock releases it a second time -- confirm this cannot occur.
        if (block.getOffset() >= lastDataBlockOffset) {
          releaseIfNotCurBlock(block);
          return null;
        }
        if (block.getOffset() < 0) {
          releaseIfNotCurBlock(block);
          throw new IOException("Invalid block offset=" + block + ", path=" + reader.getPath());
        }
        // We are reading the next block without block type validation, because
        // it might turn out to be a non-data block.
        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
          getEffectiveDataBlockEncoding());
        if (block != null && !block.getBlockType().isData()) {
          // Whatever block we read we will be returning it unless
          // it is a datablock. Just in case the blocks are non data blocks
          block.release();
        }
        // The loop condition dereferences 'block' unconditionally, so the null check above does
        // not protect against a null result from readBlock (see the SuppressWarnings note).
      } while (!block.getBlockType().isData());
      return block;
    }
772
773    public DataBlockEncoding getEffectiveDataBlockEncoding() {
774      return this.reader.getEffectiveEncodingInCache(isCompaction);
775    }
776
777    @Override
778    public Cell getCell() {
779      if (!isSeeked()) {
780        return null;
781      }
782
783      Cell ret;
784      int cellBufSize = getKVBufSize();
785      long seqId = 0L;
786      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
787        seqId = currMemstoreTS;
788      }
789      if (blockBuffer.hasArray()) {
790        // TODO : reduce the varieties of KV here. Check if based on a boolean
791        // we can handle the 'no tags' case.
792        if (currTagsLen > 0) {
793          ret = new SizeCachedKeyValue(blockBuffer.array(),
794              blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
795              rowLen);
796        } else {
797          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
798              blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
799              rowLen);
800        }
801      } else {
802        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
803        if (buf.isDirect()) {
804          ret = currTagsLen > 0
805              ? new SizeCachedByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
806                  currKeyLen, rowLen)
807              : new SizeCachedNoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
808                  currKeyLen, rowLen);
809        } else {
810          if (currTagsLen > 0) {
811            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
812                cellBufSize, seqId, currKeyLen, rowLen);
813          } else {
814            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
815                cellBufSize, seqId, currKeyLen, rowLen);
816          }
817        }
818      }
819      return ret;
820    }
821
822    @Override
823    public Cell getKey() {
824      assertSeeked();
825      // Create a new object so that this getKey is cached as firstKey, lastKey
826      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
827      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair);
828      ByteBuffer keyBuf = keyPair.getFirst();
829      if (keyBuf.hasArray()) {
830        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(), keyBuf.arrayOffset()
831            + keyPair.getSecond(), currKeyLen);
832      } else {
833        // Better to do a copy here instead of holding on to this BB so that
834        // we could release the blocks referring to this key. This key is specifically used
835        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
836        // every time. So holding onto the BB (incase of DBB) is not advised here.
837        byte[] key = new byte[currKeyLen];
838        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
839        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
840      }
841    }
842
843    @Override
844    public ByteBuffer getValue() {
845      assertSeeked();
846      // Okie to create new Pair. Not used in hot path
847      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
848      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
849        currValueLen, valuePair);
850      ByteBuffer valBuf = valuePair.getFirst().duplicate();
851      valBuf.position(valuePair.getSecond());
852      valBuf.limit(currValueLen + valuePair.getSecond());
853      return valBuf.slice();
854    }
855
856    protected void setNonSeekedState() {
857      reset();
858      blockBuffer = null;
859      currKeyLen = 0;
860      currValueLen = 0;
861      currMemstoreTS = 0;
862      currMemstoreTSLen = 0;
863      currTagsLen = 0;
864    }
865
866    /**
867     * Set the position on current backing blockBuffer.
868     */
869    private void positionThisBlockBuffer() {
870      try {
871        blockBuffer.skip(getCurCellSerializedSize());
872      } catch (IllegalArgumentException e) {
873        LOG.error("Current pos = " + blockBuffer.position()
874            + "; currKeyLen = " + currKeyLen + "; currValLen = "
875            + currValueLen + "; block limit = " + blockBuffer.limit()
876            + "; currBlock currBlockOffset = " + this.curBlock.getOffset()
877            + "; path=" + reader.getPath());
878        throw e;
879      }
880    }
881
882    /**
883     * Set our selves up for the next 'next' invocation, set up next block.
884     * @return True is more to read else false if at the end.
885     */
886    private boolean positionForNextBlock() throws IOException {
887      // Methods are small so they get inlined because they are 'hot'.
888      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
889      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
890        setNonSeekedState();
891        return false;
892      }
893      return isNextBlock();
894    }
895
896
897    private boolean isNextBlock() throws IOException {
898      // Methods are small so they get inlined because they are 'hot'.
899      HFileBlock nextBlock = readNextDataBlock();
900      if (nextBlock == null) {
901        setNonSeekedState();
902        return false;
903      }
904      updateCurrentBlock(nextBlock);
905      return true;
906    }
907
908    private final boolean _next() throws IOException {
909      // Small method so can be inlined. It is a hot one.
910      if (blockBuffer.remaining() <= 0) {
911        return positionForNextBlock();
912      }
913
914      // We are still in the same block.
915      readKeyValueLen();
916      return true;
917    }
918
    /**
     * Go to the next key/value in the block section. Loads the next block if
     * necessary. If successful, {@link #getKey()} and {@link #getValue()} can
     * be called.
     *
     * @return true if successfully navigated to the next key/value
     * @throws IOException if moving on to the next block fails
     */
    @Override
    public boolean next() throws IOException {
      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
      assertSeeked();
      // Step the buffer position past the cell we are currently on.
      positionThisBlockBuffer();
      // Then either read the next entry of this block or switch to the next block.
      return _next();
    }
934
935    /**
936     * Positions this scanner at the start of the file.
937     *
938     * @return false if empty file; i.e. a call to next would return false and
939     *         the current key and value are undefined.
940     */
941    @Override
942    public boolean seekTo() throws IOException {
943      if (reader == null) {
944        return false;
945      }
946
947      if (reader.getTrailer().getEntryCount() == 0) {
948        // No data blocks.
949        return false;
950      }
951
952      long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
953      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
954        return processFirstDataBlock();
955      }
956
957      readAndUpdateNewBlock(firstDataBlockOffset);
958      return true;
959    }
960
961    protected boolean processFirstDataBlock() throws IOException{
962      blockBuffer.rewind();
963      readKeyValueLen();
964      return true;
965    }
966
967    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
968      HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
969        isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
970      if (newBlock.getOffset() < 0) {
971        releaseIfNotCurBlock(newBlock);
972        throw new IOException("Invalid offset=" + newBlock.getOffset() +
973          ", path=" + reader.getPath());
974      }
975      updateCurrentBlock(newBlock);
976    }
977
978    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
979        Cell key, boolean seekBefore) throws IOException {
980      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
981        updateCurrentBlock(seekToBlock);
982      } else if (rewind) {
983        blockBuffer.rewind();
984      }
985      // Update the nextIndexedKey
986      this.nextIndexedKey = nextIndexedKey;
987      return blockSeek(key, seekBefore);
988    }
989
990    /**
991     * @return True if v &lt;= 0 or v &gt; current block buffer limit.
992     */
993    protected final boolean checkKeyLen(final int v) {
994      return v <= 0 || v > this.blockBuffer.limit();
995    }
996
997    /**
998     * @return True if v &lt; 0 or v &gt; current block buffer limit.
999     */
1000    protected final boolean checkLen(final int v) {
1001      return v < 0 || v > this.blockBuffer.limit();
1002    }
1003
1004    /**
1005     * Check key and value lengths are wholesome.
1006     */
1007    protected final void checkKeyValueLen() {
1008      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
1009        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
1010            + " or currValueLen " + this.currValueLen + ". Block offset: "
1011            + this.curBlock.getOffset() + ", block length: "
1012            + this.blockBuffer.limit() + ", position: " + this.blockBuffer.position()
1013            + " (without header)." + ", path=" + reader.getPath());
1014      }
1015    }
1016
    /**
     * Updates the current block to be the given {@link HFileBlock}. Seeks to the first
     * key/value pair.
     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}, it's a totally new block
     *          with new allocated {@link ByteBuff}, so if no further reference to this block, we
     *          should release it carefully.
     * @throws IllegalStateException if {@code newBlock} is not of type {@link BlockType#DATA}
     */
    protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
      try {
        if (newBlock.getBlockType() != BlockType.DATA) {
          throw new IllegalStateException(
              "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
                  + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
                  + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
        }
        updateCurrBlockRef(newBlock);
        blockBuffer = newBlock.getBufferWithoutHeader();
        // Load the lengths of the first entry of the new block.
        readKeyValueLen();
      } finally {
        // Releases newBlock only if it is not curBlock at this point, e.g. when the type check
        // above failed before the block reference was updated.
        releaseIfNotCurBlock(newBlock);
      }
      // Reset the next indexed key
      this.nextIndexedKey = null;
    }
1041
1042    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1043      ByteBuff buffer = curBlock.getBufferWithoutHeader();
1044      // It is safe to manipulate this buffer because we own the buffer object.
1045      buffer.rewind();
1046      int klen = buffer.getInt();
1047      buffer.skip(Bytes.SIZEOF_INT);// Skip value len part
1048      ByteBuffer keyBuff = buffer.asSubByteBuffer(klen);
1049      if (keyBuff.hasArray()) {
1050        return new KeyValue.KeyOnlyKeyValue(keyBuff.array(), keyBuff.arrayOffset()
1051            + keyBuff.position(), klen);
1052      } else {
1053        return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen);
1054      }
1055    }
1056
1057    @Override
1058    public String getKeyString() {
1059      return CellUtil.toString(getKey(), false);
1060    }
1061
1062    @Override
1063    public String getValueString() {
1064      return ByteBufferUtils.toStringBinary(getValue());
1065    }
1066
1067    public int compareKey(CellComparator comparator, Cell key) {
1068      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair);
1069      this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen, rowLen);
1070      return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv);
1071    }
1072
1073    @Override
1074    public void shipped() throws IOException {
1075      this.returnBlocks(false);
1076    }
1077  }
1078
1079  @Override
1080  public Path getPath() {
1081    return path;
1082  }
1083
1084  @Override
1085  public DataBlockEncoding getDataBlockEncoding() {
1086    return dataBlockEncoder.getDataBlockEncoding();
1087  }
1088
1089  @Override
1090  public Configuration getConf() {
1091    return conf;
1092  }
1093
1094  @Override
1095  public void setConf(Configuration conf) {
1096    this.conf = conf;
1097  }
1098
1099  /** Minor versions in HFile starting with this number have hbase checksums */
1100  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
1101  /** In HFile minor version that does not support checksums */
1102  public static final int MINOR_VERSION_NO_CHECKSUM = 0;
1103
1104  /** HFile minor version that introduced pbuf filetrailer */
1105  public static final int PBUF_TRAILER_MINOR_VERSION = 2;
1106
1107  /**
1108   * The size of a (key length, value length) tuple that prefixes each entry in
1109   * a data block.
1110   */
1111  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
1112
1113  /**
1114   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
1115   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
1116   */
1117  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
1118      boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
1119      DataBlockEncoding expectedDataBlockEncoding) throws IOException {
1120    // Check cache for block. If found return.
1121    BlockCache cache = cacheConf.getBlockCache().orElse(null);
1122    if (cache != null) {
1123      HFileBlock cachedBlock =
1124          (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics);
1125      if (cachedBlock != null) {
1126        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
1127          HFileBlock compressedBlock = cachedBlock;
1128          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1129          // In case of compressed block after unpacking we can release the compressed block
1130          if (compressedBlock != cachedBlock) {
1131            compressedBlock.release();
1132          }
1133        }
1134        try {
1135          validateBlockType(cachedBlock, expectedBlockType);
1136        } catch (IOException e) {
1137          returnAndEvictBlock(cache, cacheKey, cachedBlock);
1138          throw e;
1139        }
1140
1141        if (expectedDataBlockEncoding == null) {
1142          return cachedBlock;
1143        }
1144        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
1145        // Block types other than data blocks always have
1146        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
1147        // perform this check if cached block is a data block.
1148        if (cachedBlock.getBlockType().isData() &&
1149            !actualDataBlockEncoding.equals(expectedDataBlockEncoding)) {
1150          // This mismatch may happen if a Scanner, which is used for say a
1151          // compaction, tries to read an encoded block from the block cache.
1152          // The reverse might happen when an EncodedScanner tries to read
1153          // un-encoded blocks which were cached earlier.
1154          //
1155          // Because returning a data block with an implicit BlockType mismatch
1156          // will cause the requesting scanner to throw a disk read should be
1157          // forced here. This will potentially cause a significant number of
1158          // cache misses, so update so we should keep track of this as it might
1159          // justify the work on a CompoundScanner.
1160          if (!expectedDataBlockEncoding.equals(DataBlockEncoding.NONE) &&
1161              !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)) {
1162            // If the block is encoded but the encoding does not match the
1163            // expected encoding it is likely the encoding was changed but the
1164            // block was not yet evicted. Evictions on file close happen async
1165            // so blocks with the old encoding still linger in cache for some
1166            // period of time. This event should be rare as it only happens on
1167            // schema definition change.
1168            LOG.info("Evicting cached block with key {} because data block encoding mismatch; " +
1169                "expected {}, actual {}, path={}", cacheKey, actualDataBlockEncoding,
1170              expectedDataBlockEncoding, path);
1171            // This is an error scenario. so here we need to release the block.
1172            returnAndEvictBlock(cache, cacheKey, cachedBlock);
1173          }
1174          return null;
1175        }
1176        return cachedBlock;
1177      }
1178    }
1179    return null;
1180  }
1181
1182  private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) {
1183    block.release();
1184    cache.evictBlock(cacheKey);
1185  }
1186
1187  /**
1188   * @param cacheBlock Add block to cache, if found
1189   * @return block wrapped in a ByteBuffer, with header skipped
1190   */
1191  @Override
1192  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock)
1193      throws IOException {
1194    if (trailer.getMetaIndexCount() == 0) {
1195      return null; // there are no meta blocks
1196    }
1197    if (metaBlockIndexReader == null) {
1198      throw new IOException(path + " meta index not loaded");
1199    }
1200
1201    byte[] mbname = Bytes.toBytes(metaBlockName);
1202    int block = metaBlockIndexReader.rootBlockContainingKey(mbname,
1203        0, mbname.length);
1204    if (block == -1) {
1205      return null;
1206    }
1207    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);
1208
1209    // Per meta key from any given file, synchronize reads for said block. This
1210    // is OK to do for meta blocks because the meta block index is always
1211    // single-level.
1212    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
1213      // Check cache for block. If found return.
1214      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
1215      BlockCacheKey cacheKey =
1216          new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
1217
1218      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
1219      HFileBlock cachedBlock =
1220          getCachedBlock(cacheKey, cacheBlock, false, true, true, BlockType.META, null);
1221      if (cachedBlock != null) {
1222        assert cachedBlock.isUnpacked() : "Packed block leak.";
1223        // Return a distinct 'shallow copy' of the block,
1224        // so pos does not get messed by the scanner
1225        return cachedBlock;
1226      }
1227      // Cache Miss, please load.
1228
1229      HFileBlock compressedBlock =
1230          fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
1231      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
1232      if (compressedBlock != uncompressedBlock) {
1233        compressedBlock.release();
1234      }
1235
1236      // Cache the block
1237      if (cacheBlock) {
1238        cacheConf.getBlockCache().ifPresent(
1239          cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
1240      }
1241      return uncompressedBlock;
1242    }
1243  }
1244
1245  /**
1246   * If expected block is data block, we'll allocate the ByteBuff of block from
1247   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} and it's usually an off-heap one,
1248   * otherwise it will allocate from heap.
1249   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
1250   *      boolean, boolean)
1251   */
1252  private boolean shouldUseHeap(BlockType expectedBlockType) {
1253    if (!cacheConf.getBlockCache().isPresent()) {
1254      return false;
1255    } else if (!cacheConf.isCombinedBlockCache()) {
1256      // Block to cache in LruBlockCache must be an heap one. So just allocate block memory from
1257      // heap for saving an extra off-heap to heap copying.
1258      return true;
1259    }
1260    return expectedBlockType != null && !expectedBlockType.isData();
1261  }
1262
1263  @Override
1264  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
1265      final boolean cacheBlock, boolean pread, final boolean isCompaction,
1266      boolean updateCacheMetrics, BlockType expectedBlockType,
1267      DataBlockEncoding expectedDataBlockEncoding)
1268      throws IOException {
1269    if (dataBlockIndexReader == null) {
1270      throw new IOException(path + " block index not loaded");
1271    }
1272    long trailerOffset = trailer.getLoadOnOpenDataOffset();
1273    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
1274      throw new IOException("Requested block is out of range: " + dataBlockOffset +
1275        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
1276        ", trailer.getLoadOnOpenDataOffset: " + trailerOffset +
1277        ", path=" + path);
1278    }
1279    // For any given block from any given file, synchronize reads for said
1280    // block.
1281    // Without a cache, this synchronizing is needless overhead, but really
1282    // the other choice is to duplicate work (which the cache would prevent you
1283    // from doing).
1284
1285    BlockCacheKey cacheKey = new BlockCacheKey(name, dataBlockOffset,
1286      this.isPrimaryReplicaReader(), expectedBlockType);
1287
1288    boolean useLock = false;
1289    IdLock.Entry lockEntry = null;
1290    try (TraceScope traceScope = TraceUtil.createTrace("HFileReaderImpl.readBlock")) {
1291      while (true) {
1292        // Check cache for block. If found return.
1293        if (cacheConf.shouldReadBlockFromCache(expectedBlockType)) {
1294          if (useLock) {
1295            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
1296          }
1297          // Try and get the block from the block cache. If the useLock variable is true then this
1298          // is the second time through the loop and it should not be counted as a block cache miss.
1299          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, isCompaction,
1300            updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding);
1301          if (cachedBlock != null) {
1302            if (LOG.isTraceEnabled()) {
1303              LOG.trace("From Cache " + cachedBlock);
1304            }
1305            TraceUtil.addTimelineAnnotation("blockCacheHit");
1306            assert cachedBlock.isUnpacked() : "Packed block leak.";
1307            if (cachedBlock.getBlockType().isData()) {
1308              if (updateCacheMetrics) {
1309                HFile.DATABLOCK_READ_COUNT.increment();
1310              }
1311              // Validate encoding type for data blocks. We include encoding
1312              // type in the cache key, and we expect it to match on a cache hit.
1313              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
1314                // Remember to release the block when in exceptional path.
1315                cacheConf.getBlockCache().ifPresent(cache -> {
1316                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
1317                });
1318                throw new IOException("Cached block under key " + cacheKey + " "
1319                    + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
1320                    + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
1321              }
1322            }
1323            // Cache-hit. Return!
1324            return cachedBlock;
1325          }
1326
1327          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
1328            // check cache again with lock
1329            useLock = true;
1330            continue;
1331          }
1332          // Carry on, please load.
1333        }
1334
1335        TraceUtil.addTimelineAnnotation("blockCacheMiss");
1336        // Load block from filesystem.
1337        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
1338          !isCompaction, shouldUseHeap(expectedBlockType));
1339        validateBlockType(hfileBlock, expectedBlockType);
1340        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
1341        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
1342
1343        // Cache the block if necessary
1344        cacheConf.getBlockCache().ifPresent(cache -> {
1345          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
1346            cache.cacheBlock(cacheKey,
1347              cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
1348              cacheConf.isInMemory());
1349          }
1350        });
1351        if (unpacked != hfileBlock) {
1352          // End of life here if hfileBlock is an independent block.
1353          hfileBlock.release();
1354        }
1355        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
1356          HFile.DATABLOCK_READ_COUNT.increment();
1357        }
1358
1359        return unpacked;
1360      }
1361    } finally {
1362      if (lockEntry != null) {
1363        offsetLock.releaseLockEntry(lockEntry);
1364      }
1365    }
1366  }
1367
1368  @Override
1369  public boolean hasMVCCInfo() {
1370    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
1371  }
1372
1373  /**
1374   * Compares the actual type of a block retrieved from cache or disk with its
1375   * expected type and throws an exception in case of a mismatch. Expected
1376   * block type of {@link BlockType#DATA} is considered to match the actual
1377   * block type [@link {@link BlockType#ENCODED_DATA} as well.
1378   * @param block a block retrieved from cache or disk
1379   * @param expectedBlockType the expected block type, or null to skip the
1380   *          check
1381   */
1382  private void validateBlockType(HFileBlock block,
1383      BlockType expectedBlockType) throws IOException {
1384    if (expectedBlockType == null) {
1385      return;
1386    }
1387    BlockType actualBlockType = block.getBlockType();
1388    if (expectedBlockType.isData() && actualBlockType.isData()) {
1389      // We consider DATA to match ENCODED_DATA for the purpose of this
1390      // verification.
1391      return;
1392    }
1393    if (actualBlockType != expectedBlockType) {
1394      throw new IOException("Expected block type " + expectedBlockType + ", " +
1395          "but got " + actualBlockType + ": " + block + ", path=" + path);
1396    }
1397  }
1398
1399  /**
1400   * @return Last key as cell in the file. May be null if file has no entries. Note that
1401   *         this is not the last row key, but it is the Cell representation of the last
1402   *         key
1403   */
1404  @Override
1405  public Optional<Cell> getLastKey() {
1406    return dataBlockIndexReader.isEmpty() ? Optional.empty() :
1407        Optional.of(fileInfo.getLastKeyCell());
1408  }
1409
1410  /**
1411   * @return Midkey for this file. We work with block boundaries only so
1412   *         returned midkey is an approximation only.
1413   */
1414  @Override
1415  public Optional<Cell> midKey() throws IOException {
1416    return Optional.ofNullable(dataBlockIndexReader.midkey(this));
1417  }
1418
1419  @Override
1420  public void close() throws IOException {
1421    close(cacheConf.shouldEvictOnClose());
1422  }
1423
1424  @Override
1425  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
1426    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
1427  }
1428
1429  /** For testing */
1430  @Override
1431  public HFileBlock.FSReader getUncachedBlockReader() {
1432    return fsBlockReader;
1433  }
1434
1435  /**
1436   * Scanner that operates on encoded data blocks.
1437   */
1438  protected static class EncodedScanner extends HFileScannerImpl {
1439    private final HFileBlockDecodingContext decodingCtx;
1440    private final DataBlockEncoder.EncodedSeeker seeker;
1441    private final DataBlockEncoder dataBlockEncoder;
1442
1443    public EncodedScanner(HFile.Reader reader, boolean cacheBlocks,
1444        boolean pread, boolean isCompaction, HFileContext meta) {
1445      super(reader, cacheBlocks, pread, isCompaction);
1446      DataBlockEncoding encoding = reader.getDataBlockEncoding();
1447      dataBlockEncoder = encoding.getEncoder();
1448      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(meta);
1449      seeker = dataBlockEncoder.createSeeker(decodingCtx);
1450    }
1451
1452    @Override
1453    public boolean isSeeked(){
1454      return curBlock != null;
1455    }
1456
1457    @Override
1458    public void setNonSeekedState() {
1459      reset();
1460    }
1461
    /**
     * Updates the current block to be the given {@link HFileBlock}. Seeks to the first
     * key/value pair.
     * @param newBlock the block to make current, and read by {@link HFileReaderImpl#readBlock},
     *          it's a totally new block with new allocated {@link ByteBuff}, so if no further
     *          reference to this block, we should release it carefully.
     * @throws IllegalStateException if {@code newBlock} is not an encoded data block
     * @throws CorruptHFileException if this scanner's encoder does not match the block's
     *           encoding id
     */
    @Override
    protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
      try {
        // sanity checks
        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
        }
        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
          String encoderCls = dataBlockEncoder.getClass().getName();
          throw new CorruptHFileException("Encoder " + encoderCls +
            " doesn't support data block encoding " +
            DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ",path=" + reader.getPath());
        }
        updateCurrBlockRef(newBlock);
        // Hand the encoded payload of the new block to the seeker.
        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
        seeker.setCurrentBuffer(encodedBuffer);
      } finally {
        // Releases newBlock only if it is not curBlock at this point, e.g. when one of the
        // sanity checks above failed before the block reference was updated.
        releaseIfNotCurBlock(newBlock);
      }
      // Reset the next indexed key
      this.nextIndexedKey = null;
    }
1492
1493    private ByteBuff getEncodedBuffer(HFileBlock newBlock) {
1494      ByteBuff origBlock = newBlock.getBufferReadOnly();
1495      int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE;
1496      origBlock.position(pos);
1497      origBlock
1498          .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
1499      return origBlock.slice();
1500    }
1501
1502    @Override
1503    protected boolean processFirstDataBlock() throws IOException {
1504      seeker.rewind();
1505      return true;
1506    }
1507
1508    @Override
1509    public boolean next() throws IOException {
1510      boolean isValid = seeker.next();
1511      if (!isValid) {
1512        HFileBlock newBlock = readNextDataBlock();
1513        isValid = newBlock != null;
1514        if (isValid) {
1515          updateCurrentBlock(newBlock);
1516        } else {
1517          setNonSeekedState();
1518        }
1519      }
1520      return isValid;
1521    }
1522
1523    @Override
1524    public Cell getKey() {
1525      assertValidSeek();
1526      return seeker.getKey();
1527    }
1528
1529    @Override
1530    public ByteBuffer getValue() {
1531      assertValidSeek();
1532      return seeker.getValueShallowCopy();
1533    }
1534
1535    @Override
1536    public Cell getCell() {
1537      if (this.curBlock == null) {
1538        return null;
1539      }
1540      return seeker.getCell();
1541    }
1542
1543    @Override
1544    public String getKeyString() {
1545      return CellUtil.toString(getKey(), true);
1546    }
1547
1548    @Override
1549    public String getValueString() {
1550      ByteBuffer valueBuffer = getValue();
1551      return ByteBufferUtils.toStringBinary(valueBuffer);
1552    }
1553
1554    private void assertValidSeek() {
1555      if (this.curBlock == null) {
1556        throw new NotSeekedException(reader.getPath());
1557      }
1558    }
1559
1560    @Override
1561    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
1562      return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock));
1563    }
1564
1565    @Override
1566    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
1567        boolean rewind, Cell key, boolean seekBefore) throws IOException {
1568      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
1569        updateCurrentBlock(seekToBlock);
1570      } else if (rewind) {
1571        seeker.rewind();
1572      }
1573      this.nextIndexedKey = nextIndexedKey;
1574      return seeker.seekToKeyInBlock(key, seekBefore);
1575    }
1576
1577    @Override
1578    public int compareKey(CellComparator comparator, Cell key) {
1579      return seeker.compareKey(comparator, key);
1580    }
1581  }
1582
1583  /**
1584   * Returns a buffer with the Bloom filter metadata. The caller takes
1585   * ownership of the buffer.
1586   */
1587  @Override
1588  public DataInput getGeneralBloomFilterMetadata() throws IOException {
1589    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
1590  }
1591
1592  @Override
1593  public DataInput getDeleteBloomFilterMetadata() throws IOException {
1594    return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
1595  }
1596
1597  private DataInput getBloomFilterMetadata(BlockType blockType)
1598      throws IOException {
1599    if (blockType != BlockType.GENERAL_BLOOM_META &&
1600        blockType != BlockType.DELETE_FAMILY_BLOOM_META) {
1601      throw new RuntimeException("Block Type: " + blockType.toString() +
1602          " is not supported, path=" + path) ;
1603    }
1604
1605    for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) {
1606      if (b.getBlockType() == blockType) {
1607        return b.getByteStream();
1608      }
1609    }
1610    return null;
1611  }
1612
1613  public boolean isFileInfoLoaded() {
1614    return true; // We load file info in constructor in version 2.
1615  }
1616
1617  @Override
1618  public HFileContext getFileContext() {
1619    return hfileContext;
1620  }
1621
1622  /**
1623   * Returns false if block prefetching was requested for this file and has
1624   * not completed, true otherwise
1625   */
1626  @Override
1627  public boolean prefetchComplete() {
1628    return PrefetchExecutor.isCompleted(path);
1629  }
1630
1631  /**
1632   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1633   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is
1634   * nothing to clean up in a Scanner. Letting go of your references to the
1635   * scanner is sufficient. NOTE: Do not use this overload of getScanner for
1636   * compactions. See {@link #getScanner(boolean, boolean, boolean)}
1637   *
1638   * @param cacheBlocks True if we should cache blocks read in by this scanner.
1639   * @param pread Use positional read rather than seek+read if true (pread is
1640   *          better for random reads, seek+read is better scanning).
1641   * @return Scanner on this file.
1642   */
1643  @Override
1644  public HFileScanner getScanner(boolean cacheBlocks, final boolean pread) {
1645    return getScanner(cacheBlocks, pread, false);
1646  }
1647
1648  /**
1649   * Create a Scanner on this file. No seeks or reads are done on creation. Call
1650   * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is
1651   * nothing to clean up in a Scanner. Letting go of your references to the
1652   * scanner is sufficient.
1653   * @param cacheBlocks
1654   *          True if we should cache blocks read in by this scanner.
1655   * @param pread
1656   *          Use positional read rather than seek+read if true (pread is better
1657   *          for random reads, seek+read is better scanning).
1658   * @param isCompaction
1659   *          is scanner being used for a compaction?
1660   * @return Scanner on this file.
1661   */
1662  @Override
1663  public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
1664      final boolean isCompaction) {
1665    if (dataBlockEncoder.useEncodedScanner()) {
1666      return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext);
1667    }
1668    return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
1669  }
1670
1671  public int getMajorVersion() {
1672    return 3;
1673  }
1674
1675  @Override
1676  public void unbufferStream() {
1677    fsBlockReader.unbufferStream();
1678  }
1679}