001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.BLOCK_CACHE_KEY_KEY; 021 022import io.opentelemetry.api.common.Attributes; 023import io.opentelemetry.api.trace.Span; 024import java.io.DataInput; 025import java.io.IOException; 026import java.nio.ByteBuffer; 027import java.util.ArrayList; 028import java.util.Optional; 029import java.util.function.IntConsumer; 030import org.apache.hadoop.conf.Configurable; 031import org.apache.hadoop.conf.Configuration; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue; 034import org.apache.hadoop.hbase.Cell; 035import org.apache.hadoop.hbase.CellComparator; 036import org.apache.hadoop.hbase.CellUtil; 037import org.apache.hadoop.hbase.ExtendedCell; 038import org.apache.hadoop.hbase.HConstants; 039import org.apache.hadoop.hbase.KeyValue; 040import org.apache.hadoop.hbase.PrivateCellUtil; 041import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue; 042import org.apache.hadoop.hbase.SizeCachedKeyValue; 043import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue; 044import 
org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue; 045import org.apache.hadoop.hbase.io.compress.Compression; 046import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; 047import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 048import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; 049import org.apache.hadoop.hbase.nio.ByteBuff; 050import org.apache.hadoop.hbase.regionserver.KeyValueScanner; 051import org.apache.hadoop.hbase.util.ByteBufferUtils; 052import org.apache.hadoop.hbase.util.Bytes; 053import org.apache.hadoop.hbase.util.IdLock; 054import org.apache.hadoop.hbase.util.ObjectIntPair; 055import org.apache.hadoop.io.WritableUtils; 056import org.apache.yetus.audience.InterfaceAudience; 057import org.slf4j.Logger; 058import org.slf4j.LoggerFactory; 059 060/** 061 * Implementation that can handle all hfile versions of {@link HFile.Reader}. 062 */ 063@InterfaceAudience.Private 064@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD") 065public abstract class HFileReaderImpl implements HFile.Reader, Configurable { 066 // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into 067 // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against 068 // the MaxInlineLevel limit because too many tiers involved reading from an hfile. Was also hard 069 // to navigate the source code when so many classes participating in read. 
private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);

  /** Data block index reader keeping the root data index in memory */
  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;

  /** Meta block index reader -- always single level */
  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;

  /** File trailer; the constructor takes it from {@code fileInfo.getTrailer()}. */
  protected FixedFileTrailer trailer;

  /** Value of {@code context.isPrimaryReplicaReader()} captured by the constructor. */
  private final boolean primaryReplicaReader;

  /**
   * What kind of data block encoding should be used while reading, writing, and handling cache.
   */
  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;

  /** Block cache configuration. */
  protected final CacheConfig cacheConf;

  /** Reader context passed to the constructor; supplies the file path and file size. */
  protected ReaderContext context;

  /** HFile info passed to the constructor; source of the trailer, file context and indexes. */
  protected final HFileInfo fileInfo;

  /** Path of file */
  protected final Path path;

  /** File name to be used for block names */
  protected final String name;

  /** Configuration passed to the constructor; forwarded to the filesystem block reader. */
  private Configuration conf;

  /** HFile context; the constructor takes it from {@code fileInfo.getHFileContext()}. */
  protected HFileContext hfileContext;

  /** Filesystem-level block reader. */
  protected HFileBlock.FSReader fsBlockReader;

  /**
   * A "sparse lock" implementation allowing to lock on a particular block identified by offset. The
   * purpose of this is to avoid two clients loading the same block, and have all but one client
   * wait to get the block from the cache.
   */
  private IdLock offsetLock = new IdLock();

  /** Minimum minor version supported by this HFile format */
  static final int MIN_MINOR_VERSION = 0;

  /** Maximum minor version supported by this HFile format */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
120 static final int MAX_MINOR_VERSION = 3; 121 122 /** Minor versions starting with this number have faked index key */ 123 static final int MINOR_VERSION_WITH_FAKED_KEY = 3; 124 125 /** 126 * Opens a HFile. 127 * @param context Reader context info 128 * @param fileInfo HFile info 129 * @param cacheConf Cache configuration. 130 * @param conf Configuration 131 */ 132 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD") 133 public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf, 134 Configuration conf) throws IOException { 135 this.cacheConf = cacheConf; 136 this.context = context; 137 this.path = context.getFilePath(); 138 this.name = path.getName(); 139 this.conf = conf; 140 this.primaryReplicaReader = context.isPrimaryReplicaReader(); 141 this.fileInfo = fileInfo; 142 this.trailer = fileInfo.getTrailer(); 143 this.hfileContext = fileInfo.getHFileContext(); 144 this.fsBlockReader = 145 new HFileBlock.FSReaderImpl(context, hfileContext, cacheConf.getByteBuffAllocator(), conf); 146 this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo); 147 fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf); 148 dataBlockIndexReader = fileInfo.getDataBlockIndexReader(); 149 metaBlockIndexReader = fileInfo.getMetaBlockIndexReader(); 150 } 151 152 @SuppressWarnings("serial") 153 public static class BlockIndexNotLoadedException extends IllegalStateException { 154 public BlockIndexNotLoadedException(Path path) { 155 // Add a message in case anyone relies on it as opposed to class name. 
156 super(path + " block index not loaded"); 157 } 158 } 159 160 public CacheConfig getCacheConf() { 161 return cacheConf; 162 } 163 164 private Optional<String> toStringFirstKey() { 165 return getFirstKey().map(CellUtil::getCellKeyAsString); 166 } 167 168 private Optional<String> toStringLastKey() { 169 return getLastKey().map(CellUtil::getCellKeyAsString); 170 } 171 172 @Override 173 public String toString() { 174 return "reader=" + path.toString() 175 + (!isFileInfoLoaded() 176 ? "" 177 : ", compression=" + trailer.getCompressionCodec().getName() + ", cacheConf=" + cacheConf 178 + ", firstKey=" + toStringFirstKey() + ", lastKey=" + toStringLastKey()) 179 + ", avgKeyLen=" + fileInfo.getAvgKeyLen() + ", avgValueLen=" + fileInfo.getAvgValueLen() 180 + ", entries=" + trailer.getEntryCount() + ", length=" + context.getFileSize(); 181 } 182 183 @Override 184 public long length() { 185 return context.getFileSize(); 186 } 187 188 /** 189 * @return the first key in the file. May be null if file has no entries. Note that this is not 190 * the first row key, but rather the byte form of the first KeyValue. 191 */ 192 @Override 193 public Optional<ExtendedCell> getFirstKey() { 194 if (dataBlockIndexReader == null) { 195 throw new BlockIndexNotLoadedException(path); 196 } 197 return dataBlockIndexReader.isEmpty() 198 ? Optional.empty() 199 : Optional.of(dataBlockIndexReader.getRootBlockKey(0)); 200 } 201 202 /** 203 * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to 204 * eliminate {@link KeyValue} here. 205 * @return the first row key, or null if the file is empty. 206 */ 207 @Override 208 public Optional<byte[]> getFirstRowKey() { 209 // We have to copy the row part to form the row key alone 210 return getFirstKey().map(CellUtil::cloneRow); 211 } 212 213 /** 214 * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to 215 * eliminate {@link KeyValue} here. 
* @return a copy of the row of the last key, or an empty Optional when {@code getLastKey()} is
   *         empty.
   */
  @Override
  public Optional<byte[]> getLastRowKey() {
    // We have to copy the row part to form the row key alone
    return getLastKey().map(CellUtil::cloneRow);
  }

  /** Returns number of KV entries in this HFile */
  @Override
  public long getEntries() {
    return trailer.getEntryCount();
  }

  /** Returns comparator */
  @Override
  public CellComparator getComparator() {
    return this.hfileContext.getCellComparator();
  }

  /** Returns the compression codec recorded in the trailer. */
  public Compression.Algorithm getCompressionAlgorithm() {
    return trailer.getCompressionCodec();
  }

  /**
   * @return the total heap size of data and meta block indexes in bytes. Does not take into account
   *         non-root blocks of a multilevel data index. An index reader that is {@code null}
   *         contributes 0.
   */
  @Override
  public long indexSize() {
    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
      + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() : 0);
  }

  /** Returns the file name (last component of the path) used for block names. */
  @Override
  public String getName() {
    return name;
  }

  /** Replaces the data block encoder and hands the same encoder to the filesystem block reader. */
  @Override
  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
    this.dataBlockEncoder = dataBlockEncoder;
    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
  }

  /** Replaces the data block index reader. */
  @Override
  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
    this.dataBlockIndexReader = reader;
  }

  /** Returns the data block index reader; may be {@code null} if none has been set. */
  @Override
  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
    return dataBlockIndexReader;
  }

  /** Replaces the meta block index reader. */
  @Override
  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
    this.metaBlockIndexReader = reader;
  }

  /** Returns the meta block index reader; may be {@code null} if none has been set. */
  @Override
  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
    return metaBlockIndexReader;
  }

  /** Returns the file trailer. */
  @Override
  public FixedFileTrailer getTrailer() {
    return trailer;
  }

  /** Returns the reader context given to the constructor. */
  @Override
  public ReaderContext getContext() {
    return this.context;
  }

  /** Returns the HFile info given to the constructor. */
  @Override
  public HFileInfo getHFileInfo() {
    return this.fileInfo;
  }

  /** Returns the primary-replica flag captured from the reader context at construction. */
  @Override
  public boolean isPrimaryReplicaReader() {
    return primaryReplicaReader;
  }

  /**
   * An exception thrown when an operation requiring a scanner to be seeked is invoked on a scanner
   * that is not seeked.
   */
  @SuppressWarnings("serial")
  public static class NotSeekedException extends IllegalStateException {
    public NotSeekedException(Path path) {
      super(path + " not seeked to a key/value");
    }
  }

  /**
   * Scanner over an {@link HFile.Reader}. It keeps a position inside the buffer of the current
   * block together with the lengths decoded for the cell at that position.
   */
  public static class HFileScannerImpl implements HFileScanner {
    // Buffer of the current block; null means the scanner is not seeked (see isSeeked()).
    private ByteBuff blockBuffer;
    // Flags fixed at construction and forwarded to every block read / index lookup.
    protected final boolean cacheBlocks;
    protected final boolean pread;
    protected final boolean isCompaction;
    // Lengths and mvcc of the cell at the current buffer position (set by readKeyValueLen()).
    private int currKeyLen;
    private int currValueLen;
    private int currMemstoreTSLen;
    private long currMemstoreTS;
    protected final HFile.Reader reader;
    private int currTagsLen;
    private short rowLen;
    // buffer backed keyonlyKV
    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
    // A pair for reusing in blockSeek() so that we don't garbage lot of objects
    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();

    /**
     * The next indexed key is to keep track of the indexed key of the next data block. If the
     * nextIndexedKey is KeyValueScanner.NO_NEXT_INDEXED_KEY, it means that the current data block
     * is the last data block. If the nextIndexedKey is null, it means the nextIndexedKey has not
     * been loaded yet.
     */
    protected ExtendedCell nextIndexedKey;

    // Current block being used. NOTICE: DON't release curBlock separately except in shipped() or
    // close() methods.
Because the shipped() or close() will do the release finally, even if any 339 // exception occur the curBlock will be released by the close() method (see 340 // RegionScannerImpl#handleException). Call the releaseIfNotCurBlock() to release the 341 // unreferenced block please. 342 protected HFileBlock curBlock; 343 // Whether we returned a result for curBlock's size in recordBlockSize(). 344 // gets reset whenever curBlock is changed. 345 private boolean providedCurrentBlockSize = false; 346 347 public HFileBlock getCurBlock() { 348 return curBlock; 349 } 350 351 // Previous blocks that were used in the course of the read 352 protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>(); 353 354 public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks, 355 final boolean pread, final boolean isCompaction) { 356 this.reader = reader; 357 this.cacheBlocks = cacheBlocks; 358 this.pread = pread; 359 this.isCompaction = isCompaction; 360 } 361 362 void updateCurrBlockRef(HFileBlock block) { 363 if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) { 364 return; 365 } 366 if (this.curBlock != null && this.curBlock.isSharedMem()) { 367 prevBlocks.add(this.curBlock); 368 } 369 this.curBlock = block; 370 this.providedCurrentBlockSize = false; 371 } 372 373 void reset() { 374 // We don't have to keep ref to heap block 375 if (this.curBlock != null && this.curBlock.isSharedMem()) { 376 this.prevBlocks.add(this.curBlock); 377 } 378 this.curBlock = null; 379 } 380 381 private void returnBlocks(boolean returnAll) { 382 this.prevBlocks.forEach(HFileBlock::release); 383 this.prevBlocks.clear(); 384 if (returnAll && this.curBlock != null) { 385 this.curBlock.release(); 386 this.curBlock = null; 387 } 388 } 389 390 @Override 391 public boolean isSeeked() { 392 return blockBuffer != null; 393 } 394 395 @Override 396 public String toString() { 397 return "HFileScanner for reader " + String.valueOf(getReader()); 398 } 399 400 
protected void assertSeeked() {
      // Guard for operations that need a positioned scanner.
      if (!isSeeked()) {
        throw new NotSeekedException(reader.getPath());
      }
    }

    /** Returns the reader this scanner was created for. */
    @Override
    public HFile.Reader getReader() {
      return reader;
    }

    // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile
    // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous
    // array/buffer. How many bytes we should wrap to make the KV is what this method returns.
    // The tags length short and the tag bytes are counted only when the current cell has tags.
    private int getKVBufSize() {
      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
      if (currTagsLen > 0) {
        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
      }
      return kvBufSize;
    }

    /**
     * Closes the scanner: unbuffers the reader's stream when this scanner is not a pread scanner,
     * then releases the parked blocks and the current block.
     */
    @Override
    public void close() {
      if (!pread) {
        // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393
        // NOTE(review): the guard is !pread, so this runs only for non-pread scanners; confirm the
        // wording above against HBASE-9393.
        reader.unbufferStream();
      }
      this.returnBlocks(true);
    }

    /**
     * Reports the uncompressed size (without header) of the current block to the consumer, at most
     * once per current block. Does nothing when there is no current block.
     */
    @Override
    public void recordBlockSize(IntConsumer blockSizeConsumer) {
      if (!providedCurrentBlockSize && curBlock != null) {
        providedCurrentBlockSize = true;
        blockSizeConsumer.accept(curBlock.getUncompressedSizeWithoutHeader());
      }
    }

    // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current
    // HFile block's buffer so as to position to the next cell.
    // Unlike getKVBufSize(), this includes the mvcc bytes, and the tags short is counted whenever
    // the file context includes tags, even if this cell's tags length is 0.
    private int getCurCellSerializedSize() {
      int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + currMemstoreTSLen;
      if (this.reader.getFileContext().isIncludesTags()) {
        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
      }
      return curCellSize;
    }

    /**
     * Decodes the lengths of the cell at the current buffer position (key, value, row, optional
     * tags) and then its mvcc version. The buffer position itself is not moved; every read is
     * relative to it.
     */
    protected void readKeyValueLen() {
      // This is a hot method. We go out of our way to make this method short so it can be
      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
      // because it is faster than going via range-checked ByteBuffer methods or going through a
      // byte buffer array a byte at a time.
      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
      // Trying to imitate what was done - need to profile if this is better or
      // earlier way is better by doing mark and reset?
      // But ensure that you read long instead of two ints
      long ll = blockBuffer.getLongAfterPosition(0);
      // Read top half as an int of key length and bottom int as value length
      this.currKeyLen = (int) (ll >> Integer.SIZE);
      this.currValueLen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
      checkKeyValueLen();
      // The row length short sits right after the 8 bytes of key/value lengths.
      this.rowLen = blockBuffer.getShortAfterPosition(Bytes.SIZEOF_LONG);
      // Move position past the key and value lengths and then beyond the key and value
      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
      if (reader.getFileContext().isIncludesTags()) {
        // Tags length is a short.
        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
        checkTagsLen();
        p += (Bytes.SIZEOF_SHORT + currTagsLen);
      }
      // p is now the offset (from the current position) at which the mvcc version starts.
      readMvccVersion(p);
    }

    // Throws IllegalStateException when checkLen() flags the decoded tags length; presumably that
    // means negative or larger than the block allows -- checkLen is defined outside this chunk.
    private final void checkTagsLen() {
      if (checkLen(this.currTagsLen)) {
        throw new IllegalStateException(
          "Invalid currTagsLen " + this.currTagsLen + ". Block offset: " + curBlock.getOffset()
            + ", block length: " + this.blockBuffer.limit() + ", position: "
            + this.blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
      }
    }

    /**
     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
     * @param offsetFromPos offset of the mvcc bytes relative to the current buffer position
     */
    protected void readMvccVersion(final int offsetFromPos) {
      // See if we even need to decode mvcc.
      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
        return;
      }
      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
        // Decoding is switched off: report mvcc 0 and account for a length of one byte.
        currMemstoreTS = 0;
        currMemstoreTSLen = 1;
        return;
      }
      _readMvccVersion(offsetFromPos);
    }

    /**
     * Actually do the mvcc read. Does no checks.
     * @param offsetFromPos offset of the first vint byte relative to the current buffer position
     */
    private void _readMvccVersion(int offsetFromPos) {
      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
      // previous if one-byte vint, we'd redo the vint call to find int size.
      // Also the method is kept small so can be inlined.
      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
      int len = WritableUtils.decodeVIntSize(firstByte);
      if (len == 1) {
        // Single-byte vint: the first byte is the value itself.
        this.currMemstoreTS = firstByte;
      } else {
        // Multi-byte vint: assemble the len - 1 payload bytes, widest reads first (int, then
        // short, then single bytes), shifting earlier bytes up as later ones arrive.
        int remaining = len - 1;
        long i = 0;
        offsetFromPos++;
        if (remaining >= Bytes.SIZEOF_INT) {
          // The int read has to be converted to unsigned long so the & op
          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
          remaining -= Bytes.SIZEOF_INT;
          offsetFromPos += Bytes.SIZEOF_INT;
        }
        if (remaining >= Bytes.SIZEOF_SHORT) {
          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
          i = i << 16;
          i = i | (s & 0xFFFF);
          remaining -= Bytes.SIZEOF_SHORT;
          offsetFromPos += Bytes.SIZEOF_SHORT;
        }
        for (int idx = 0; idx < remaining; idx++) {
          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
          i = i << 8;
          i = i | (b & 0xFF);
        }
        // Negative vints store the bitwise complement of the value.
        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
      }
      this.currMemstoreTSLen = len;
    }

    /**
     * Within a loaded block, seek looking for the last key that is smaller than (or equal to?) the
     * key we are interested in. A note on the seekBefore: if you have seekBefore = true, AND the
     * first key in the block = key, then you'll get thrown exceptions.
* The caller has to check for that case and load the previous block as appropriate.
     * @param key        the key to find
     * @param seekBefore find the key before the given key in case of exact match.
     * @return 0 in case of an exact key match, 1 in case of an inexact match, -2 in case of an
     *         inexact match and furthermore, the input key less than the first key of current
     *         block(e.g. using a faked index key)
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      int klen, vlen, tlen = 0;
      // Serialized size of the cell visited in the previous iteration; -1 until one cell has been
      // stepped over.
      int lastKeyValueSize = -1;
      int offsetFromPos;
      do {
        offsetFromPos = 0;
        // Better to ensure that we use the BB Utils here
        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
        klen = (int) (ll >> Integer.SIZE);
        vlen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException(
            "Invalid klen " + klen + " or vlen " + vlen + ". Block offset: " + curBlock.getOffset()
              + ", block length: " + blockBuffer.limit() + ", position: " + blockBuffer.position()
              + " (without header)." + " path=" + reader.getPath());
        }
        offsetFromPos += Bytes.SIZEOF_LONG;
        this.rowLen = blockBuffer.getShortAfterPosition(offsetFromPos);
        // Wrap the key bytes of the cell at the current position (reusing 'pair' and
        // 'bufBackedKeyOnlyKv') and compare the sought key against it.
        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen, rowLen);
        int comp =
          PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
        offsetFromPos += klen + vlen;
        if (this.reader.getFileContext().isIncludesTags()) {
          // Read short as unsigned, high byte first
          // (the two operands occupy disjoint bits, so ^ combines them exactly like |)
          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
            ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
          if (checkLen(tlen)) {
            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
          }
          // add the two bytes read for the tags.
          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
        }
        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
          // Directly read the mvcc based on current position
          readMvccVersion(offsetFromPos);
        }
        if (comp == 0) {
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              // Exact match on the very first cell visited: there is nothing before it here.
              throw new IllegalStateException("blockSeek with seekBefore "
                + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
                + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
                + curBlock.getOnDiskSizeWithHeader() + ", path=" + reader.getPath());
            }
            // Step back onto the previous cell and re-decode its lengths.
            blockBuffer.moveBack(lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          // Positioned on the match: publish the lengths decoded in this iteration.
          currKeyLen = klen;
          currValueLen = vlen;
          currTagsLen = tlen;
          return 0; // indicate exact match
        } else if (comp < 0) {
          // The sought key compares lower than the cell at the current position: settle on the
          // previous cell if one was visited, otherwise stay on this one.
          if (lastKeyValueSize > 0) {
            blockBuffer.moveBack(lastKeyValueSize);
          }
          readKeyValueLen();
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
            // No cell was stepped over and we are at the start of the buffer: the key is before
            // the first cell of this block.
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }
        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
        // include tag length also if tags included with KV
        if (reader.getFileContext().isIncludesTags()) {
          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
        }
        blockBuffer.skip(lastKeyValueSize);
      } while (blockBuffer.hasRemaining());

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.moveBack(lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }

    /** Returns the indexed key of the next data block as currently recorded by this scanner. */
    @Override
    public ExtendedCell getNextIndexedKey() {
      return nextIndexedKey;
    }

    /** Seeks to {@code key}, rewinding to the first key of the block before seeking. */
    @Override
    public int seekTo(ExtendedCell key) throws IOException {
      return seekTo(key, true);
    }

    /**
     * Forward-only seek. If the scanner is already at or past {@code key} nothing moves and the
     * comparison result is returned. If the key is known to lie in the current block, the seek
     * stays within that block; otherwise the block index is consulted without rewinding.
     */
    @Override
    public int reseekTo(ExtendedCell key) throws IOException {
      int compared;
      if (isSeeked()) {
        compared = compareKey(reader.getComparator(), key);
        if (compared < 1) {
          // If the required key is less than or equal to current key, then
          // don't do anything.
          return compared;
        } else {
          // The comparison with no_next_index_key has to be checked
          if (
            this.nextIndexedKey != null && (this.nextIndexedKey
                == KeyValueScanner.NO_NEXT_INDEXED_KEY
              || PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey)
                  < 0)
          ) {
            // The reader shall continue to scan the current data block instead
            // of querying the
            // block index as long as it knows the target key is strictly
            // smaller than
            // the next indexed key or the current data block is the last data
            // block.
            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key, false);
          }
        }
      }
      // Don't rewind on a reseek operation, because reseek implies that we are
      // always going forward in the file.
      return seekTo(key, false);
    }

    /**
     * An internal API function. Seek to the given key, optionally rewinding to the first key of the
     * block before doing the seek.
     * @param key    - a cell representing the key that we need to fetch
     * @param rewind whether to rewind to the first key of the block before doing the seek. If this
     *               is false, we are assuming we never go back, otherwise the result is undefined.
679 * @return -1 if the key is earlier than the first key of the file, 0 if we are at the given 680 * key, 1 if we are past the given key -2 if the key is earlier than the first key of 681 * the file while using a faked index key 682 */ 683 public int seekTo(ExtendedCell key, boolean rewind) throws IOException { 684 HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader(); 685 BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock, 686 cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader); 687 if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) { 688 // This happens if the key e.g. falls before the beginning of the file. 689 return -1; 690 } 691 return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(), 692 blockWithScanInfo.getNextIndexedKey(), rewind, key, false); 693 } 694 695 @Override 696 public boolean seekBefore(ExtendedCell key) throws IOException { 697 HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock, 698 cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction), reader); 699 if (seekToBlock == null) { 700 return false; 701 } 702 ExtendedCell firstKey = getFirstKeyCellInBlock(seekToBlock); 703 if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) { 704 long previousBlockOffset = seekToBlock.getPrevBlockOffset(); 705 // The key we are interested in 706 if (previousBlockOffset == -1) { 707 // we have a 'problem', the key we want is the first of the file. 708 releaseIfNotCurBlock(seekToBlock); 709 return false; 710 } 711 712 // The first key in the current block 'seekToBlock' is greater than the given 713 // seekBefore key. We will go ahead by reading the next block that satisfies the 714 // given key. Return the current block before reading the next one. 
715 releaseIfNotCurBlock(seekToBlock); 716 // It is important that we compute and pass onDiskSize to the block 717 // reader so that it does not have to read the header separately to 718 // figure out the size. Currently, we do not have a way to do this 719 // correctly in the general case however. 720 // TODO: See https://issues.apache.org/jira/browse/HBASE-14576 721 int prevBlockSize = -1; 722 seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread, 723 isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); 724 // TODO shortcut: seek forward in this block to the last key of the 725 // block. 726 } 727 loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true); 728 return true; 729 } 730 731 /** 732 * The curBlock will be released by shipping or close method, so only need to consider releasing 733 * the block, which was read from HFile before and not referenced by curBlock. 734 */ 735 protected void releaseIfNotCurBlock(HFileBlock block) { 736 if (curBlock != block) { 737 block.release(); 738 } 739 } 740 741 /** 742 * Scans blocks in the "scanned" section of the {@link HFile} until the next data block is 743 * found. 
* @return the next block, or null if there are no more data blocks
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH",
        justification = "Yeah, unnecessary null check; could do w/ clean up")
    protected HFileBlock readNextDataBlock() throws IOException {
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (curBlock == null) {
        return null;
      }
      HFileBlock block = this.curBlock;
      do {
        if (block.getOffset() >= lastDataBlockOffset) {
          // At or past the last data block: nothing more to return.
          releaseIfNotCurBlock(block);
          return null;
        }
        if (block.getOffset() < 0) {
          releaseIfNotCurBlock(block);
          throw new IOException("Invalid block offset: " + block + ", path=" + reader.getPath());
        }
        // We are reading the next block without block type validation, because
        // it might turn out to be a non-data block.
        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
          getEffectiveDataBlockEncoding());
        if (block != null && !block.getBlockType().isData()) {
          // Whatever block we read we will be returning it unless
          // it is a datablock. Just in case the blocks are non data blocks
          block.release();
        }
        // NOTE(review): the loop condition dereferences block without the null check used just
        // above; the findbugs suppression on this method calls that check unnecessary.
      } while (!block.getBlockType().isData());
      return block;
    }

    /** Returns the reader's effective in-cache encoding for this scanner's compaction flag. */
    public DataBlockEncoding getEffectiveDataBlockEncoding() {
      return this.reader.getEffectiveEncodingInCache(isCompaction);
    }

    /**
     * Returns the cell at the current position, or null when the scanner is not seeked. The cell
     * wraps the block's bytes rather than copying them; which concrete class is used depends on
     * whether the bytes are array-backed or direct and on whether the cell carries tags.
     */
    @Override
    public ExtendedCell getCell() {
      if (!isSeeked()) {
        return null;
      }

      ExtendedCell ret;
      int cellBufSize = getKVBufSize();
      // The sequence id stays 0 unless the file carries memstore timestamps.
      long seqId = 0L;
      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
        seqId = currMemstoreTS;
      }
      if (blockBuffer.hasArray()) {
        // TODO : reduce the varieties of KV here. Check if based on a boolean
        // we can handle the 'no tags' case.
        if (currTagsLen > 0) {
          ret = new SizeCachedKeyValue(blockBuffer.array(),
            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
            rowLen);
        } else {
          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
            rowLen);
        }
      } else {
        // No single backing array: take a ByteBuffer view of exactly this cell's bytes.
        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
        if (buf.isDirect()) {
          ret = currTagsLen > 0
            ? new SizeCachedByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId, currKeyLen,
              rowLen)
            : new SizeCachedNoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
              currKeyLen, rowLen);
        } else {
          if (currTagsLen > 0) {
            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
              cellBufSize, seqId, currKeyLen, rowLen);
          } else {
            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
              cellBufSize, seqId, currKeyLen, rowLen);
          }
        }
      }
      return ret;
    }

    /**
     * Returns the key of the current cell as a key-only cell.
     * @throws NotSeekedException if the scanner is not seeked
     */
    @Override
    public ExtendedCell getKey() {
      assertSeeked();
      // Create a new object so that this getKey is cached as firstKey, lastKey
      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair);
      ByteBuffer keyBuf = keyPair.getFirst();
      if (keyBuf.hasArray()) {
        // Array-backed: wrap the key bytes in place, no copy.
        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(),
          keyBuf.arrayOffset() + keyPair.getSecond(), currKeyLen);
      } else {
        // Better to do a copy here instead of holding on to this BB so that
        // we could release the blocks referring to this key. This key is specifically used
        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
        // every time. So holding onto the BB (incase of DBB) is not advised here.
        byte[] key = new byte[currKeyLen];
        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
      }
    }

    /**
     * Returns a buffer that covers exactly the value bytes of the current cell.
     * @throws NotSeekedException if the scanner is not seeked
     */
    @Override
    public ByteBuffer getValue() {
      assertSeeked();
      // Okie to create new Pair. Not used in hot path
      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
        currValueLen, valuePair);
      // Work on a duplicate so the position/limit of the buffer we were handed stay untouched.
      ByteBuffer valBuf = valuePair.getFirst().duplicate();
      valBuf.position(valuePair.getSecond());
      valBuf.limit(currValueLen + valuePair.getSecond());
      return valBuf.slice();
    }

    /** Returns the scanner to the not-seeked state: no current block, no buffer, zeroed lengths. */
    protected void setNonSeekedState() {
      reset();
      blockBuffer = null;
      currKeyLen = 0;
      currValueLen = 0;
      currMemstoreTS = 0;
      currMemstoreTSLen = 0;
      currTagsLen = 0;
    }

    /**
     * Set the position on current backing blockBuffer. Skips over the serialized bytes of the
     * current cell; an IllegalArgumentException from the skip is logged with the scanner state and
     * rethrown.
     */
    private void positionThisBlockBuffer() {
      try {
        blockBuffer.skip(getCurCellSerializedSize());
      } catch (IllegalArgumentException e) {
        LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen
          + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit()
          + "; currBlock currBlockOffset = " + this.curBlock.getOffset() + "; path="
          + reader.getPath());
        throw e;
      }
    }

    /**
     * Set our selves up for the next 'next' invocation, set up next block.
     * @return True is more to read else false if at the end.
     */
    private boolean positionForNextBlock() throws IOException {
      // Methods are small so they get inlined because they are 'hot'.
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
        // Already in the last data block: the scan is over.
        setNonSeekedState();
        return false;
      }
      return isNextBlock();
    }

    // Loads the next data block and makes it current; returns false (and un-seeks the scanner)
    // when there is none.
    private boolean isNextBlock() throws IOException {
      // Methods are small so they get inlined because they are 'hot'.
      HFileBlock nextBlock = readNextDataBlock();
      if (nextBlock == null) {
        setNonSeekedState();
        return false;
      }
      updateCurrentBlock(nextBlock);
      return true;
    }

    // Called after the buffer has been moved past the current cell: either the block is exhausted
    // and the next block is set up, or the lengths of the cell now under the position are decoded.
    private final boolean _next() throws IOException {
      // Small method so can be inlined. It is a hot one.
      if (blockBuffer.remaining() <= 0) {
        return positionForNextBlock();
      }

      // We are still in the same block.
      readKeyValueLen();
      return true;
    }

    /**
     * Go to the next key/value in the block section. Loads the next block if necessary. If
     * successful, {@link #getKey()} and {@link #getValue()} can be called.
     * @return true if successfully navigated to the next key/value
     * @throws NotSeekedException if the scanner is not seeked
     */
    @Override
    public boolean next() throws IOException {
      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
      assertSeeked();
      positionThisBlockBuffer();
      return _next();
    }

    /**
     * Positions this scanner at the start of the file.
     * @return false if empty file; i.e. a call to next would return false and the current key and
     *         value are undefined.
     */
    @Override
    public boolean seekTo() throws IOException {
      if (reader == null) {
        return false;
      }

      if (reader.getTrailer().getEntryCount() == 0) {
        // No data blocks.
948 return false; 949 } 950 951 long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset(); 952 if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) { 953 return processFirstDataBlock(); 954 } 955 956 readAndUpdateNewBlock(firstDataBlockOffset); 957 return true; 958 } 959 960 protected boolean processFirstDataBlock() throws IOException { 961 blockBuffer.rewind(); 962 readKeyValueLen(); 963 return true; 964 } 965 966 protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException { 967 HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread, 968 isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); 969 if (newBlock.getOffset() < 0) { 970 releaseIfNotCurBlock(newBlock); 971 throw new IOException( 972 "Invalid offset=" + newBlock.getOffset() + ", path=" + reader.getPath()); 973 } 974 updateCurrentBlock(newBlock); 975 } 976 977 protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, ExtendedCell nextIndexedKey, 978 boolean rewind, ExtendedCell key, boolean seekBefore) throws IOException { 979 if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) { 980 updateCurrentBlock(seekToBlock); 981 } else if (rewind) { 982 blockBuffer.rewind(); 983 } 984 // Update the nextIndexedKey 985 this.nextIndexedKey = nextIndexedKey; 986 return blockSeek(key, seekBefore); 987 } 988 989 /** Returns True if v <= 0 or v > current block buffer limit. */ 990 protected final boolean checkKeyLen(final int v) { 991 return v <= 0 || v > this.blockBuffer.limit(); 992 } 993 994 /** Returns True if v < 0 or v > current block buffer limit. */ 995 protected final boolean checkLen(final int v) { 996 return v < 0 || v > this.blockBuffer.limit(); 997 } 998 999 /** 1000 * Check key and value lengths are wholesome. 
1001 */ 1002 protected final void checkKeyValueLen() { 1003 if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) { 1004 throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen 1005 + " or currValueLen " + this.currValueLen + ". Block offset: " + this.curBlock.getOffset() 1006 + ", block length: " + this.blockBuffer.limit() + ", position: " 1007 + this.blockBuffer.position() + " (without header)." + ", path=" + reader.getPath()); 1008 } 1009 } 1010 1011 /** 1012 * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first 1013 * key/value pair. 1014 * @param newBlock the block read by {@link HFileReaderImpl#readBlock}, it's a totally new block 1015 * with new allocated {@link ByteBuff}, so if no further reference to this 1016 * block, we should release it carefully. 1017 */ 1018 protected void updateCurrentBlock(HFileBlock newBlock) throws IOException { 1019 try { 1020 if (newBlock.getBlockType() != BlockType.DATA) { 1021 throw new IllegalStateException( 1022 "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; " 1023 + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder=" 1024 + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction); 1025 } 1026 updateCurrBlockRef(newBlock); 1027 blockBuffer = newBlock.getBufferWithoutHeader(); 1028 readKeyValueLen(); 1029 } finally { 1030 releaseIfNotCurBlock(newBlock); 1031 } 1032 // Reset the next indexed key 1033 this.nextIndexedKey = null; 1034 } 1035 1036 protected ExtendedCell getFirstKeyCellInBlock(HFileBlock curBlock) { 1037 ByteBuff buffer = curBlock.getBufferWithoutHeader(); 1038 // It is safe to manipulate this buffer because we own the buffer object. 
1039 buffer.rewind(); 1040 int klen = buffer.getInt(); 1041 buffer.skip(Bytes.SIZEOF_INT);// Skip value len part 1042 ByteBuffer keyBuff = buffer.asSubByteBuffer(klen); 1043 if (keyBuff.hasArray()) { 1044 return new KeyValue.KeyOnlyKeyValue(keyBuff.array(), 1045 keyBuff.arrayOffset() + keyBuff.position(), klen); 1046 } else { 1047 return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen); 1048 } 1049 } 1050 1051 public int compareKey(CellComparator comparator, ExtendedCell key) { 1052 blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair); 1053 this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen, rowLen); 1054 return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv); 1055 } 1056 1057 @Override 1058 public void shipped() throws IOException { 1059 this.returnBlocks(false); 1060 } 1061 } 1062 1063 @Override 1064 public Path getPath() { 1065 return path; 1066 } 1067 1068 @Override 1069 public DataBlockEncoding getDataBlockEncoding() { 1070 return dataBlockEncoder.getDataBlockEncoding(); 1071 } 1072 1073 @Override 1074 public Configuration getConf() { 1075 return conf; 1076 } 1077 1078 @Override 1079 public void setConf(Configuration conf) { 1080 this.conf = conf; 1081 } 1082 1083 /** Minor versions in HFile starting with this number have hbase checksums */ 1084 public static final int MINOR_VERSION_WITH_CHECKSUM = 1; 1085 /** In HFile minor version that does not support checksums */ 1086 public static final int MINOR_VERSION_NO_CHECKSUM = 0; 1087 1088 /** HFile minor version that introduced pbuf filetrailer */ 1089 public static final int PBUF_TRAILER_MINOR_VERSION = 2; 1090 1091 /** 1092 * The size of a (key length, value length) tuple that prefixes each entry in a data block. 1093 */ 1094 public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT; 1095 1096 /** 1097 * Retrieve block from cache. 
Validates the retrieved block's type vs {@code expectedBlockType} 1098 * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary. 1099 */ 1100 private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock, 1101 boolean updateCacheMetrics, BlockType expectedBlockType, 1102 DataBlockEncoding expectedDataBlockEncoding) throws IOException { 1103 // Check cache for block. If found return. 1104 BlockCache cache = cacheConf.getBlockCache().orElse(null); 1105 if (cache != null) { 1106 HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock, 1107 updateCacheMetrics, expectedBlockType); 1108 if (cachedBlock != null) { 1109 if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) { 1110 HFileBlock compressedBlock = cachedBlock; 1111 cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader); 1112 // In case of compressed block after unpacking we can release the compressed block 1113 if (compressedBlock != cachedBlock) { 1114 compressedBlock.release(); 1115 } 1116 } 1117 try { 1118 validateBlockType(cachedBlock, expectedBlockType); 1119 } catch (IOException e) { 1120 returnAndEvictBlock(cache, cacheKey, cachedBlock); 1121 throw e; 1122 } 1123 1124 if (expectedDataBlockEncoding == null) { 1125 return cachedBlock; 1126 } 1127 DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding(); 1128 // Block types other than data blocks always have 1129 // DataBlockEncoding.NONE. To avoid false negative cache misses, only 1130 // perform this check if cached block is a data block. 1131 if ( 1132 cachedBlock.getBlockType().isData() 1133 && !actualDataBlockEncoding.equals(expectedDataBlockEncoding) 1134 ) { 1135 // This mismatch may happen if a Scanner, which is used for say a 1136 // compaction, tries to read an encoded block from the block cache. 
1137 // The reverse might happen when an EncodedScanner tries to read 1138 // un-encoded blocks which were cached earlier. 1139 // 1140 // Because returning a data block with an implicit BlockType mismatch 1141 // will cause the requesting scanner to throw a disk read should be 1142 // forced here. This will potentially cause a significant number of 1143 // cache misses, so update so we should keep track of this as it might 1144 // justify the work on a CompoundScanner. 1145 if ( 1146 !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE) 1147 && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE) 1148 ) { 1149 // If the block is encoded but the encoding does not match the 1150 // expected encoding it is likely the encoding was changed but the 1151 // block was not yet evicted. Evictions on file close happen async 1152 // so blocks with the old encoding still linger in cache for some 1153 // period of time. This event should be rare as it only happens on 1154 // schema definition change. 1155 LOG.info( 1156 "Evicting cached block with key {} because data block encoding mismatch; " 1157 + "expected {}, actual {}, path={}", 1158 cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path); 1159 // This is an error scenario. so here we need to release the block. 
1160 returnAndEvictBlock(cache, cacheKey, cachedBlock); 1161 } 1162 return null; 1163 } 1164 return cachedBlock; 1165 } 1166 } 1167 return null; 1168 } 1169 1170 private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) { 1171 block.release(); 1172 cache.evictBlock(cacheKey); 1173 } 1174 1175 /** 1176 * @param cacheBlock Add block to cache, if found 1177 * @return block wrapped in a ByteBuffer, with header skipped 1178 */ 1179 @Override 1180 public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException { 1181 if (trailer.getMetaIndexCount() == 0) { 1182 return null; // there are no meta blocks 1183 } 1184 if (metaBlockIndexReader == null) { 1185 throw new IOException(path + " meta index not loaded"); 1186 } 1187 1188 byte[] mbname = Bytes.toBytes(metaBlockName); 1189 int block = metaBlockIndexReader.rootBlockContainingKey(mbname, 0, mbname.length); 1190 if (block == -1) { 1191 return null; 1192 } 1193 long blockSize = metaBlockIndexReader.getRootBlockDataSize(block); 1194 1195 // Per meta key from any given file, synchronize reads for said block. This 1196 // is OK to do for meta blocks because the meta block index is always 1197 // single-level. 1198 synchronized (metaBlockIndexReader.getRootBlockKey(block)) { 1199 // Check cache for block. If found return. 
1200 long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block); 1201 BlockCacheKey cacheKey = 1202 new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META); 1203 1204 cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory()); 1205 HFileBlock cachedBlock = 1206 getCachedBlock(cacheKey, cacheBlock, false, true, BlockType.META, null); 1207 if (cachedBlock != null) { 1208 assert cachedBlock.isUnpacked() : "Packed block leak."; 1209 // Return a distinct 'shallow copy' of the block, 1210 // so pos does not get messed by the scanner 1211 return cachedBlock; 1212 } 1213 // Cache Miss, please load. 1214 1215 HFileBlock compressedBlock = 1216 fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true); 1217 HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader); 1218 if (compressedBlock != uncompressedBlock) { 1219 compressedBlock.release(); 1220 } 1221 1222 // Cache the block 1223 if (cacheBlock) { 1224 cacheConf.getBlockCache().ifPresent( 1225 cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory())); 1226 } 1227 return uncompressedBlock; 1228 } 1229 } 1230 1231 /** 1232 * Whether we use heap or not depends on our intent to cache the block. We want to avoid 1233 * allocating to off-heap if we intend to cache into the on-heap L1 cache. Otherwise, it's more 1234 * efficient to allocate to off-heap since we can control GC ourselves for those. So our decision 1235 * here breaks down as follows: <br> 1236 * If block cache is disabled, don't use heap. If we're not using the CombinedBlockCache, use heap 1237 * unless caching is disabled for the request. Otherwise, only use heap if caching is enabled and 1238 * the expected block type is not DATA (which goes to off-heap L2 in combined cache). 
* @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
   *      boolean, boolean)
   * @param expectedBlockType block type the caller expects, may be null
   * @param cacheBlock        whether the caller asked for the block to be cached
   * @return true if the block should be read into heap memory
   */
  private boolean shouldUseHeap(BlockType expectedBlockType, boolean cacheBlock) {
    if (!cacheConf.getBlockCache().isPresent()) {
      return false;
    }

    // we only cache a block if cacheBlock is true and caching-on-read is enabled in CacheConfig
    // we can really only check for that if have an expectedBlockType
    if (expectedBlockType != null) {
      cacheBlock &= cacheConf.shouldCacheBlockOnRead(expectedBlockType.getCategory());
    }

    if (!cacheConf.isCombinedBlockCache()) {
      // Block to cache in LruBlockCache must be an heap one, if caching enabled. So just allocate
      // block memory from heap for saving an extra off-heap to heap copying in that case.
      return cacheBlock;
    }

    // Combined cache: heap only for blocks we will cache whose expected type is known and is not
    // a data type.
    return cacheBlock && expectedBlockType != null && !expectedBlockType.isData();
  }

  /**
   * Convenience overload: same as the nine-argument {@code readBlock} with {@code cacheOnly} set
   * to false.
   */
  @Override
  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding) throws IOException {
    return readBlock(dataBlockOffset, onDiskBlockSize, cacheBlock, pread, isCompaction,
      updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding, false);
  }

  /**
   * Returns the block at {@code dataBlockOffset}, from the block cache when allowed and present,
   * otherwise from the filesystem (optionally adding it to the cache).
   * <p>
   * The cache is first consulted without a lock. On a miss, if the caller wants the block cached
   * and the cache configuration asks for locking on a miss, the lookup is repeated while holding
   * the per-offset lock; that lock is released in the {@code finally} clause.
   * @param dataBlockOffset           offset of the block; must be non-negative and below the
   *                                  trailer's load-on-open data offset
   * @param onDiskBlockSize           on-disk size passed through to the filesystem block reader
   * @param cacheBlock                whether the block should be added to the block cache
   * @param pread                     passed through to the filesystem block reader
   * @param isCompaction              whether this read is on behalf of a compaction
   * @param updateCacheMetrics        whether cache metrics and the data block read counter are
   *                                  updated
   * @param expectedBlockType         expected block type, or null to skip type validation
   * @param expectedDataBlockEncoding expected encoding, passed to the cache lookup
   * @param cacheOnly                 when true the cache lookup is skipped; additionally, if the
   *                                  block's category is cached compressed and cached on read, the
   *                                  block is cached and returned without being unpacked
   * @return the block that was found in the cache or read from the filesystem
   * @throws IOException if the block index is not loaded, the offset is out of range, the block
   *                     type does not match, or a cached data block has an unexpected encoding
   */
  @Override
  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock,
    boolean pread, final boolean isCompaction, boolean updateCacheMetrics,
    BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly)
    throws IOException {
    if (dataBlockIndexReader == null) {
      throw new IOException(path + " block index not loaded");
    }
    long trailerOffset = trailer.getLoadOnOpenDataOffset();
    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
      throw new IOException("Requested block is out of range: " + dataBlockOffset
        + ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset()
        + ", trailer.getLoadOnOpenDataOffset: " + trailerOffset + ", path=" + path);
    }
    // For any given block from any given file, synchronize reads for said
    // block.
    // Without a cache, this synchronizing is needless overhead, but really
    // the other choice is to duplicate work (which the cache would prevent you
    // from doing).

    BlockCacheKey cacheKey =
      new BlockCacheKey(path, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType);
    Attributes attributes = Attributes.of(BLOCK_CACHE_KEY_KEY, cacheKey.toString());

    // useLock flips to true for the second pass through the loop; lockEntry stays null unless
    // that second pass actually took the per-offset lock.
    boolean useLock = false;
    IdLock.Entry lockEntry = null;
    final Span span = Span.current();
    try {
      while (true) {
        // Check cache for block. If found return.
        if (cacheConf.shouldReadBlockFromCache(expectedBlockType) && !cacheOnly) {
          if (useLock) {
            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
          }
          // Try and get the block from the block cache. If the useLock variable is true then this
          // is the second time through the loop and it should not be counted as a block cache miss.
          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics,
            expectedBlockType, expectedDataBlockEncoding);
          if (cachedBlock != null) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("Block for file {} is coming from Cache {}",
                Bytes.toString(cachedBlock.getHFileContext().getTableName()), cachedBlock);
            }
            span.addEvent("block cache hit", attributes);
            assert cachedBlock.isUnpacked() : "Packed block leak.";
            if (cachedBlock.getBlockType().isData()) {
              if (updateCacheMetrics) {
                HFile.DATABLOCK_READ_COUNT.increment();
              }
              // Validate encoding type for data blocks. We include encoding
              // type in the cache key, and we expect it to match on a cache hit.
              // NOTE(review): the BlockCacheKey built above is constructed from path, offset,
              // replica flag and block type only - confirm the "in the cache key" statement.
              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
                // Remember to release the block when in exceptional path.
                cacheConf.getBlockCache().ifPresent(cache -> {
                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
                });
                throw new IOException("Cached block under key " + cacheKey + " "
                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
                  + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
              }
            }
            // Cache-hit. Return!
            return cachedBlock;
          }

          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
            // check cache again with lock
            useLock = true;
            continue;
          }
          // Carry on, please load.
        }

        span.addEvent("block cache miss", attributes);
        // Load block from filesystem.
        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
          !isCompaction, shouldUseHeap(expectedBlockType, cacheBlock));
        try {
          validateBlockType(hfileBlock, expectedBlockType);
        } catch (IOException e) {
          // Wrong block type: give the freshly read block back before propagating.
          hfileBlock.release();
          throw e;
        }
        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
        final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category);
        final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category);

        // Don't need the unpacked block back and we're storing the block in the cache compressed
        if (cacheOnly && cacheCompressed && cacheOnRead) {
          HFileBlock blockNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, hfileBlock);
          cacheConf.getBlockCache().ifPresent(cache -> {
            LOG.debug("Skipping decompression of block {} in prefetch", cacheKey);
            // Cache the block if necessary
            if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
              cache.cacheBlock(cacheKey, blockNoChecksum, cacheConf.isInMemory(), cacheOnly);
            }
          });

          if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
            HFile.DATABLOCK_READ_COUNT.increment();
          }
          return blockNoChecksum;
        }
        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
        HFileBlock unpackedNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, unpacked);
        // Cache the block if necessary
        cacheConf.getBlockCache().ifPresent(cache -> {
          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
            // Using the wait on cache during compaction and prefetching.
            cache.cacheBlock(cacheKey,
              cacheCompressed
                ? BlockCacheUtil.getBlockForCaching(cacheConf, hfileBlock)
                : unpackedNoChecksum,
              cacheConf.isInMemory(), cacheOnly);
          }
        });
        if (unpacked != hfileBlock) {
          // End of life here if hfileBlock is an independent block.
          hfileBlock.release();
        }
        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
          HFile.DATABLOCK_READ_COUNT.increment();
        }

        return unpackedNoChecksum;
      }
    } finally {
      // Only set when the locked second pass ran.
      if (lockEntry != null) {
        offsetLock.releaseLockEntry(lockEntry);
      }
    }
  }

  /**
   * Returns true only if the file info says memstore timestamps are both included and to be
   * decoded.
   */
  @Override
  public boolean hasMVCCInfo() {
    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
  }

  /**
   * Compares the actual type of a block retrieved from cache or disk with its expected type and
   * throws an exception in case of a mismatch. Expected block type of {@link BlockType#DATA} is
   * considered to match the actual block type {@link BlockType#ENCODED_DATA} as well.
1412 * @param block a block retrieved from cache or disk 1413 * @param expectedBlockType the expected block type, or null to skip the check 1414 */ 1415 private void validateBlockType(HFileBlock block, BlockType expectedBlockType) throws IOException { 1416 if (expectedBlockType == null) { 1417 return; 1418 } 1419 BlockType actualBlockType = block.getBlockType(); 1420 if (expectedBlockType.isData() && actualBlockType.isData()) { 1421 // We consider DATA to match ENCODED_DATA for the purpose of this 1422 // verification. 1423 return; 1424 } 1425 if (actualBlockType != expectedBlockType) { 1426 throw new IOException("Expected block type " + expectedBlockType + ", " + "but got " 1427 + actualBlockType + ": " + block + ", path=" + path); 1428 } 1429 } 1430 1431 /** 1432 * @return Last key as cell in the file. May be null if file has no entries. Note that this is not 1433 * the last row key, but it is the Cell representation of the last key 1434 */ 1435 @Override 1436 public Optional<ExtendedCell> getLastKey() { 1437 return dataBlockIndexReader.isEmpty() 1438 ? Optional.empty() 1439 : Optional.of(fileInfo.getLastKeyCell()); 1440 } 1441 1442 /** 1443 * @return Midkey for this file. We work with block boundaries only so returned midkey is an 1444 * approximation only. 1445 */ 1446 @Override 1447 public Optional<ExtendedCell> midKey() throws IOException { 1448 return Optional.ofNullable(dataBlockIndexReader.midkey(this)); 1449 } 1450 1451 @Override 1452 public void close() throws IOException { 1453 close(cacheConf.shouldEvictOnClose()); 1454 } 1455 1456 @Override 1457 public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) { 1458 return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction); 1459 } 1460 1461 /** For testing */ 1462 @Override 1463 public HFileBlock.FSReader getUncachedBlockReader() { 1464 return fsBlockReader; 1465 } 1466 1467 /** 1468 * Scanner that operates on encoded data blocks. 
1469 */ 1470 protected static class EncodedScanner extends HFileScannerImpl { 1471 private final HFileBlockDecodingContext decodingCtx; 1472 private final DataBlockEncoder.EncodedSeeker seeker; 1473 private final DataBlockEncoder dataBlockEncoder; 1474 1475 public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, boolean pread, 1476 boolean isCompaction, HFileContext meta, Configuration conf) { 1477 super(reader, cacheBlocks, pread, isCompaction); 1478 DataBlockEncoding encoding = reader.getDataBlockEncoding(); 1479 dataBlockEncoder = encoding.getEncoder(); 1480 decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(conf, meta); 1481 seeker = dataBlockEncoder.createSeeker(decodingCtx); 1482 } 1483 1484 @Override 1485 public boolean isSeeked() { 1486 return curBlock != null; 1487 } 1488 1489 @Override 1490 public void setNonSeekedState() { 1491 reset(); 1492 } 1493 1494 /** 1495 * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first 1496 * key/value pair. 1497 * @param newBlock the block to make current, and read by {@link HFileReaderImpl#readBlock}, 1498 * it's a totally new block with new allocated {@link ByteBuff}, so if no 1499 * further reference to this block, we should release it carefully. 
1500 */ 1501 @Override 1502 protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException { 1503 try { 1504 // sanity checks 1505 if (newBlock.getBlockType() != BlockType.ENCODED_DATA) { 1506 throw new IllegalStateException("EncodedScanner works only on encoded data blocks"); 1507 } 1508 short dataBlockEncoderId = newBlock.getDataBlockEncodingId(); 1509 if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) { 1510 String encoderCls = dataBlockEncoder.getClass().getName(); 1511 throw new CorruptHFileException( 1512 "Encoder " + encoderCls + " doesn't support data block encoding " 1513 + DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ",path=" + reader.getPath()); 1514 } 1515 updateCurrBlockRef(newBlock); 1516 ByteBuff encodedBuffer = getEncodedBuffer(newBlock); 1517 seeker.setCurrentBuffer(encodedBuffer); 1518 } finally { 1519 releaseIfNotCurBlock(newBlock); 1520 } 1521 // Reset the next indexed key 1522 this.nextIndexedKey = null; 1523 } 1524 1525 private ByteBuff getEncodedBuffer(HFileBlock newBlock) { 1526 ByteBuff origBlock = newBlock.getBufferReadOnly(); 1527 int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE; 1528 origBlock.position(pos); 1529 origBlock 1530 .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE); 1531 return origBlock.slice(); 1532 } 1533 1534 @Override 1535 protected boolean processFirstDataBlock() throws IOException { 1536 seeker.rewind(); 1537 return true; 1538 } 1539 1540 @Override 1541 public boolean next() throws IOException { 1542 boolean isValid = seeker.next(); 1543 if (!isValid) { 1544 HFileBlock newBlock = readNextDataBlock(); 1545 isValid = newBlock != null; 1546 if (isValid) { 1547 updateCurrentBlock(newBlock); 1548 } else { 1549 setNonSeekedState(); 1550 } 1551 } 1552 return isValid; 1553 } 1554 1555 @Override 1556 public ExtendedCell getKey() { 1557 assertValidSeek(); 1558 return seeker.getKey(); 1559 } 1560 1561 @Override 1562 public 
ByteBuffer getValue() { 1563 assertValidSeek(); 1564 return seeker.getValueShallowCopy(); 1565 } 1566 1567 @Override 1568 public ExtendedCell getCell() { 1569 if (this.curBlock == null) { 1570 return null; 1571 } 1572 return seeker.getCell(); 1573 } 1574 1575 private void assertValidSeek() { 1576 if (this.curBlock == null) { 1577 throw new NotSeekedException(reader.getPath()); 1578 } 1579 } 1580 1581 @Override 1582 protected ExtendedCell getFirstKeyCellInBlock(HFileBlock curBlock) { 1583 return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock)); 1584 } 1585 1586 @Override 1587 protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, ExtendedCell nextIndexedKey, 1588 boolean rewind, ExtendedCell key, boolean seekBefore) throws IOException { 1589 if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) { 1590 updateCurrentBlock(seekToBlock); 1591 } else if (rewind) { 1592 seeker.rewind(); 1593 } 1594 this.nextIndexedKey = nextIndexedKey; 1595 return seeker.seekToKeyInBlock(key, seekBefore); 1596 } 1597 1598 @Override 1599 public int compareKey(CellComparator comparator, ExtendedCell key) { 1600 return seeker.compareKey(comparator, key); 1601 } 1602 } 1603 1604 /** 1605 * Returns a buffer with the Bloom filter metadata. The caller takes ownership of the buffer. 
1606 */ 1607 @Override 1608 public DataInput getGeneralBloomFilterMetadata() throws IOException { 1609 return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META); 1610 } 1611 1612 @Override 1613 public DataInput getDeleteBloomFilterMetadata() throws IOException { 1614 return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META); 1615 } 1616 1617 private DataInput getBloomFilterMetadata(BlockType blockType) throws IOException { 1618 if ( 1619 blockType != BlockType.GENERAL_BLOOM_META && blockType != BlockType.DELETE_FAMILY_BLOOM_META 1620 ) { 1621 throw new RuntimeException( 1622 "Block Type: " + blockType.toString() + " is not supported, path=" + path); 1623 } 1624 1625 for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) { 1626 if (b.getBlockType() == blockType) { 1627 return b.getByteStream(); 1628 } 1629 } 1630 return null; 1631 } 1632 1633 public boolean isFileInfoLoaded() { 1634 return true; // We load file info in constructor in version 2. 1635 } 1636 1637 @Override 1638 public HFileContext getFileContext() { 1639 return hfileContext; 1640 } 1641 1642 /** 1643 * Returns false if block prefetching was requested for this file and has not completed, true 1644 * otherwise 1645 */ 1646 @Override 1647 public boolean prefetchComplete() { 1648 return PrefetchExecutor.isCompleted(path); 1649 } 1650 1651 /** 1652 * Returns true if block prefetching was started after waiting for specified delay, false 1653 * otherwise 1654 */ 1655 @Override 1656 public boolean prefetchStarted() { 1657 return PrefetchExecutor.isPrefetchStarted(); 1658 } 1659 1660 /** 1661 * Create a Scanner on this file. No seeks or reads are done on creation. Call 1662 * {@link HFileScanner#seekTo(ExtendedCell)} to position an start the read. There is nothing to 1663 * clean up in a Scanner. Letting go of your references to the scanner is sufficient. NOTE: Do not 1664 * use this overload of getScanner for compactions. 
See 1665 * {@link #getScanner(Configuration, boolean, boolean, boolean)} 1666 * @param conf Store configuration. 1667 * @param cacheBlocks True if we should cache blocks read in by this scanner. 1668 * @param pread Use positional read rather than seek+read if true (pread is better for 1669 * random reads, seek+read is better scanning). 1670 * @return Scanner on this file. 1671 */ 1672 @Override 1673 public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread) { 1674 return getScanner(conf, cacheBlocks, pread, false); 1675 } 1676 1677 /** 1678 * Create a Scanner on this file. No seeks or reads are done on creation. Call 1679 * {@link HFileScanner#seekTo(ExtendedCell)} to position an start the read. There is nothing to 1680 * clean up in a Scanner. Letting go of your references to the scanner is sufficient. 1681 * @param conf Store configuration. 1682 * @param cacheBlocks True if we should cache blocks read in by this scanner. 1683 * @param pread Use positional read rather than seek+read if true (pread is better for 1684 * random reads, seek+read is better scanning). 1685 * @param isCompaction is scanner being used for a compaction? 1686 * @return Scanner on this file. 1687 */ 1688 @Override 1689 public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread, 1690 final boolean isCompaction) { 1691 if (dataBlockEncoder.useEncodedScanner()) { 1692 return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext, conf); 1693 } 1694 return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction); 1695 } 1696 1697 public int getMajorVersion() { 1698 return 3; 1699 } 1700 1701 @Override 1702 public void unbufferStream() { 1703 fsBlockReader.unbufferStream(); 1704 } 1705}