/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Scope;
import java.io.DataInput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Optional;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue;
import org.apache.hadoop.hbase.SizeCachedKeyValue;
import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue;
import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IdLock;
import org.apache.hadoop.hbase.util.ObjectIntPair;
import org.apache.hadoop.io.WritableUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Implementation that can handle all hfile versions of {@link HFile.Reader}.
 */
@InterfaceAudience.Private
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
  // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into
  // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against
  // the MaxInlineLevel limit because too many tiers were involved in reading from an hfile. It
  // was also hard to navigate the source code when so many classes participated in a read.

  private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);

  /** Data block index reader keeping the root data index in memory */
  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;

  /** Meta block index reader -- always single level */
  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;

  protected FixedFileTrailer trailer;

  private final boolean primaryReplicaReader;

  /**
   * What kind of data block encoding should be used while reading, writing, and handling cache.
   */
  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;

  /** Block cache configuration. */
  protected final CacheConfig cacheConf;

  protected ReaderContext context;

  protected final HFileInfo fileInfo;

  /** Path of file */
  protected final Path path;

  /** File name to be used for block names */
  protected final String name;

  private Configuration conf;

  protected HFileContext hfileContext;

  /** Filesystem-level block reader. */
  protected HFileBlock.FSReader fsBlockReader;

  /**
   * A "sparse lock" implementation allowing us to lock on a particular block identified by
   * offset. The purpose of this is to avoid two clients loading the same block, and have all but
   * one client wait to get the block from the cache.
   */
  private IdLock offsetLock = new IdLock();

  /** Minimum minor version supported by this HFile format */
  static final int MIN_MINOR_VERSION = 0;

  /** Maximum minor version supported by this HFile format */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
  static final int MAX_MINOR_VERSION = 3;

  /** Minor versions starting with this number have faked index key */
  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;

  /**
   * Opens a HFile.
   * @param context   Reader context info
   * @param fileInfo  HFile info
   * @param cacheConf Cache configuration.
   * @param conf      Configuration
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
  public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
    Configuration conf) throws IOException {
    this.cacheConf = cacheConf;
    this.context = context;
    this.path = context.getFilePath();
    this.name = path.getName();
    this.conf = conf;
    this.primaryReplicaReader = context.isPrimaryReplicaReader();
    this.fileInfo = fileInfo;
    this.trailer = fileInfo.getTrailer();
    this.hfileContext = fileInfo.getHFileContext();
    this.fsBlockReader =
      new HFileBlock.FSReaderImpl(context, hfileContext, cacheConf.getByteBuffAllocator(), conf);
    this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
    fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
    dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
    metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
  }

  @SuppressWarnings("serial")
  public static class BlockIndexNotLoadedException extends IllegalStateException {
    public BlockIndexNotLoadedException(Path path) {
      // Add a message in case anyone relies on it as opposed to class name.
      super(path + " block index not loaded");
    }
  }

  private Optional<String> toStringFirstKey() {
    return getFirstKey().map(CellUtil::getCellKeyAsString);
  }

  private Optional<String> toStringLastKey() {
    return getLastKey().map(CellUtil::getCellKeyAsString);
  }

  @Override
  public String toString() {
    return "reader=" + path.toString()
      + (!isFileInfoLoaded()
        ? ""
        : ", compression=" + trailer.getCompressionCodec().getName() + ", cacheConf=" + cacheConf
          + ", firstKey=" + toStringFirstKey() + ", lastKey=" + toStringLastKey())
      + ", avgKeyLen=" + fileInfo.getAvgKeyLen() + ", avgValueLen=" + fileInfo.getAvgValueLen()
      + ", entries=" + trailer.getEntryCount() + ", length=" + context.getFileSize();
  }

  @Override
  public long length() {
    return context.getFileSize();
  }

  /**
   * @return the first key in the file. May be null if file has no entries. Note that this is not
   *         the first row key, but rather the byte form of the first KeyValue.
   */
  @Override
  public Optional<Cell> getFirstKey() {
    if (dataBlockIndexReader == null) {
      throw new BlockIndexNotLoadedException(path);
    }
    return dataBlockIndexReader.isEmpty()
      ? Optional.empty()
      : Optional.of(dataBlockIndexReader.getRootBlockKey(0));
  }

  /**
   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
   * eliminate {@link KeyValue} here.
   * @return the first row key, or null if the file is empty.
   */
  @Override
  public Optional<byte[]> getFirstRowKey() {
    // We have to copy the row part to form the row key alone
    return getFirstKey().map(CellUtil::cloneRow);
  }

  /**
   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to
   * eliminate {@link KeyValue} here.
   * @return the last row key, or null if the file is empty.
   */
  @Override
  public Optional<byte[]> getLastRowKey() {
    // We have to copy the row part to form the row key alone
    return getLastKey().map(CellUtil::cloneRow);
  }

  /** @return number of KV entries in this HFile */
  @Override
  public long getEntries() {
    return trailer.getEntryCount();
  }

  /** @return comparator */
  @Override
  public CellComparator getComparator() {
    return this.hfileContext.getCellComparator();
  }

  public Compression.Algorithm getCompressionAlgorithm() {
    return trailer.getCompressionCodec();
  }

  /**
   * @return the total heap size of data and meta block indexes in bytes. Does not take into
   *         account non-root blocks of a multilevel data index.
   */
  @Override
  public long indexSize() {
    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
      + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() : 0);
  }

  @Override
  public String getName() {
    return name;
  }

  @Override
  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
    this.dataBlockEncoder = dataBlockEncoder;
    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf);
  }

  @Override
  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
    this.dataBlockIndexReader = reader;
  }

  @Override
  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
    return dataBlockIndexReader;
  }

  @Override
  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
    this.metaBlockIndexReader = reader;
  }

  @Override
  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
    return metaBlockIndexReader;
  }

  @Override
  public FixedFileTrailer getTrailer() {
    return trailer;
  }

  @Override
  public ReaderContext getContext() {
    return this.context;
  }

  @Override
  public HFileInfo getHFileInfo() {
    return this.fileInfo;
  }

  @Override
  public boolean isPrimaryReplicaReader() {
    return primaryReplicaReader;
  }

  /**
   * An exception thrown when an operation requiring a scanner to be seeked is invoked on a
   * scanner that is not seeked.
   */
  @SuppressWarnings("serial")
  public static class NotSeekedException extends IllegalStateException {
    public NotSeekedException(Path path) {
      super(path + " not seeked to a key/value");
    }
  }
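
  // A sketch of typical scanner usage against this reader (illustrative only, not code from this
  // file); it assumes a previously opened HFile.Reader named "reader" and a Configuration "conf":
  //
  //   HFileScanner scanner = reader.getScanner(conf, true, false);
  //   if (scanner.seekTo()) {                 // position at the first cell, if any
  //     do {
  //       Cell cell = scanner.getCell();      // valid until shipped()/close(), see notes below
  //     } while (scanner.next());
  //   }
  //   scanner.close();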

  protected static class HFileScannerImpl implements HFileScanner {
    private ByteBuff blockBuffer;
    protected final boolean cacheBlocks;
    protected final boolean pread;
    protected final boolean isCompaction;
    private int currKeyLen;
    private int currValueLen;
    private int currMemstoreTSLen;
    private long currMemstoreTS;
    protected final HFile.Reader reader;
    private int currTagsLen;
    private short rowLen;
    // buffer backed keyonlyKV
    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
    // A pair for reusing in blockSeek() so that we don't generate a lot of garbage objects
    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();

    /**
     * The next indexed key is to keep track of the indexed key of the next data block. If the
     * nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the current data block is
     * the last data block. If the nextIndexedKey is null, it means the nextIndexedKey has not
     * been loaded yet.
     */
    protected Cell nextIndexedKey;

    // Current block being used. NOTICE: Don't release curBlock separately except in the shipped()
    // or close() methods, because shipped() or close() will do the release finally; even if an
    // exception occurs, curBlock will be released by the close() method (see
    // RegionScannerImpl#handleException). Please call releaseIfNotCurBlock() to release an
    // unreferenced block.
    protected HFileBlock curBlock;
    // Previous blocks that were used in the course of the read
    protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();

    public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
      final boolean pread, final boolean isCompaction) {
      this.reader = reader;
      this.cacheBlocks = cacheBlocks;
      this.pread = pread;
      this.isCompaction = isCompaction;
    }

    void updateCurrBlockRef(HFileBlock block) {
      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
        return;
      }
      if (this.curBlock != null && this.curBlock.isSharedMem()) {
        prevBlocks.add(this.curBlock);
      }
      this.curBlock = block;
    }

    void reset() {
      // We don't have to keep ref to heap block
      if (this.curBlock != null && this.curBlock.isSharedMem()) {
        this.prevBlocks.add(this.curBlock);
      }
      this.curBlock = null;
    }

    private void returnBlocks(boolean returnAll) {
      this.prevBlocks.forEach(HFileBlock::release);
      this.prevBlocks.clear();
      if (returnAll && this.curBlock != null) {
        this.curBlock.release();
        this.curBlock = null;
      }
    }

    @Override
    public boolean isSeeked() {
      return blockBuffer != null;
    }

    @Override
    public String toString() {
      return "HFileScanner for reader " + String.valueOf(getReader());
    }

    protected void assertSeeked() {
      if (!isSeeked()) {
        throw new NotSeekedException(reader.getPath());
      }
    }

    @Override
    public HFile.Reader getReader() {
      return reader;
    }

    // From non-encoded HFiles, we always read back KeyValue or its descendant. (Note: When an
    // HFile block is in a DBB, it will be an off-heap KV.) So all parts of the Cell are in a
    // contiguous array/buffer. How many bytes we should wrap to make the KV is what this method
    // returns.
    private int getKVBufSize() {
      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
      if (currTagsLen > 0) {
        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
      }
      return kvBufSize;
    }

    @Override
    public void close() {
      if (!pread) {
        // For seek + pread, the stream socket should be closed when the scanner is closed.
        // HBASE-9393
        reader.unbufferStream();
      }
      this.returnBlocks(true);
    }

    // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in the
    // current HFile block's buffer so as to position to the next cell.
    private int getCurCellSerializedSize() {
      int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + currMemstoreTSLen;
      if (this.reader.getFileContext().isIncludesTags()) {
        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
      }
      return curCellSize;
    }
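
    // On-disk layout of a single cell inside an unencoded data block, as parsed by
    // readKeyValueLen() and blockSeek() below (the tags and mvcc sections are present only when
    // the file context says so):
    //   keyLen: int | valueLen: int | key: keyLen bytes | value: valueLen bytes
    //   | tagsLen: short | tags: tagsLen bytes | memstoreTS: vlong
    // The first two bytes of the key are the row length, which is why rowLen is read at offset
    // SIZEOF_LONG from the current position.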
    protected void readKeyValueLen() {
      // This is a hot method. We go out of our way to make this method short so it can be
      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
      // because it is faster than going via range-checked ByteBuffer methods or going through a
      // byte buffer array a byte at a time.
      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
      // Trying to imitate what was done before; we'd need to profile whether this is better than
      // the earlier way of doing mark and reset. But ensure that you read a long instead of two
      // ints.
      long ll = blockBuffer.getLongAfterPosition(0);
      // Read top half as an int of key length and bottom int as value length
      this.currKeyLen = (int) (ll >> Integer.SIZE);
      this.currValueLen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
      checkKeyValueLen();
      this.rowLen = blockBuffer.getShortAfterPosition(Bytes.SIZEOF_LONG);
      // Move position past the key and value lengths and then beyond the key and value
      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
      if (reader.getFileContext().isIncludesTags()) {
        // Tags length is a short.
        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
        checkTagsLen();
        p += (Bytes.SIZEOF_SHORT + currTagsLen);
      }
      readMvccVersion(p);
    }

    private final void checkTagsLen() {
      if (checkLen(this.currTagsLen)) {
        throw new IllegalStateException(
          "Invalid currTagsLen " + this.currTagsLen + ". Block offset: " + curBlock.getOffset()
            + ", block length: " + this.blockBuffer.limit() + ", position: "
            + this.blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
      }
    }

    /**
     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
     */
    protected void readMvccVersion(final int offsetFromPos) {
      // See if we even need to decode mvcc.
      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
        return;
      }
      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
        currMemstoreTS = 0;
        currMemstoreTSLen = 1;
        return;
      }
      _readMvccVersion(offsetFromPos);
    }
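
    // Worked example of the Hadoop vlong ("vint") format decoded by _readMvccVersion() below: a
    // memstoreTS of 300 is stored as three bytes, a header byte of -114 (meaning "positive value,
    // two payload bytes follow") followed by 0x01 0x2C. WritableUtils.decodeVIntSize(-114)
    // returns 3, so len is 3 and the code reconstructs the value from the remaining two bytes.
    // Values in [-112, 127] are stored in the single header byte itself; that is the len == 1
    // branch.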
    /**
     * Actually do the mvcc read. Does no checks.
     */
    private void _readMvccVersion(int offsetFromPos) {
      // This is Bytes#bytesToVint inlined so we can save a few instructions in this hot method;
      // i.e. previously, for a one-byte vint, we'd redo the vint call just to find its size.
      // Also the method is kept small so it can be inlined.
      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
      int len = WritableUtils.decodeVIntSize(firstByte);
      if (len == 1) {
        this.currMemstoreTS = firstByte;
      } else {
        int remaining = len - 1;
        long i = 0;
        offsetFromPos++;
        if (remaining >= Bytes.SIZEOF_INT) {
          // The int read has to be converted to an unsigned long, hence the & op
          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
          remaining -= Bytes.SIZEOF_INT;
          offsetFromPos += Bytes.SIZEOF_INT;
        }
        if (remaining >= Bytes.SIZEOF_SHORT) {
          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
          i = i << 16;
          i = i | (s & 0xFFFF);
          remaining -= Bytes.SIZEOF_SHORT;
          offsetFromPos += Bytes.SIZEOF_SHORT;
        }
        for (int idx = 0; idx < remaining; idx++) {
          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
          i = i << 8;
          i = i | (b & 0xFF);
        }
        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
      }
      this.currMemstoreTSLen = len;
    }

    /**
     * Within a loaded block, seek looking for the last key that is smaller than (or equal to?)
     * the key we are interested in. A note on the seekBefore: if you have seekBefore = true, AND
     * the first key in the block = key, then you'll get thrown exceptions. The caller has to
     * check for that case and load the previous block as appropriate.
     * @param key        the key to find
     * @param seekBefore find the key before the given key in case of exact match.
     * @return 0 in case of an exact key match, 1 in case of an inexact match, -2 in case of an
     *         inexact match where furthermore the input key is less than the first key of the
     *         current block (e.g. using a faked index key)
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      int klen, vlen, tlen = 0;
      int lastKeyValueSize = -1;
      int offsetFromPos;
      do {
        offsetFromPos = 0;
        // Better to ensure that we use the BB Utils here
        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
        klen = (int) (ll >> Integer.SIZE);
        vlen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException(
            "Invalid klen " + klen + " or vlen " + vlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
        }
        offsetFromPos += Bytes.SIZEOF_LONG;
        this.rowLen = blockBuffer.getShortAfterPosition(offsetFromPos);
        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen, rowLen);
        int comp =
          PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
        offsetFromPos += klen + vlen;
        if (this.reader.getFileContext().isIncludesTags()) {
          // Read short as unsigned, high byte first
          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
            ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
          if (checkLen(tlen)) {
            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)." + " path=" + reader.getPath());
          }
          // add the two bytes read for the tags.
          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
        }
        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
          // Directly read the mvcc based on current position
          readMvccVersion(offsetFromPos);
        }
        if (comp == 0) {
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              throw new IllegalStateException("blockSeek with seekBefore "
                + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
                + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
                + curBlock.getOnDiskSizeWithHeader() + ", path=" + reader.getPath());
            }
            blockBuffer.moveBack(lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          currKeyLen = klen;
          currValueLen = vlen;
          currTagsLen = tlen;
          return 0; // indicate exact match
        } else if (comp < 0) {
          if (lastKeyValueSize > 0) {
            blockBuffer.moveBack(lastKeyValueSize);
          }
          readKeyValueLen();
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }
        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
        // include tag length also if tags included with KV
        if (reader.getFileContext().isIncludesTags()) {
          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
        }
        blockBuffer.skip(lastKeyValueSize);
      } while (blockBuffer.hasRemaining());

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.moveBack(lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }

    @Override
    public Cell getNextIndexedKey() {
      return nextIndexedKey;
    }

    @Override
    public int seekTo(Cell key) throws IOException {
      return seekTo(key, true);
    }

    @Override
    public int reseekTo(Cell key) throws IOException {
      int compared;
      if (isSeeked()) {
        compared = compareKey(reader.getComparator(), key);
        if (compared < 1) {
          // If the required key is less than or equal to current key, then
          // don't do anything.
          return compared;
        } else {
          // The comparison with no_next_index_key has to be checked
          if (
            this.nextIndexedKey != null
              && (this.nextIndexedKey == KeyValueScanner.NO_NEXT_INDEXED_KEY
                || PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key,
                  nextIndexedKey) < 0)
          ) {
            // The reader shall continue to scan the current data block instead of querying the
            // block index as long as it knows the target key is strictly smaller than the next
            // indexed key or the current data block is the last data block.
            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key, false);
          }
        }
      }
      // Don't rewind on a reseek operation, because reseek implies that we are
      // always going forward in the file.
      return seekTo(key, false);
    }

    /**
     * An internal API function. Seek to the given key, optionally rewinding to the first key of
     * the block before doing the seek.
     * @param key    - a cell representing the key that we need to fetch
     * @param rewind whether to rewind to the first key of the block before doing the seek. If
     *               this is false, we are assuming we never go back, otherwise the result is
     *               undefined.
     * @return -1 if the key is earlier than the first key of the file, 0 if we are at the given
     *         key, 1 if we are past the given key, -2 if the key is earlier than the first key of
     *         the file while using a faked index key
     */
    public int seekTo(Cell key, boolean rewind) throws IOException {
      HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
      BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
        cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader);
      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
        // This happens if the key e.g. falls before the beginning of the file.
        return -1;
      }
      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
        blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
    }
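
    // seekBefore() positions the scanner at the last cell whose key is strictly smaller than the
    // given key, crossing back into the previous block when the given key is less than or equal
    // to the first key of the block the index points at. It returns false when no such cell
    // exists, i.e. the given key is <= the first key of the file.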
    @Override
    public boolean seekBefore(Cell key) throws IOException {
      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
        cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction),
        reader);
      if (seekToBlock == null) {
        return false;
      }
      Cell firstKey = getFirstKeyCellInBlock(seekToBlock);
      if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) {
        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
        // The key we are interested in
        if (previousBlockOffset == -1) {
          // we have a 'problem', the key we want is the first of the file.
          releaseIfNotCurBlock(seekToBlock);
          return false;
        }

        // The first key in the current block 'seekToBlock' is greater than the given
        // seekBefore key. We will go ahead by reading the next block that satisfies the
        // given key. Return the current block before reading the next one.
        releaseIfNotCurBlock(seekToBlock);
        // It is important that we compute and pass onDiskSize to the block
        // reader so that it does not have to read the header separately to
        // figure out the size. Currently, we do not have a way to do this
        // correctly in the general case however.
        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
        int prevBlockSize = -1;
        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
        // TODO shortcut: seek forward in this block to the last key of the
        // block.
      }
      loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true);
      return true;
    }

    /**
     * The curBlock will be released by the shipped or close method, so we only need to consider
     * releasing a block which was read from the HFile before and is not referenced by curBlock.
     */
    protected void releaseIfNotCurBlock(HFileBlock block) {
      if (curBlock != block) {
        block.release();
      }
    }

    /**
     * Scans blocks in the "scanned" section of the {@link HFile} until the next data block is
     * found.
     * @return the next block, or null if there are no more data blocks
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH",
        justification = "Yeah, unnecessary null check; could do w/ clean up")
    protected HFileBlock readNextDataBlock() throws IOException {
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (curBlock == null) {
        return null;
      }
      HFileBlock block = this.curBlock;
      do {
        if (block.getOffset() >= lastDataBlockOffset) {
          releaseIfNotCurBlock(block);
          return null;
        }
        if (block.getOffset() < 0) {
          releaseIfNotCurBlock(block);
          throw new IOException("Invalid block offset: " + block + ", path=" + reader.getPath());
        }
        // We are reading the next block without block type validation, because
        // it might turn out to be a non-data block.
        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
          getEffectiveDataBlockEncoding());
        if (block != null && !block.getBlockType().isData()) {
          // Whatever block we read, we will return it only if it is a data block; release
          // non-data blocks right away.
          block.release();
        }
      } while (!block.getBlockType().isData());
      return block;
    }

    public DataBlockEncoding getEffectiveDataBlockEncoding() {
      return this.reader.getEffectiveEncodingInCache(isCompaction);
    }
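
    // NOTE: Cells returned by getCell() below may be backed by the current block's buffer (the
    // ByteBuffer-backed variants reference it directly), so they are only guaranteed safe to use
    // until the scanner is shipped() or closed, which is when the underlying blocks are released.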
    @Override
    public Cell getCell() {
      if (!isSeeked()) {
        return null;
      }

      Cell ret;
      int cellBufSize = getKVBufSize();
      long seqId = 0L;
      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
        seqId = currMemstoreTS;
      }
      if (blockBuffer.hasArray()) {
        // TODO : reduce the varieties of KV here. Check if based on a boolean
        // we can handle the 'no tags' case.
        if (currTagsLen > 0) {
          ret = new SizeCachedKeyValue(blockBuffer.array(),
            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
            rowLen);
        } else {
          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
            blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen,
            rowLen);
        }
      } else {
        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
        if (buf.isDirect()) {
          ret = currTagsLen > 0
            ? new SizeCachedByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
              currKeyLen, rowLen)
            : new SizeCachedNoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId,
              currKeyLen, rowLen);
        } else {
          if (currTagsLen > 0) {
            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
              cellBufSize, seqId, currKeyLen, rowLen);
          } else {
            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
              cellBufSize, seqId, currKeyLen, rowLen);
          }
        }
      }
      return ret;
    }

    @Override
    public Cell getKey() {
      assertSeeked();
      // Create a new object so that this getKey is cached as firstKey, lastKey
      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen,
        keyPair);
      ByteBuffer keyBuf = keyPair.getFirst();
      if (keyBuf.hasArray()) {
        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(),
          keyBuf.arrayOffset() + keyPair.getSecond(), currKeyLen);
      } else {
        // Better to do a copy here instead of holding on to this BB so that
        // we could release the blocks referring to this key. This key is specifically used
        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
        // every time. So holding onto the BB (in case of DBB) is not advised here.
        byte[] key = new byte[currKeyLen];
        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
      }
    }

    @Override
    public ByteBuffer getValue() {
      assertSeeked();
      // OK to create a new Pair; not used in the hot path
      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
        currValueLen, valuePair);
      ByteBuffer valBuf = valuePair.getFirst().duplicate();
      valBuf.position(valuePair.getSecond());
      valBuf.limit(currValueLen + valuePair.getSecond());
      return valBuf.slice();
    }

    protected void setNonSeekedState() {
      reset();
      blockBuffer = null;
      currKeyLen = 0;
      currValueLen = 0;
      currMemstoreTS = 0;
      currMemstoreTSLen = 0;
      currTagsLen = 0;
    }

    /**
     * Set the position on the current backing blockBuffer.
     */
    private void positionThisBlockBuffer() {
      try {
        blockBuffer.skip(getCurCellSerializedSize());
      } catch (IllegalArgumentException e) {
        LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen
          + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit()
          + "; currBlock currBlockOffset = " + this.curBlock.getOffset() + "; path="
          + reader.getPath());
        throw e;
      }
    }
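
    // The next() call path: next() -> positionThisBlockBuffer() skips past the current cell, then
    // _next() either reads the following cell's lengths in the same block or, when the buffer is
    // exhausted, advances to the next data block via positionForNextBlock()/isNextBlock().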
    /**
     * Set ourselves up for the next 'next' invocation, set up the next block.
     * @return True if there is more to read, else false if at the end.
     */
    private boolean positionForNextBlock() throws IOException {
      // Methods are small so they get inlined because they are 'hot'.
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
        setNonSeekedState();
        return false;
      }
      return isNextBlock();
    }

    private boolean isNextBlock() throws IOException {
      // Methods are small so they get inlined because they are 'hot'.
      HFileBlock nextBlock = readNextDataBlock();
      if (nextBlock == null) {
        setNonSeekedState();
        return false;
      }
      updateCurrentBlock(nextBlock);
      return true;
    }

    private final boolean _next() throws IOException {
      // Small method so can be inlined. It is a hot one.
      if (blockBuffer.remaining() <= 0) {
        return positionForNextBlock();
      }

      // We are still in the same block.
      readKeyValueLen();
      return true;
    }

    /**
     * Go to the next key/value in the block section. Loads the next block if necessary. If
     * successful, {@link #getKey()} and {@link #getValue()} can be called.
     * @return true if successfully navigated to the next key/value
     */
    @Override
    public boolean next() throws IOException {
      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
      assertSeeked();
      positionThisBlockBuffer();
      return _next();
    }

    /**
     * Positions this scanner at the start of the file.
     * @return false if empty file; i.e. a call to next would return false and the current key and
     *         value are undefined.
     */
    @Override
    public boolean seekTo() throws IOException {
      if (reader == null) {
        return false;
      }

      if (reader.getTrailer().getEntryCount() == 0) {
        // No data blocks.
        return false;
      }

      long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
        return processFirstDataBlock();
      }

      readAndUpdateNewBlock(firstDataBlockOffset);
      return true;
    }

    protected boolean processFirstDataBlock() throws IOException {
      blockBuffer.rewind();
      readKeyValueLen();
      return true;
    }

    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
      HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
        isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
      if (newBlock.getOffset() < 0) {
        releaseIfNotCurBlock(newBlock);
        throw new IOException(
          "Invalid offset=" + newBlock.getOffset() + ", path=" + reader.getPath());
      }
      updateCurrentBlock(newBlock);
    }

    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
      boolean rewind, Cell key, boolean seekBefore) throws IOException {
      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
        updateCurrentBlock(seekToBlock);
      } else if (rewind) {
        blockBuffer.rewind();
      }
      // Update the nextIndexedKey
      this.nextIndexedKey = nextIndexedKey;
      return blockSeek(key, seekBefore);
    }
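
    // Sanity checks for lengths parsed out of a block: a key length must be positive and a
    // value/tags length non-negative, and neither may exceed the block buffer's limit; anything
    // else indicates a corrupt or mis-parsed block and raises IllegalStateException.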
    /**
     * @return True if v <= 0 or v > current block buffer limit.
     */
    protected final boolean checkKeyLen(final int v) {
      return v <= 0 || v > this.blockBuffer.limit();
    }

    /**
     * @return True if v < 0 or v > current block buffer limit.
     */
    protected final boolean checkLen(final int v) {
      return v < 0 || v > this.blockBuffer.limit();
    }

    /**
     * Check key and value lengths are wholesome.
     */
    protected final void checkKeyValueLen() {
      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
          + " or currValueLen " + this.currValueLen + ". Block offset: "
          + this.curBlock.getOffset() + ", block length: " + this.blockBuffer.limit()
          + ", position: " + this.blockBuffer.position() + " (without header)." + ", path="
          + reader.getPath());
      }
    }

    /**
     * Updates the current block to be the given {@link HFileBlock}. Seeks to the first key/value
     * pair.
     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}; it's a totally new
     *                 block with a newly allocated {@link ByteBuff}, so if there is no further
     *                 reference to this block, we should release it carefully.
     */
    protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
      try {
        if (newBlock.getBlockType() != BlockType.DATA) {
          throw new IllegalStateException(
            "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
              + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
              + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
        }
        updateCurrBlockRef(newBlock);
        blockBuffer = newBlock.getBufferWithoutHeader();
        readKeyValueLen();
      } finally {
        releaseIfNotCurBlock(newBlock);
      }
      // Reset the next indexed key
      this.nextIndexedKey = null;
    }
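
    // Parses the first cell of an unencoded data block directly: skip past the key/value length
    // ints, then wrap the key bytes. Used by seekBefore() to compare the target key against a
    // candidate block's first key without doing a full seek.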
    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
      ByteBuff buffer = curBlock.getBufferWithoutHeader();
      // It is safe to manipulate this buffer because we own the buffer object.
      buffer.rewind();
      int klen = buffer.getInt();
      buffer.skip(Bytes.SIZEOF_INT); // Skip value len part
      ByteBuffer keyBuff = buffer.asSubByteBuffer(klen);
      if (keyBuff.hasArray()) {
        return new KeyValue.KeyOnlyKeyValue(keyBuff.array(),
          keyBuff.arrayOffset() + keyBuff.position(), klen);
      } else {
        return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen);
      }
    }

    @Override
    public String getKeyString() {
      return CellUtil.toString(getKey(), false);
    }

    @Override
    public String getValueString() {
      return ByteBufferUtils.toStringBinary(getValue());
    }

    public int compareKey(CellComparator comparator, Cell key) {
      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair);
      this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen, rowLen);
      return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv);
    }

    @Override
    public void shipped() throws IOException {
      this.returnBlocks(false);
    }
  }

  @Override
  public Path getPath() {
    return path;
  }

  @Override
  public DataBlockEncoding getDataBlockEncoding() {
    return dataBlockEncoder.getDataBlockEncoding();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /** Minor versions in HFile starting with this number have hbase checksums */
  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
  /** HFile minor version that does not support checksums */
  public static final int MINOR_VERSION_NO_CHECKSUM = 0;

  /** HFile minor version that introduced pbuf filetrailer */
  public static final int PBUF_TRAILER_MINOR_VERSION = 2;

  /**
   * The size of a (key length, value length) tuple that prefixes each entry in a data block.
   */
  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
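
  // Blocks handed out by getCachedBlock()/readBlock() below are reference counted. A caller that
  // does not pass a block on (or that hits an error path) must release() it, otherwise the
  // backing memory may never be returned to the allocator or the block cache.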
  /**
   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
   */
  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
    boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
    DataBlockEncoding expectedDataBlockEncoding) throws IOException {
    // Check cache for block. If found return.
    BlockCache cache = cacheConf.getBlockCache().orElse(null);
    if (cache != null) {
      HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
        updateCacheMetrics, expectedBlockType);
      if (cachedBlock != null) {
        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
          HFileBlock compressedBlock = cachedBlock;
          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
          // In case of a compressed block, after unpacking we can release the compressed block
          if (compressedBlock != cachedBlock) {
            compressedBlock.release();
          }
        }
        try {
          validateBlockType(cachedBlock, expectedBlockType);
        } catch (IOException e) {
          returnAndEvictBlock(cache, cacheKey, cachedBlock);
          throw e;
        }

        if (expectedDataBlockEncoding == null) {
          return cachedBlock;
        }
        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
        // Block types other than data blocks always have
        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
        // perform this check if cached block is a data block.
        if (
          cachedBlock.getBlockType().isData()
            && !actualDataBlockEncoding.equals(expectedDataBlockEncoding)
        ) {
          // This mismatch may happen if a Scanner, which is used for say a
          // compaction, tries to read an encoded block from the block cache.
          // The reverse might happen when an EncodedScanner tries to read
          // un-encoded blocks which were cached earlier.
          //
          // Because returning a data block with an implicit BlockType mismatch
          // would cause the requesting scanner to throw, a disk read should be
          // forced here. This will potentially cause a significant number of
          // cache misses, so we should keep track of this as it might justify
          // the work on a CompoundScanner.
          if (
            !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE)
              && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)
          ) {
            // If the block is encoded but the encoding does not match the
            // expected encoding it is likely the encoding was changed but the
            // block was not yet evicted. Evictions on file close happen async
            // so blocks with the old encoding still linger in cache for some
            // period of time. This event should be rare as it only happens on
            // schema definition change.
            LOG.info(
              "Evicting cached block with key {} because data block encoding mismatch; "
                + "expected {}, actual {}, path={}",
              cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path);
            // This is an error scenario, so we need to release the block here.
            returnAndEvictBlock(cache, cacheKey, cachedBlock);
          }
          return null;
        }
        return cachedBlock;
      }
    }
    return null;
  }
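
  // Used on error paths: drop our reference and evict the block so a corrupt or stale entry does
  // not get served to subsequent readers.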
  private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) {
    block.release();
    cache.evictBlock(cacheKey);
  }

  /**
   * @param metaBlockName name of the meta block to read
   * @param cacheBlock    Add block to cache, if found
   * @return block wrapped in a ByteBuffer, with header skipped
   */
  @Override
  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException {
    if (trailer.getMetaIndexCount() == 0) {
      return null; // there are no meta blocks
    }
    if (metaBlockIndexReader == null) {
      throw new IOException(path + " meta index not loaded");
    }

    byte[] mbname = Bytes.toBytes(metaBlockName);
    int block = metaBlockIndexReader.rootBlockContainingKey(mbname, 0, mbname.length);
    if (block == -1) {
      return null;
    }
    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);

    // Per meta key from any given file, synchronize reads for said block. This
    // is OK to do for meta blocks because the meta block index is always
    // single-level.
    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
      // Check cache for block. If found return.
      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
      BlockCacheKey cacheKey =
        new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);

      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
      HFileBlock cachedBlock =
        getCachedBlock(cacheKey, cacheBlock, false, true, true, BlockType.META, null);
      if (cachedBlock != null) {
        assert cachedBlock.isUnpacked() : "Packed block leak.";
        // Return a distinct 'shallow copy' of the block,
        // so pos does not get messed by the scanner
        return cachedBlock;
      }
      // Cache Miss, please load.

      HFileBlock compressedBlock =
        fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
      if (compressedBlock != uncompressedBlock) {
        compressedBlock.release();
      }

      // Cache the block
      if (cacheBlock) {
        cacheConf.getBlockCache().ifPresent(
          cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
      }
      return uncompressedBlock;
    }
  }

  /**
   * If the expected block is a data block, we'll allocate the block's ByteBuff from
   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator}, which usually hands out an off-heap
   * one; otherwise it will be allocated from the heap.
   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
   *      boolean, boolean)
   */
  private boolean shouldUseHeap(BlockType expectedBlockType) {
    if (!cacheConf.getBlockCache().isPresent()) {
      return false;
    } else if (!cacheConf.isCombinedBlockCache()) {
      // A block to cache in LruBlockCache must be a heap one. So just allocate block memory from
      // the heap, saving an extra off-heap-to-heap copy.
      return true;
    }
    return expectedBlockType != null && !expectedBlockType.isData();
  }
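
  // readBlock() below uses a check-then-lock pattern: first probe the cache without holding the
  // per-offset lock; on a miss (when configured to lock on cache miss) acquire the IdLock entry
  // for this offset and re-check the cache, so that concurrent readers of the same block do the
  // filesystem read only once.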
  @Override
  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
    final boolean cacheBlock, boolean pread, final boolean isCompaction,
    boolean updateCacheMetrics, BlockType expectedBlockType,
    DataBlockEncoding expectedDataBlockEncoding) throws IOException {
    if (dataBlockIndexReader == null) {
      throw new IOException(path + " block index not loaded");
    }
    long trailerOffset = trailer.getLoadOnOpenDataOffset();
    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
      throw new IOException("Requested block is out of range: " + dataBlockOffset
        + ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset()
        + ", trailer.getLoadOnOpenDataOffset: " + trailerOffset + ", path=" + path);
    }
    // For any given block from any given file, synchronize reads for said
    // block.
    // Without a cache, this synchronizing is needless overhead, but really
    // the other choice is to duplicate work (which the cache would prevent you
    // from doing).

    BlockCacheKey cacheKey =
      new BlockCacheKey(name, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType);

    boolean useLock = false;
    IdLock.Entry lockEntry = null;
    Span span = TraceUtil.getGlobalTracer().spanBuilder("HFileReaderImpl.readBlock").startSpan();
    try (Scope traceScope = span.makeCurrent()) {
      while (true) {
        // Check cache for block. If found return.
        if (cacheConf.shouldReadBlockFromCache(expectedBlockType)) {
          if (useLock) {
            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
          }
          // Try and get the block from the block cache. If the useLock variable is true then
          // this is the second time through the loop and it should not be counted as a block
          // cache miss.
          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, isCompaction,
            updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding);
          if (cachedBlock != null) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("From Cache " + cachedBlock);
            }
            span.addEvent("blockCacheHit");
            assert cachedBlock.isUnpacked() : "Packed block leak.";
            if (cachedBlock.getBlockType().isData()) {
              if (updateCacheMetrics) {
                HFile.DATABLOCK_READ_COUNT.increment();
              }
              // Validate encoding type for data blocks. We include encoding
              // type in the cache key, and we expect it to match on a cache hit.
              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
                // Remember to release the block when in exceptional path.
                cacheConf.getBlockCache().ifPresent(cache -> {
                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
                });
                throw new IOException("Cached block under key " + cacheKey + " "
                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
                  + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
              }
            }
            // Cache-hit. Return!
            return cachedBlock;
          }

          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
            // check cache again with lock
            useLock = true;
            continue;
          }
          // Carry on, please load.
        }

        span.addEvent("blockCacheMiss");
        // Load block from filesystem.
        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize,
          pread, !isCompaction, shouldUseHeap(expectedBlockType));
        validateBlockType(hfileBlock, expectedBlockType);
        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();

        // Cache the block if necessary
        cacheConf.getBlockCache().ifPresent(cache -> {
          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
            cache.cacheBlock(cacheKey,
              cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
              cacheConf.isInMemory());
          }
        });
        if (unpacked != hfileBlock) {
          // End of life here if hfileBlock is an independent block.
          hfileBlock.release();
        }
        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
          HFile.DATABLOCK_READ_COUNT.increment();
        }

        return unpacked;
      }
    } finally {
      if (lockEntry != null) {
        offsetLock.releaseLockEntry(lockEntry);
      }
      span.end();
    }
  }

  @Override
  public boolean hasMVCCInfo() {
    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
  }

  /**
   * Compares the actual type of a block retrieved from cache or disk with its expected type and
   * throws an exception in case of a mismatch. An expected block type of {@link BlockType#DATA}
   * is considered to match the actual block type {@link BlockType#ENCODED_DATA} as well.
   * @param block             a block retrieved from cache or disk
   * @param expectedBlockType the expected block type, or null to skip the check
   */
  private void validateBlockType(HFileBlock block, BlockType expectedBlockType)
    throws IOException {
    if (expectedBlockType == null) {
      return;
    }
    BlockType actualBlockType = block.getBlockType();
    if (expectedBlockType.isData() && actualBlockType.isData()) {
      // We consider DATA to match ENCODED_DATA for the purpose of this
      // verification.
      return;
    }
    if (actualBlockType != expectedBlockType) {
      throw new IOException("Expected block type " + expectedBlockType + ", " + "but got "
        + actualBlockType + ": " + block + ", path=" + path);
    }
  }

  /**
   * @return Last key as cell in the file. May be null if file has no entries. Note that this is
   *         not the last row key, but it is the Cell representation of the last key
   */
  @Override
  public Optional<Cell> getLastKey() {
    return dataBlockIndexReader.isEmpty()
      ? Optional.empty()
      : Optional.of(fileInfo.getLastKeyCell());
  }

  /**
   * @return Midkey for this file. We work with block boundaries only so the returned midkey is an
   *         approximation only.
   */
  @Override
  public Optional<Cell> midKey() throws IOException {
    return Optional.ofNullable(dataBlockIndexReader.midkey(this));
  }

  @Override
  public void close() throws IOException {
    close(cacheConf.shouldEvictOnClose());
  }

  @Override
  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
  }

  /** For testing */
  @Override
  public HFileBlock.FSReader getUncachedBlockReader() {
    return fsBlockReader;
  }

  /**
   * Scanner that operates on encoded data blocks.
   */
  protected static class EncodedScanner extends HFileScannerImpl {
    private final HFileBlockDecodingContext decodingCtx;
    private final DataBlockEncoder.EncodedSeeker seeker;
    private final DataBlockEncoder dataBlockEncoder;

    public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, boolean pread,
      boolean isCompaction, HFileContext meta, Configuration conf) {
      super(reader, cacheBlocks, pread, isCompaction);
      DataBlockEncoding encoding = reader.getDataBlockEncoding();
      dataBlockEncoder = encoding.getEncoder();
      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(conf, meta);
      seeker = dataBlockEncoder.createSeeker(decodingCtx);
    }

    @Override
    public boolean isSeeked() {
      return curBlock != null;
    }

    @Override
    public void setNonSeekedState() {
      reset();
    }

    /**
     * Updates the current block to be the given {@link HFileBlock}. Seeks to the first key/value
     * pair.
     * @param newBlock the block to make current, as read by {@link HFileReaderImpl#readBlock};
     *                 it's a totally new block with a newly allocated {@link ByteBuff}, so if
     *                 there is no further reference to this block, we should release it
     *                 carefully.
     */
    @Override
    protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
      try {
        // sanity checks
        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
        }
        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
          String encoderCls = dataBlockEncoder.getClass().getName();
          throw new CorruptHFileException(
            "Encoder " + encoderCls + " doesn't support data block encoding "
              + DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ", path="
              + reader.getPath());
        }
        updateCurrBlockRef(newBlock);
        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
        seeker.setCurrentBuffer(encodedBuffer);
      } finally {
        releaseIfNotCurBlock(newBlock);
      }
      // Reset the next indexed key
      this.nextIndexedKey = null;
    }
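
    // Buffer layout of an ENCODED_DATA block: block header, then a 2-byte data block encoding id
    // (DataBlockEncoding.ID_SIZE), then the encoded payload. getEncodedBuffer() slices out just
    // the payload for the seeker.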
    private ByteBuff getEncodedBuffer(HFileBlock newBlock) {
      ByteBuff origBlock = newBlock.getBufferReadOnly();
      int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE;
      origBlock.position(pos);
      origBlock
        .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
      return origBlock.slice();
    }

    @Override
    protected boolean processFirstDataBlock() throws IOException {
      seeker.rewind();
      return true;
    }

    @Override
    public boolean next() throws IOException {
      boolean isValid = seeker.next();
      if (!isValid) {
        HFileBlock newBlock = readNextDataBlock();
        isValid = newBlock != null;
        if (isValid) {
          updateCurrentBlock(newBlock);
        } else {
          setNonSeekedState();
        }
      }
      return isValid;
    }

    @Override
    public Cell getKey() {
      assertValidSeek();
      return seeker.getKey();
    }

    @Override
    public ByteBuffer getValue() {
      assertValidSeek();
      return seeker.getValueShallowCopy();
    }

    @Override
    public Cell getCell() {
      if (this.curBlock == null) {
        return null;
      }
      return seeker.getCell();
    }

    @Override
    public String getKeyString() {
      return CellUtil.toString(getKey(), true);
    }

    @Override
    public String getValueString() {
      ByteBuffer valueBuffer = getValue();
      return ByteBufferUtils.toStringBinary(valueBuffer);
    }

    private void assertValidSeek() {
      if (this.curBlock == null) {
        throw new NotSeekedException(reader.getPath());
      }
    }

    @Override
    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
      return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock));
    }

    @Override
    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
      boolean rewind, Cell key, boolean seekBefore) throws IOException {
      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
        updateCurrentBlock(seekToBlock);
      } else if (rewind) {
        seeker.rewind();
      }
      this.nextIndexedKey = nextIndexedKey;
      return seeker.seekToKeyInBlock(key, seekBefore);
    }

    @Override
    public int compareKey(CellComparator comparator, Cell key) {
      return seeker.compareKey(comparator, key);
    }
  }

  /**
   * Returns a buffer with the Bloom filter metadata. The caller takes ownership of the buffer.
   */
  @Override
  public DataInput getGeneralBloomFilterMetadata() throws IOException {
    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
  }

  @Override
  public DataInput getDeleteBloomFilterMetadata() throws IOException {
    return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
  }

  private DataInput getBloomFilterMetadata(BlockType blockType) throws IOException {
    if (
      blockType != BlockType.GENERAL_BLOOM_META && blockType != BlockType.DELETE_FAMILY_BLOOM_META
    ) {
      throw new RuntimeException(
        "Block Type: " + blockType.toString() + " is not supported, path=" + path);
    }

    for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) {
      if (b.getBlockType() == blockType) {
        return b.getByteStream();
      }
    }
    return null;
  }

  public boolean isFileInfoLoaded() {
    return true; // We load file info in constructor in version 2.
  }

  @Override
  public HFileContext getFileContext() {
    return hfileContext;
  }

  /**
   * Returns false if block prefetching was requested for this file and has not completed, true
   * otherwise
   */
  @Override
  public boolean prefetchComplete() {
    return PrefetchExecutor.isCompleted(path);
  }

  /**
   * Create a Scanner on this file. No seeks or reads are done on creation. Call
   * {@link HFileScanner#seekTo(Cell)} to position and start the read. There is nothing to clean
   * up in a Scanner. Letting go of your references to the scanner is sufficient. NOTE: Do not use
   * this overload of getScanner for compactions. See
   * {@link #getScanner(Configuration, boolean, boolean, boolean)}
   * @param conf        Store configuration.
   * @param cacheBlocks True if we should cache blocks read in by this scanner.
   * @param pread       Use positional read rather than seek+read if true (pread is better for
   *                    random reads, seek+read is better scanning).
   * @return Scanner on this file.
   */
  @Override
  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread) {
    return getScanner(conf, cacheBlocks, pread, false);
  }
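
  // For compactions, callers should use the four-argument overload below with isCompaction=true
  // so that caching and encoding-in-cache decisions are made on the compaction path; e.g.
  // (illustrative only): reader.getScanner(conf, false, true, true) for a cache-skipping
  // compaction scan. Whether a compaction uses pread or streaming read is the caller's choice.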
  /**
   * Create a Scanner on this file. No seeks or reads are done on creation. Call
   * {@link HFileScanner#seekTo(Cell)} to position and start the read. There is nothing to clean
   * up in a Scanner. Letting go of your references to the scanner is sufficient.
   * @param conf         Store configuration.
   * @param cacheBlocks  True if we should cache blocks read in by this scanner.
   * @param pread        Use positional read rather than seek+read if true (pread is better for
   *                     random reads, seek+read is better scanning).
   * @param isCompaction is scanner being used for a compaction?
   * @return Scanner on this file.
   */
  @Override
  public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread,
    final boolean isCompaction) {
    if (dataBlockEncoder.useEncodedScanner()) {
      return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext, conf);
    }
    return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
  }

  public int getMajorVersion() {
    return 3;
  }

  @Override
  public void unbufferStream() {
    fsBlockReader.unbufferStream();
  }
}