/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.DataInput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Optional;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
import org.apache.hadoop.hbase.ByteBufferKeyValue;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.NoTagsByteBufferKeyValue;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.SizeCachedKeyValue;
import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IdLock;
import org.apache.hadoop.hbase.util.ObjectIntPair;
import org.apache.hadoop.io.WritableUtils;
import org.apache.htrace.core.TraceScope;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

/**
 * Implementation of {@link HFile.Reader} that can handle all hfile versions.
 */
@InterfaceAudience.Private
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
  // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into
  // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against
  // the MaxInlineLevel limit because too many tiers were involved in reading from an hfile. It
  // was also hard to navigate the source code when so many classes participated in a read.
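
  // Illustrative usage sketch (added commentary, not part of the original source): a caller that
  // already holds an HFile.Reader typically walks the file with a scanner. Every method shown
  // below is declared on this class or on HFileScanner; error handling and try/finally are
  // elided, so treat this as a sketch rather than production code.
  //
  //   HFileScanner scanner = reader.getScanner(true, true); // cacheBlocks=true, pread=true
  //   if (scanner.seekTo()) {            // position at the first cell, if the file is non-empty
  //     do {
  //       Cell cell = scanner.getCell();
  //       // ... consume cell ...
  //     } while (scanner.next());        // advance; loads the next data block as needed
  //   }
  //   scanner.close();                   // releases blocks still referenced by the scanner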
  private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);

  /** Data block index reader keeping the root data index in memory */
  protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;

  /** Meta block index reader -- always single level */
  protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;

  protected FixedFileTrailer trailer;

  private final boolean primaryReplicaReader;

  /**
   * What kind of data block encoding should be used while reading, writing,
   * and handling cache.
   */
  protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;

  /** Block cache configuration. */
  protected final CacheConfig cacheConf;

  protected ReaderContext context;

  protected final HFileInfo fileInfo;

  /** Path of file */
  protected final Path path;

  /** File name to be used for block names */
  protected final String name;

  private Configuration conf;

  protected HFileContext hfileContext;

  /** Filesystem-level block reader. */
  protected HFileBlock.FSReader fsBlockReader;

  /**
   * A "sparse lock" implementation allowing locking on a particular block
   * identified by its offset. The purpose of this is to avoid two clients loading
   * the same block, and have all but one client wait to get the block from the
   * cache.
   */
  private IdLock offsetLock = new IdLock();

  /** Minimum minor version supported by this HFile format */
  static final int MIN_MINOR_VERSION = 0;

  /** Maximum minor version supported by this HFile format */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
  static final int MAX_MINOR_VERSION = 3;

  /** Minor versions starting with this number have faked index key */
  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;

  /**
   * Opens an HFile.
   * @param context Reader context info
   * @param fileInfo HFile info
   * @param cacheConf Cache configuration.
   * @param conf Configuration
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
  public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
      Configuration conf) throws IOException {
    this.cacheConf = cacheConf;
    this.context = context;
    this.path = context.getFilePath();
    this.name = path.getName();
    this.conf = conf;
    this.primaryReplicaReader = context.isPrimaryReplicaReader();
    this.fileInfo = fileInfo;
    this.trailer = fileInfo.getTrailer();
    this.hfileContext = fileInfo.getHFileContext();
    this.fsBlockReader = new HFileBlock.FSReaderImpl(context, hfileContext,
        cacheConf.getByteBuffAllocator());
    this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
    fsBlockReader.setDataBlockEncoder(dataBlockEncoder);
    dataBlockIndexReader = fileInfo.getDataBlockIndexReader();
    metaBlockIndexReader = fileInfo.getMetaBlockIndexReader();
  }

  @SuppressWarnings("serial")
  public static class BlockIndexNotLoadedException extends IllegalStateException {
    public BlockIndexNotLoadedException(Path path) {
      // Add a message in case anyone relies on it as opposed to class name.
      super(path + " block index not loaded");
    }
  }

  private Optional<String> toStringFirstKey() {
    return getFirstKey().map(CellUtil::getCellKeyAsString);
  }

  private Optional<String> toStringLastKey() {
    return getLastKey().map(CellUtil::getCellKeyAsString);
  }

  @Override
  public String toString() {
    return "reader=" + path.toString() +
        (!isFileInfoLoaded()? "":
          ", compression=" + trailer.getCompressionCodec().getName() +
          ", cacheConf=" + cacheConf +
          ", firstKey=" + toStringFirstKey() +
          ", lastKey=" + toStringLastKey()) +
          ", avgKeyLen=" + fileInfo.getAvgKeyLen() +
          ", avgValueLen=" + fileInfo.getAvgValueLen() +
          ", entries=" + trailer.getEntryCount() +
          ", length=" + context.getFileSize();
  }

  @Override
  public long length() {
    return context.getFileSize();
  }

  /**
   * @return the first key in the file, or empty if the file has no entries. Note
   *         that this is not the first row key, but rather the byte form of the
   *         first KeyValue.
   */
  @Override
  public Optional<Cell> getFirstKey() {
    if (dataBlockIndexReader == null) {
      throw new BlockIndexNotLoadedException(path);
    }
    return dataBlockIndexReader.isEmpty() ? Optional.empty()
        : Optional.of(dataBlockIndexReader.getRootBlockKey(0));
  }

  /**
   * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's
   * patch goes in to eliminate {@link KeyValue} here.
   *
   * @return the first row key, or empty if the file has no entries.
   */
  @Override
  public Optional<byte[]> getFirstRowKey() {
    // We have to copy the row part to form the row key alone
    return getFirstKey().map(CellUtil::cloneRow);
  }

  /**
   * TODO left from {@link HFile} version 1: move this to StoreFile after
   * Ryan's patch goes in to eliminate {@link KeyValue} here.
   *
   * @return the last row key, or empty if the file has no entries.
   */
  @Override
  public Optional<byte[]> getLastRowKey() {
    // We have to copy the row part to form the row key alone
    return getLastKey().map(CellUtil::cloneRow);
  }

  /** @return number of KV entries in this HFile */
  @Override
  public long getEntries() {
    return trailer.getEntryCount();
  }

  /** @return comparator */
  @Override
  public CellComparator getComparator() {
    return this.hfileContext.getCellComparator();
  }

  @VisibleForTesting
  public Compression.Algorithm getCompressionAlgorithm() {
    return trailer.getCompressionCodec();
  }

  /**
   * @return the total heap size of data and meta block indexes in bytes. Does
   *         not take into account non-root blocks of a multilevel data index.
   */
  @Override
  public long indexSize() {
    return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0)
        + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() : 0);
  }

  @Override
  public String getName() {
    return name;
  }

  @Override
  public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) {
    this.dataBlockEncoder = dataBlockEncoder;
    this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder);
  }

  @Override
  public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) {
    this.dataBlockIndexReader = reader;
  }

  @Override
  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
    return dataBlockIndexReader;
  }

  @Override
  public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) {
    this.metaBlockIndexReader = reader;
  }

  @Override
  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
    return metaBlockIndexReader;
  }

  @Override
  public FixedFileTrailer getTrailer() {
    return trailer;
  }

  @Override
  public ReaderContext getContext() {
    return this.context;
  }

  @Override
  public HFileInfo getHFileInfo() {
    return this.fileInfo;
  }

  @Override
  public boolean isPrimaryReplicaReader() {
    return primaryReplicaReader;
  }

  /**
   * An exception thrown when an operation requiring a scanner to be seeked
   * is invoked on a scanner that is not seeked.
   */
  @SuppressWarnings("serial")
  public static class NotSeekedException extends IllegalStateException {
    public NotSeekedException(Path path) {
      super(path + " not seeked to a key/value");
    }
  }

  protected static class HFileScannerImpl implements HFileScanner {
    private ByteBuff blockBuffer;
    protected final boolean cacheBlocks;
    protected final boolean pread;
    protected final boolean isCompaction;
    private int currKeyLen;
    private int currValueLen;
    private int currMemstoreTSLen;
    private long currMemstoreTS;
    protected final HFile.Reader reader;
    private int currTagsLen;
    // buffer backed keyonlyKV
    private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
    // A pair for reuse in blockSeek() so that we don't generate a lot of garbage objects
    final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();

    /**
     * The next indexed key is to keep track of the indexed key of the next data block.
     * If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the
     * current data block is the last data block.
     *
     * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
     */
    protected Cell nextIndexedKey;

    // Current block being used. NOTICE: DON'T release curBlock separately except in shipped() or
    // close() methods. The shipped() or close() call will do the release finally; even if an
    // exception occurs, the curBlock will be released by the close() method (see
    // RegionScannerImpl#handleException). Please call releaseIfNotCurBlock() to release an
    // unreferenced block.
    protected HFileBlock curBlock;
    // Previous blocks that were used in the course of the read
    protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();

    public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
        final boolean pread, final boolean isCompaction) {
      this.reader = reader;
      this.cacheBlocks = cacheBlocks;
      this.pread = pread;
      this.isCompaction = isCompaction;
    }

    void updateCurrBlockRef(HFileBlock block) {
      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
        return;
      }
      if (this.curBlock != null && this.curBlock.isSharedMem()) {
        prevBlocks.add(this.curBlock);
      }
      this.curBlock = block;
    }

    void reset() {
      // We don't have to keep a ref to a heap block
      if (this.curBlock != null && this.curBlock.isSharedMem()) {
        this.prevBlocks.add(this.curBlock);
      }
      this.curBlock = null;
    }

    private void returnBlocks(boolean returnAll) {
      this.prevBlocks.forEach(HFileBlock::release);
      this.prevBlocks.clear();
      if (returnAll && this.curBlock != null) {
        this.curBlock.release();
        this.curBlock = null;
      }
    }

    @Override
    public boolean isSeeked() {
      return blockBuffer != null;
    }

    @Override
    public String toString() {
      return "HFileScanner for reader " + String.valueOf(getReader());
    }

    protected void assertSeeked() {
      if (!isSeeked()) {
        throw new NotSeekedException(reader.getPath());
      }
    }

    @Override
    public HFile.Reader getReader() {
      return reader;
    }

    // From non-encoded HFiles, we always read back a KeyValue or one of its descendants. (Note:
    // when an HFile block is in a DBB, it will be an OffheapKV.) So all parts of the Cell are in
    // a contiguous array/buffer. How many bytes we should wrap to make the KV is what this
    // method returns.
    private int getKVBufSize() {
      int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
      if (currTagsLen > 0) {
        kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
      }
      return kvBufSize;
    }

    @Override
    public void close() {
      if (!pread) {
        // For seek + pread, the stream socket should be closed when the scanner is closed.
        // HBASE-9393
        reader.unbufferStream();
      }
      this.returnBlocks(true);
    }

    // Returns the #bytes in the HFile for the current cell. Used to skip these many bytes in the
    // current HFile block's buffer so as to position to the next cell.
    private int getCurCellSerializedSize() {
      int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
          + currMemstoreTSLen;
      if (this.reader.getFileContext().isIncludesTags()) {
        curCellSize += Bytes.SIZEOF_SHORT + currTagsLen;
      }
      return curCellSize;
    }
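
    // Illustrative note (added commentary, not from the original source): within a data block,
    // each cell is laid out exactly as readKeyValueLen() and blockSeek() below expect it:
    //
    //   keyLen   : int   (4 bytes)  \  read together as one long in readKeyValueLen()
    //   valueLen : int   (4 bytes)  /
    //   key      : keyLen bytes
    //   value    : valueLen bytes
    //   tagsLen  : short (2 bytes)  -- only present if the file includes tags
    //   tags     : tagsLen bytes    -- only present if tagsLen > 0
    //   mvcc     : vint             -- only present if the file includes memstore timestamps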
    protected void readKeyValueLen() {
      // This is a hot method. We go out of our way to make this method short so it can be
      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
      // because it is faster than going via range-checked ByteBuffer methods or going through a
      // byte buffer array a byte at a time.
      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
      // Trying to imitate what was done before - need to profile whether this is better or
      // whether the earlier way (doing mark and reset) is better.
      // But ensure that you read a long instead of two ints.
      long ll = blockBuffer.getLongAfterPosition(0);
      // Read top half as an int of key length and bottom int as value length
      this.currKeyLen = (int)(ll >> Integer.SIZE);
      this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
      checkKeyValueLen();
      // Move position past the key and value lengths and then beyond the key and value
      int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
      if (reader.getFileContext().isIncludesTags()) {
        // Tags length is a short.
        this.currTagsLen = blockBuffer.getShortAfterPosition(p);
        checkTagsLen();
        p += (Bytes.SIZEOF_SHORT + currTagsLen);
      }
      readMvccVersion(p);
    }

    private final void checkTagsLen() {
      if (checkLen(this.currTagsLen)) {
        throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen +
            ". Block offset: " + curBlock.getOffset() + ", block length: " +
            this.blockBuffer.limit() +
            ", position: " + this.blockBuffer.position() + " (without header)." +
            " path=" + reader.getPath());
      }
    }

    /**
     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
     */
    protected void readMvccVersion(final int offsetFromPos) {
      // See if we even need to decode mvcc.
      if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
        return;
      }
      if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) {
        currMemstoreTS = 0;
        currMemstoreTSLen = 1;
        return;
      }
      _readMvccVersion(offsetFromPos);
    }

    /**
     * Actually do the mvcc read. Does no checks.
     */
    private void _readMvccVersion(int offsetFromPos) {
      // This is Bytes#bytesToVint inlined so we can save a few instructions in this hot method;
      // i.e. previously, with a one-byte vint, we'd redo the vint call just to find its size.
      // Also the method is kept small so it can be inlined.
      byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos);
      int len = WritableUtils.decodeVIntSize(firstByte);
      if (len == 1) {
        this.currMemstoreTS = firstByte;
      } else {
        int remaining = len - 1;
        long i = 0;
        offsetFromPos++;
        if (remaining >= Bytes.SIZEOF_INT) {
          // The int read has to be converted to unsigned long, hence the & op
          i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL);
          remaining -= Bytes.SIZEOF_INT;
          offsetFromPos += Bytes.SIZEOF_INT;
        }
        if (remaining >= Bytes.SIZEOF_SHORT) {
          short s = blockBuffer.getShortAfterPosition(offsetFromPos);
          i = i << 16;
          i = i | (s & 0xFFFF);
          remaining -= Bytes.SIZEOF_SHORT;
          offsetFromPos += Bytes.SIZEOF_SHORT;
        }
        for (int idx = 0; idx < remaining; idx++) {
          byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx);
          i = i << 8;
          i = i | (b & 0xFF);
        }
        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
      }
      this.currMemstoreTSLen = len;
    }
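
    // Illustrative note (added commentary, not from the original source): Hadoop vints, as
    // written by WritableUtils#writeVLong, store values in [-112, 127] in a single byte. Larger
    // values store a marker byte first (encoding sign and byte count, decoded by
    // decodeVIntSize/isNegativeVInt above) followed by the magnitude big-endian; negative values
    // are stored as ~value, which is why the decode above ends with a conditional complement.
    // For example, mvcc = 300 (0x012C) encodes as three bytes: marker, 0x01, 0x2C.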
    /**
     * Within a loaded block, seek looking for the last key that is smaller than
     * or equal to the key we are interested in.
     * A note on the seekBefore: if you have seekBefore = true, AND the first
     * key in the block = key, then you'll get thrown exceptions. The caller has
     * to check for that case and load the previous block as appropriate.
     * @param key
     *          the key to find
     * @param seekBefore
     *          find the key before the given key in case of exact match.
     * @return 0 in case of an exact key match, 1 in case of an inexact match,
     *         -2 in case of an inexact match where, furthermore, the input key is
     *         less than the first key of the current block (e.g. using a faked
     *         index key)
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      int klen, vlen, tlen = 0;
      int lastKeyValueSize = -1;
      int offsetFromPos;
      do {
        offsetFromPos = 0;
        // Better to ensure that we use the BB Utils here
        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
        klen = (int)(ll >> Integer.SIZE);
        vlen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException("Invalid klen " + klen + " or vlen "
              + vlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)."
              + " path=" + reader.getPath());
        }
        offsetFromPos += Bytes.SIZEOF_LONG;
        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen);
        int comp =
            PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
        offsetFromPos += klen + vlen;
        if (this.reader.getFileContext().isIncludesTags()) {
          // Read short as unsigned, high byte first
          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
              ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
          if (checkLen(tlen)) {
            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
                + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
                + blockBuffer.position() + " (without header)."
                + " path=" + reader.getPath());
          }
          // add the two bytes read for the tags.
          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
        }
        if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
          // Directly read the mvcc based on the current position
          readMvccVersion(offsetFromPos);
        }
        if (comp == 0) {
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              throw new IllegalStateException("blockSeek with seekBefore "
                  + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
                  + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
                  + curBlock.getOnDiskSizeWithHeader()
                  + ", path=" + reader.getPath());
            }
            blockBuffer.moveBack(lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          currKeyLen = klen;
          currValueLen = vlen;
          currTagsLen = tlen;
          return 0; // indicate exact match
        } else if (comp < 0) {
          if (lastKeyValueSize > 0) {
            blockBuffer.moveBack(lastKeyValueSize);
          }
          readKeyValueLen();
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }
        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
        // include tag length also if tags are included with the KV
        if (reader.getFileContext().isIncludesTags()) {
          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
        }
        blockBuffer.skip(lastKeyValueSize);
      } while (blockBuffer.hasRemaining());

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.moveBack(lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }

    @Override
    public Cell getNextIndexedKey() {
      return nextIndexedKey;
    }

    @Override
    public int seekTo(Cell key) throws IOException {
      return seekTo(key, true);
    }

    @Override
    public int reseekTo(Cell key) throws IOException {
      int compared;
      if (isSeeked()) {
        compared = compareKey(reader.getComparator(), key);
        if (compared < 1) {
          // If the required key is less than or equal to current key, then
          // don't do anything.
          return compared;
        } else {
          // The comparison with no_next_index_key has to be checked
          if (this.nextIndexedKey != null &&
              (this.nextIndexedKey == KeyValueScanner.NO_NEXT_INDEXED_KEY || PrivateCellUtil
                  .compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey) < 0)) {
            // The reader shall continue to scan the current data block instead
            // of querying the block index as long as it knows the target key is
            // strictly smaller than the next indexed key or the current data
            // block is the last data block.
            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key,
                false);
          }
        }
      }
      // Don't rewind on a reseek operation, because reseek implies that we are
      // always going forward in the file.
      return seekTo(key, false);
    }

    /**
     * An internal API function. Seek to the given key, optionally rewinding to
     * the first key of the block before doing the seek.
     *
     * @param key - a cell representing the key that we need to fetch
     * @param rewind whether to rewind to the first key of the block before
     *        doing the seek. If this is false, we are assuming we never go
     *        back, otherwise the result is undefined.
     * @return -1 if the key is earlier than the first key of the file,
     *         0 if we are at the given key, 1 if we are past the given key,
     *         -2 if the key is earlier than the first key of the file while
     *         using a faked index key
     */
    public int seekTo(Cell key, boolean rewind) throws IOException {
      HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
      BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
          cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader);
      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
        // This happens if the key e.g. falls before the beginning of the file.
        return -1;
      }
      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
          blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
    }

    @Override
    public boolean seekBefore(Cell key) throws IOException {
      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
          cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction),
          reader);
      if (seekToBlock == null) {
        return false;
      }
      Cell firstKey = getFirstKeyCellInBlock(seekToBlock);
      if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) {
        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
        // The key we are interested in
        if (previousBlockOffset == -1) {
          // we have a 'problem', the key we want is the first of the file.
          releaseIfNotCurBlock(seekToBlock);
          return false;
        }

        // The first key in the current block 'seekToBlock' is greater than the given
        // seekBefore key. We will go ahead by reading the next block that satisfies the
        // given key. Return the current block before reading the next one.
        releaseIfNotCurBlock(seekToBlock);
        // It is important that we compute and pass onDiskSize to the block
        // reader so that it does not have to read the header separately to
        // figure out the size. Currently, we do not have a way to do this
        // correctly in the general case however.
        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
        int prevBlockSize = -1;
        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
            isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
        // TODO shortcut: seek forward in this block to the last key of the
        // block.
      }
      loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true);
      return true;
    }

    /**
     * The curBlock will be released by the shipped or close method, so we only need to release
     * a block if it was read from the HFile earlier and is not referenced by curBlock.
     */
    protected void releaseIfNotCurBlock(HFileBlock block) {
      if (curBlock != block) {
        block.release();
      }
    }

    /**
     * Scans blocks in the "scanned" section of the {@link HFile} until the next
     * data block is found.
     *
     * @return the next block, or null if there are no more data blocks
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
        justification="Yeah, unnecessary null check; could do w/ clean up")
    protected HFileBlock readNextDataBlock() throws IOException {
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (curBlock == null) {
        return null;
      }
      HFileBlock block = this.curBlock;
      do {
        if (block.getOffset() >= lastDataBlockOffset) {
          releaseIfNotCurBlock(block);
          return null;
        }
        if (block.getOffset() < 0) {
          releaseIfNotCurBlock(block);
          throw new IOException("Invalid block offset=" + block + ", path=" + reader.getPath());
        }
        // We are reading the next block without block type validation, because
        // it might turn out to be a non-data block.
        block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
            block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
            getEffectiveDataBlockEncoding());
        if (block != null && !block.getBlockType().isData()) {
          // Whatever block we read, we will only be returning it if it is a data block.
          // If it is a non-data block, release it here.
          block.release();
        }
      } while (!block.getBlockType().isData());
      return block;
    }

    public DataBlockEncoding getEffectiveDataBlockEncoding() {
      return this.reader.getEffectiveEncodingInCache(isCompaction);
    }

    @Override
    public Cell getCell() {
      if (!isSeeked()) {
        return null;
      }

      Cell ret;
      int cellBufSize = getKVBufSize();
      long seqId = 0L;
      if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) {
        seqId = currMemstoreTS;
      }
      if (blockBuffer.hasArray()) {
        // TODO : reduce the varieties of KV here. Check if, based on a boolean,
        // we can handle the 'no tags' case.
        if (currTagsLen > 0) {
          ret = new SizeCachedKeyValue(blockBuffer.array(),
              blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId);
        } else {
          ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(),
              blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId);
        }
      } else {
        ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
        if (buf.isDirect()) {
          ret = currTagsLen > 0 ? new ByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId)
              : new NoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId);
        } else {
          if (currTagsLen > 0) {
            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
                cellBufSize, seqId);
          } else {
            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
                cellBufSize, seqId);
          }
        }
      }
      return ret;
    }

    @Override
    public Cell getKey() {
      assertSeeked();
      // Create a new object so that this getKey is cached as firstKey, lastKey
      ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>();
      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair);
      ByteBuffer keyBuf = keyPair.getFirst();
      if (keyBuf.hasArray()) {
        return new KeyValue.KeyOnlyKeyValue(keyBuf.array(), keyBuf.arrayOffset()
            + keyPair.getSecond(), currKeyLen);
      } else {
        // Better to do a copy here instead of holding on to this BB so that
        // we can release the blocks referring to this key. This key is specifically used
        // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner
        // every time. So holding onto the BB (in case of DBB) is not advised here.
        byte[] key = new byte[currKeyLen];
        ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen);
        return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen);
      }
    }

    @Override
    public ByteBuffer getValue() {
      assertSeeked();
      // OK to create a new Pair. Not used in the hot path.
      ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>();
      this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
          currValueLen, valuePair);
      ByteBuffer valBuf = valuePair.getFirst().duplicate();
      valBuf.position(valuePair.getSecond());
      valBuf.limit(currValueLen + valuePair.getSecond());
      return valBuf.slice();
    }

    protected void setNonSeekedState() {
      reset();
      blockBuffer = null;
      currKeyLen = 0;
      currValueLen = 0;
      currMemstoreTS = 0;
      currMemstoreTSLen = 0;
      currTagsLen = 0;
    }

    /**
     * Set the position on the current backing blockBuffer.
     */
    private void positionThisBlockBuffer() {
      try {
        blockBuffer.skip(getCurCellSerializedSize());
      } catch (IllegalArgumentException e) {
        LOG.error("Current pos = " + blockBuffer.position()
            + "; currKeyLen = " + currKeyLen + "; currValLen = "
            + currValueLen + "; block limit = " + blockBuffer.limit()
            + "; currBlock currBlockOffset = " + this.curBlock.getOffset()
            + "; path=" + reader.getPath());
        throw e;
      }
    }
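
    // Added commentary (not from the original source): the next() hot path below is deliberately
    // split into small methods so that the JIT can inline them:
    //
    //   next() -> positionThisBlockBuffer()   skip past the current cell
    //          -> _next()                     read the next cell's lengths, or...
    //          -> positionForNextBlock()      ...detect that the block is exhausted
    //          -> isNextBlock()               load the next data block via readNextDataBlock()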
    /**
     * Set ourselves up for the next 'next' invocation; set up the next block.
     * @return True if there is more to read, else false if at the end.
     */
    private boolean positionForNextBlock() throws IOException {
      // Methods are small so they get inlined because they are 'hot'.
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (this.curBlock.getOffset() >= lastDataBlockOffset) {
        setNonSeekedState();
        return false;
      }
      return isNextBlock();
    }

    private boolean isNextBlock() throws IOException {
      // Methods are small so they get inlined because they are 'hot'.
      HFileBlock nextBlock = readNextDataBlock();
      if (nextBlock == null) {
        setNonSeekedState();
        return false;
      }
      updateCurrentBlock(nextBlock);
      return true;
    }

    private final boolean _next() throws IOException {
      // Small method so it can be inlined. It is a hot one.
      if (blockBuffer.remaining() <= 0) {
        return positionForNextBlock();
      }

      // We are still in the same block.
      readKeyValueLen();
      return true;
    }

    /**
     * Go to the next key/value in the block section. Loads the next block if
     * necessary. If successful, {@link #getKey()} and {@link #getValue()} can
     * be called.
     *
     * @return true if successfully navigated to the next key/value
     */
    @Override
    public boolean next() throws IOException {
      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
      assertSeeked();
      positionThisBlockBuffer();
      return _next();
    }

    /**
     * Positions this scanner at the start of the file.
     *
     * @return false if empty file; i.e. a call to next would return false and
     *         the current key and value are undefined.
     */
    @Override
    public boolean seekTo() throws IOException {
      if (reader == null) {
        return false;
      }

      if (reader.getTrailer().getEntryCount() == 0) {
        // No data blocks.
        return false;
      }

      long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
        return processFirstDataBlock();
      }

      readAndUpdateNewBlock(firstDataBlockOffset);
      return true;
    }

    protected boolean processFirstDataBlock() throws IOException {
      blockBuffer.rewind();
      readKeyValueLen();
      return true;
    }

    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
      HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
      if (newBlock.getOffset() < 0) {
        releaseIfNotCurBlock(newBlock);
        throw new IOException("Invalid offset=" + newBlock.getOffset() +
            ", path=" + reader.getPath());
      }
      updateCurrentBlock(newBlock);
    }

    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
        Cell key, boolean seekBefore) throws IOException {
      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
        updateCurrentBlock(seekToBlock);
      } else if (rewind) {
        blockBuffer.rewind();
      }
      // Update the nextIndexedKey
      this.nextIndexedKey = nextIndexedKey;
      return blockSeek(key, seekBefore);
    }

    /**
     * @return True if v <= 0 or v > current block buffer limit.
     */
    protected final boolean checkKeyLen(final int v) {
      return v <= 0 || v > this.blockBuffer.limit();
    }

    /**
     * @return True if v < 0 or v > current block buffer limit.
     */
    protected final boolean checkLen(final int v) {
      return v < 0 || v > this.blockBuffer.limit();
    }

    /**
     * Check that the key and value lengths are wholesome.
     */
    protected final void checkKeyValueLen() {
      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
            + " or currValueLen " + this.currValueLen + ". Block offset: "
            + this.curBlock.getOffset() + ", block length: "
            + this.blockBuffer.limit() + ", position: " + this.blockBuffer.position()
            + " (without header)." + ", path=" + reader.getPath());
      }
    }

    /**
     * Updates the current block to be the given {@link HFileBlock}. Seeks to the first
     * key/value pair.
     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}; it's a totally new
     *          block with a newly allocated {@link ByteBuff}, so if there is no further reference
     *          to this block, we should release it carefully.
     */
    protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
      try {
        if (newBlock.getBlockType() != BlockType.DATA) {
          throw new IllegalStateException(
              "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
                  + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
                  + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
        }
        updateCurrBlockRef(newBlock);
        blockBuffer = newBlock.getBufferWithoutHeader();
        readKeyValueLen();
      } finally {
        releaseIfNotCurBlock(newBlock);
      }
      // Reset the next indexed key
      this.nextIndexedKey = null;
    }

    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
      ByteBuff buffer = curBlock.getBufferWithoutHeader();
      // It is safe to manipulate this buffer because we own the buffer object.
      buffer.rewind();
      int klen = buffer.getInt();
      buffer.skip(Bytes.SIZEOF_INT); // Skip value len part
      ByteBuffer keyBuff = buffer.asSubByteBuffer(klen);
      if (keyBuff.hasArray()) {
        return new KeyValue.KeyOnlyKeyValue(keyBuff.array(), keyBuff.arrayOffset()
            + keyBuff.position(), klen);
      } else {
        return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen);
      }
    }

    @Override
    public String getKeyString() {
      return CellUtil.toString(getKey(), false);
    }

    @Override
    public String getValueString() {
      return ByteBufferUtils.toStringBinary(getValue());
    }

    public int compareKey(CellComparator comparator, Cell key) {
      blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair);
      this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen);
      return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv);
    }

    @Override
    public void shipped() throws IOException {
      this.returnBlocks(false);
    }
  }

  @Override
  public Path getPath() {
    return path;
  }

  @Override
  public DataBlockEncoding getDataBlockEncoding() {
    return dataBlockEncoder.getDataBlockEncoding();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /** Minor versions in HFile starting with this number have hbase checksums */
  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;

  /** HFile minor version that does not support checksums */
  public static final int MINOR_VERSION_NO_CHECKSUM = 0;

  /** HFile minor version that introduced pbuf filetrailer */
  public static final int PBUF_TRAILER_MINOR_VERSION = 2;

  /**
   * The size of a (key length, value length) tuple that prefixes each entry in
   * a data block.
   */
  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;

  /**
   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
   */
  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
      boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding) throws IOException {
    // Check cache for block. If found return.
    BlockCache cache = cacheConf.getBlockCache().orElse(null);
    if (cache != null) {
      HFileBlock cachedBlock =
          (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics);
      if (cachedBlock != null) {
        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
          HFileBlock compressedBlock = cachedBlock;
          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
          // In the case of a compressed block, we can release the compressed block after unpacking
          if (compressedBlock != cachedBlock) {
            compressedBlock.release();
          }
        }
        try {
          validateBlockType(cachedBlock, expectedBlockType);
        } catch (IOException e) {
          returnAndEvictBlock(cache, cacheKey, cachedBlock);
          throw e;
        }

        if (expectedDataBlockEncoding == null) {
          return cachedBlock;
        }
        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
        // Block types other than data blocks always have
        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
        // perform this check if the cached block is a data block.
        if (cachedBlock.getBlockType().isData() &&
            !actualDataBlockEncoding.equals(expectedDataBlockEncoding)) {
          // This mismatch may happen if a Scanner, which is used for say a
          // compaction, tries to read an encoded block from the block cache.
          // The reverse might happen when an EncodedScanner tries to read
          // un-encoded blocks which were cached earlier.
          //
          // Because returning a data block with an implicit BlockType mismatch
          // would cause the requesting scanner to throw, a disk read should be
          // forced here. This will potentially cause a significant number of
          // cache misses, so we should keep track of this as it might justify
          // the work on a CompoundScanner.
          if (!expectedDataBlockEncoding.equals(DataBlockEncoding.NONE) &&
              !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)) {
            // If the block is encoded but the encoding does not match the
            // expected encoding, it is likely the encoding was changed but the
            // block was not yet evicted. Evictions on file close happen async,
            // so blocks with the old encoding still linger in cache for some
            // period of time. This event should be rare as it only happens on
            // schema definition change.
            LOG.info("Evicting cached block with key {} because data block encoding mismatch; " +
                "expected {}, actual {}, path={}", cacheKey, actualDataBlockEncoding,
                expectedDataBlockEncoding, path);
            // This is an error scenario, so we need to release the block here.
            returnAndEvictBlock(cache, cacheKey, cachedBlock);
          }
          return null;
        }
        return cachedBlock;
      }
    }
    return null;
  }

  private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) {
    block.release();
    cache.evictBlock(cacheKey);
  }

  /**
   * @param cacheBlock Add block to cache, if found
   * @return block wrapped in a ByteBuffer, with header skipped
   */
  @Override
  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock)
      throws IOException {
    if (trailer.getMetaIndexCount() == 0) {
      return null; // there are no meta blocks
    }
    if (metaBlockIndexReader == null) {
      throw new IOException(path + " meta index not loaded");
    }

    byte[] mbname = Bytes.toBytes(metaBlockName);
    int block = metaBlockIndexReader.rootBlockContainingKey(mbname,
        0, mbname.length);
    if (block == -1) {
      return null;
    }
    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);

    // Per meta key from any given file, synchronize reads for said block. This
    // is OK to do for meta blocks because the meta block index is always
    // single-level.
    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
      // Check cache for block. If found return.
      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
      BlockCacheKey cacheKey =
          new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);

      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
      HFileBlock cachedBlock =
          getCachedBlock(cacheKey, cacheBlock, false, true, true, BlockType.META, null);
      if (cachedBlock != null) {
        assert cachedBlock.isUnpacked() : "Packed block leak.";
        // Return a distinct 'shallow copy' of the block,
        // so pos does not get messed by the scanner
        return cachedBlock;
      }
      // Cache Miss, please load.

      HFileBlock compressedBlock =
          fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
      if (compressedBlock != uncompressedBlock) {
        compressedBlock.release();
      }

      // Cache the block
      if (cacheBlock) {
        cacheConf.getBlockCache().ifPresent(
            cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
      }
      return uncompressedBlock;
    }
  }

  /**
   * If the expected block is a data block, we'll allocate the ByteBuff of the block from
   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} and it's usually an off-heap one,
   * otherwise it will allocate from the heap.
   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
   *      boolean, boolean)
   */
  private boolean shouldUseHeap(BlockType expectedBlockType) {
    if (!cacheConf.getBlockCache().isPresent()) {
      return false;
    } else if (!cacheConf.isCombinedBlockCache()) {
      // A block cached in LruBlockCache must be a heap one. So just allocate block memory from
      // the heap to save an extra off-heap to heap copy.
      return true;
    }
    return expectedBlockType != null && !expectedBlockType.isData();
  }

  @Override
  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
      final boolean cacheBlock, boolean pread, final boolean isCompaction,
      boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding)
      throws IOException {
    if (dataBlockIndexReader == null) {
      throw new IOException(path + " block index not loaded");
    }
    long trailerOffset = trailer.getLoadOnOpenDataOffset();
    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
      throw new IOException("Requested block is out of range: " + dataBlockOffset +
          ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
          ", trailer.getLoadOnOpenDataOffset: " + trailerOffset +
          ", path=" + path);
    }
    // For any given block from any given file, synchronize reads for said block.
    // Without a cache, this synchronizing is needless overhead, but really
    // the other choice is to duplicate work (which the cache would prevent you
    // from doing).

    BlockCacheKey cacheKey = new BlockCacheKey(name, dataBlockOffset,
        this.isPrimaryReplicaReader(), expectedBlockType);

    boolean useLock = false;
    IdLock.Entry lockEntry = null;
    try (TraceScope traceScope = TraceUtil.createTrace("HFileReaderImpl.readBlock")) {
      while (true) {
        // Check cache for block. If found return.
        if (cacheConf.shouldReadBlockFromCache(expectedBlockType)) {
          if (useLock) {
            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
          }
          // Try and get the block from the block cache. If the useLock variable is true then this
          // is the second time through the loop and it should not be counted as a block cache miss.
          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, isCompaction,
              updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding);
          if (cachedBlock != null) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("From Cache " + cachedBlock);
            }
            TraceUtil.addTimelineAnnotation("blockCacheHit");
            assert cachedBlock.isUnpacked() : "Packed block leak.";
            if (cachedBlock.getBlockType().isData()) {
              if (updateCacheMetrics) {
                HFile.DATABLOCK_READ_COUNT.increment();
              }
              // Validate encoding type for data blocks. We include encoding
              // type in the cache key, and we expect it to match on a cache hit.
              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
                // Remember to release the block when in exceptional path.
                cacheConf.getBlockCache().ifPresent(cache -> {
                  returnAndEvictBlock(cache, cacheKey, cachedBlock);
                });
                throw new IOException("Cached block under key " + cacheKey + " "
                    + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
                    + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
              }
            }
            // Cache-hit. Return!
            return cachedBlock;
          }

          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
            // check cache again with lock
            useLock = true;
            continue;
          }
          // Carry on, please load.
        }

        TraceUtil.addTimelineAnnotation("blockCacheMiss");
        // Load block from filesystem.
        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
            !isCompaction, shouldUseHeap(expectedBlockType));
        validateBlockType(hfileBlock, expectedBlockType);
        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();

        // Cache the block if necessary
        cacheConf.getBlockCache().ifPresent(cache -> {
          if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
            cache.cacheBlock(cacheKey,
                cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
                cacheConf.isInMemory());
          }
        });
        if (unpacked != hfileBlock) {
          // End of life here if hfileBlock is an independent block.
          hfileBlock.release();
        }
        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
          HFile.DATABLOCK_READ_COUNT.increment();
        }

        return unpacked;
      }
    } finally {
      if (lockEntry != null) {
        offsetLock.releaseLockEntry(lockEntry);
      }
    }
  }

  @Override
  public boolean hasMVCCInfo() {
    return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS();
  }

  /**
   * Compares the actual type of a block retrieved from cache or disk with its
   * expected type and throws an exception in case of a mismatch. An expected
   * block type of {@link BlockType#DATA} is considered to match the actual
   * block type {@link BlockType#ENCODED_DATA} as well.
   * @param block a block retrieved from cache or disk
   * @param expectedBlockType the expected block type, or null to skip the
   *          check
   */
  private void validateBlockType(HFileBlock block,
      BlockType expectedBlockType) throws IOException {
    if (expectedBlockType == null) {
      return;
    }
    BlockType actualBlockType = block.getBlockType();
    if (expectedBlockType.isData() && actualBlockType.isData()) {
      // We consider DATA to match ENCODED_DATA for the purpose of this
      // verification.
      return;
    }
    if (actualBlockType != expectedBlockType) {
      throw new IOException("Expected block type " + expectedBlockType + ", " +
          "but got " + actualBlockType + ": " + block + ", path=" + path);
    }
  }

  /**
   * @return Last key as a cell in the file, or empty if the file has no entries. Note that
   *         this is not the last row key, but the Cell representation of the last
   *         key.
   */
  @Override
  public Optional<Cell> getLastKey() {
    return dataBlockIndexReader.isEmpty() ? Optional.empty() :
        Optional.of(fileInfo.getLastKeyCell());
  }

  /**
   * @return Midkey for this file. We work with block boundaries only so the
   *         returned midkey is an approximation only.
   */
  @Override
  public Optional<Cell> midKey() throws IOException {
    return Optional.ofNullable(dataBlockIndexReader.midkey(this));
  }

  @Override
  public void close() throws IOException {
    close(cacheConf.shouldEvictOnClose());
  }

  @Override
  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
  }

  /** For testing */
  @Override
  public HFileBlock.FSReader getUncachedBlockReader() {
    return fsBlockReader;
  }

  /**
   * Scanner that operates on encoded data blocks.
   */
  protected static class EncodedScanner extends HFileScannerImpl {
    private final HFileBlockDecodingContext decodingCtx;
    private final DataBlockEncoder.EncodedSeeker seeker;
    private final DataBlockEncoder dataBlockEncoder;

    public EncodedScanner(HFile.Reader reader, boolean cacheBlocks,
        boolean pread, boolean isCompaction, HFileContext meta) {
      super(reader, cacheBlocks, pread, isCompaction);
      DataBlockEncoding encoding = reader.getDataBlockEncoding();
      dataBlockEncoder = encoding.getEncoder();
      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(meta);
      seeker = dataBlockEncoder.createSeeker(decodingCtx);
    }

    @Override
    public boolean isSeeked() {
      return curBlock != null;
    }

    @Override
    public void setNonSeekedState() {
      reset();
    }

    /**
     * Updates the current block to be the given {@link HFileBlock}. Seeks to the first
     * key/value pair.
     * @param newBlock the block to make current, read by {@link HFileReaderImpl#readBlock};
     *          it's a totally new block with a newly allocated {@link ByteBuff}, so if there is
     *          no further reference to this block, we should release it carefully.
     */
    @Override
    protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
      try {
        // sanity checks
        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
        }
        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
          String encoderCls = dataBlockEncoder.getClass().getName();
          throw new CorruptHFileException("Encoder " + encoderCls +
              " doesn't support data block encoding " +
              DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ", path=" + reader.getPath());
        }
        updateCurrBlockRef(newBlock);
        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
        seeker.setCurrentBuffer(encodedBuffer);
      } finally {
        releaseIfNotCurBlock(newBlock);
      }
      // Reset the next indexed key
      this.nextIndexedKey = null;
    }

    private ByteBuff getEncodedBuffer(HFileBlock newBlock) {
      ByteBuff origBlock = newBlock.getBufferReadOnly();
      int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE;
      origBlock.position(pos);
      origBlock
          .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
      return origBlock.slice();
    }
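
    // Added commentary (not from the original source): getEncodedBuffer() above slices off the
    // block header and the 2-byte data block encoding id (DataBlockEncoding.ID_SIZE), so the
    // seeker only ever sees the encoded key/value payload:
    //
    //   [ block header | encoding id (2 bytes) | encoded key/values ... ]
    //                                          ^-- slice starts here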
    @Override
    protected boolean processFirstDataBlock() throws IOException {
      seeker.rewind();
      return true;
    }

    @Override
    public boolean next() throws IOException {
      boolean isValid = seeker.next();
      if (!isValid) {
        HFileBlock newBlock = readNextDataBlock();
        isValid = newBlock != null;
        if (isValid) {
          updateCurrentBlock(newBlock);
        } else {
          setNonSeekedState();
        }
      }
      return isValid;
    }

    @Override
    public Cell getKey() {
      assertValidSeek();
      return seeker.getKey();
    }

    @Override
    public ByteBuffer getValue() {
      assertValidSeek();
      return seeker.getValueShallowCopy();
    }

    @Override
    public Cell getCell() {
      if (this.curBlock == null) {
        return null;
      }
      return seeker.getCell();
    }

    @Override
    public String getKeyString() {
      return CellUtil.toString(getKey(), true);
    }

    @Override
    public String getValueString() {
      ByteBuffer valueBuffer = getValue();
      return ByteBufferUtils.toStringBinary(valueBuffer);
    }

    private void assertValidSeek() {
      if (this.curBlock == null) {
        throw new NotSeekedException(reader.getPath());
      }
    }

    @Override
    protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) {
      return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock));
    }

    @Override
    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
        boolean rewind, Cell key, boolean seekBefore) throws IOException {
      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
        updateCurrentBlock(seekToBlock);
      } else if (rewind) {
        seeker.rewind();
      }
      this.nextIndexedKey = nextIndexedKey;
      return seeker.seekToKeyInBlock(key, seekBefore);
    }

    @Override
    public int compareKey(CellComparator comparator, Cell key) {
      return seeker.compareKey(comparator, key);
    }
  }

  /**
   * Returns a buffer with the Bloom filter metadata. The caller takes
   * ownership of the buffer.
   */
  @Override
  public DataInput getGeneralBloomFilterMetadata() throws IOException {
    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
  }

  @Override
  public DataInput getDeleteBloomFilterMetadata() throws IOException {
    return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
  }

  private DataInput getBloomFilterMetadata(BlockType blockType)
      throws IOException {
    if (blockType != BlockType.GENERAL_BLOOM_META &&
        blockType != BlockType.DELETE_FAMILY_BLOOM_META) {
      throw new RuntimeException("Block Type: " + blockType.toString() +
          " is not supported, path=" + path);
    }

    for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) {
      if (b.getBlockType() == blockType) {
        return b.getByteStream();
      }
    }
    return null;
  }

  public boolean isFileInfoLoaded() {
    return true; // We load file info in the constructor in version 2.
  }

  @Override
  public HFileContext getFileContext() {
    return hfileContext;
  }

  /**
   * Returns false if block prefetching was requested for this file and has
   * not completed, true otherwise.
   */
  @Override
  @VisibleForTesting
  public boolean prefetchComplete() {
    return PrefetchExecutor.isCompleted(path);
  }

  /**
   * Create a Scanner on this file. No seeks or reads are done on creation. Call
   * {@link HFileScanner#seekTo(Cell)} to position and start the read. There is
   * nothing to clean up in a Scanner. Letting go of your references to the
   * scanner is sufficient. NOTE: Do not use this overload of getScanner for
   * compactions. See {@link #getScanner(boolean, boolean, boolean)}.
   *
   * @param cacheBlocks True if we should cache blocks read in by this scanner.
   * @param pread Use positional read rather than seek+read if true (pread is
   *          better for random reads, seek+read is better for scanning).
   * @return Scanner on this file.
   */
  @Override
  @VisibleForTesting
  public HFileScanner getScanner(boolean cacheBlocks, final boolean pread) {
    return getScanner(cacheBlocks, pread, false);
  }

  /**
   * Create a Scanner on this file.
   * No seeks or reads are done on creation. Call
   * {@link HFileScanner#seekTo(Cell)} to position and start the read. There is
   * nothing to clean up in a Scanner. Letting go of your references to the
   * scanner is sufficient.
   * @param cacheBlocks
   *          True if we should cache blocks read in by this scanner.
   * @param pread
   *          Use positional read rather than seek+read if true (pread is better
   *          for random reads, seek+read is better for scanning).
   * @param isCompaction
   *          is scanner being used for a compaction?
   * @return Scanner on this file.
   */
  @Override
  public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
      final boolean isCompaction) {
    if (dataBlockEncoder.useEncodedScanner()) {
      return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext);
    }
    return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
  }

  public int getMajorVersion() {
    return 3;
  }

  @Override
  public void unbufferStream() {
    fsBlockReader.unbufferStream();
  }
}