001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.io.DataInput; 021import java.io.IOException; 022import java.nio.ByteBuffer; 023import java.security.Key; 024import java.util.ArrayList; 025import java.util.List; 026import java.util.Optional; 027import java.util.concurrent.atomic.AtomicInteger; 028 029import org.apache.hadoop.conf.Configurable; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue; 033import org.apache.hadoop.hbase.Cell; 034import org.apache.hadoop.hbase.CellComparator; 035import org.apache.hadoop.hbase.CellUtil; 036import org.apache.hadoop.hbase.HConstants; 037import org.apache.hadoop.hbase.PrivateCellUtil; 038import org.apache.hadoop.hbase.KeyValue; 039import org.apache.hadoop.hbase.ByteBufferKeyValue; 040import org.apache.hadoop.hbase.SizeCachedKeyValue; 041import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue; 042import org.apache.hadoop.hbase.trace.TraceUtil; 043import org.apache.yetus.audience.InterfaceAudience; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046import 
org.apache.hadoop.hbase.fs.HFileSystem; 047import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 048import org.apache.hadoop.hbase.io.compress.Compression; 049import org.apache.hadoop.hbase.io.crypto.Cipher; 050import org.apache.hadoop.hbase.io.crypto.Encryption; 051import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; 052import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 053import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; 054import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; 055import org.apache.hadoop.hbase.nio.ByteBuff; 056import org.apache.hadoop.hbase.regionserver.KeyValueScanner; 057import org.apache.hadoop.hbase.security.EncryptionUtil; 058import org.apache.hadoop.hbase.util.ByteBufferUtils; 059import org.apache.hadoop.hbase.util.Bytes; 060import org.apache.hadoop.hbase.util.IdLock; 061import org.apache.hadoop.hbase.util.ObjectIntPair; 062import org.apache.hadoop.io.WritableUtils; 063import org.apache.htrace.core.TraceScope; 064 065import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 066 067/** 068 * Implementation that can handle all hfile versions of {@link HFile.Reader}. 069 */ 070@InterfaceAudience.Private 071@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD") 072public class HFileReaderImpl implements HFile.Reader, Configurable { 073 // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into 074 // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against 075 // the MaxInlineLevel limit because too many tiers involved reading from an hfile. Was also hard 076 // to navigate the source code when so many classes participating in read. 
private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class);

/** Data block index reader keeping the root data index in memory */
private HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;

/** Meta block index reader -- always single level */
private HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;

/** Trailer of the file, as handed to the constructor. */
private final FixedFileTrailer trailer;

/** Filled when we read in the trailer. */
private final Compression.Algorithm compressAlgo;

/** Constructor-supplied flag; exposed through {@link #isPrimaryReplicaReader()}. */
private final boolean primaryReplicaReader;

/**
 * What kind of data block encoding should be used while reading, writing,
 * and handling cache.
 */
private HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE;

/** Last key in the file. Filled in when we read in the file info */
private Cell lastKeyCell = null;

/** Average key length read from file info */
private int avgKeyLen = -1;

/** Average value length read from file info */
private int avgValueLen = -1;

/** Key comparator */
private CellComparator comparator = CellComparator.getInstance();

/** Size of this file. */
private final long fileSize;

/** Block cache configuration. */
private final CacheConfig cacheConf;

/** Path of file */
private final Path path;

/** File name to be used for block names */
private final String name;

/** File info; returned as-is by {@link #loadFileInfo()}. */
private FileInfo fileInfo;

/** Configuration handed to the constructor. */
private Configuration conf;

/** Per-file context object. */
private HFileContext hfileContext;

/** Filesystem-level block reader. */
private HFileBlock.FSReader fsBlockReader;

/**
 * A "sparse lock" implementation allowing to lock on a particular block
 * identified by offset. The purpose of this is to avoid two clients loading
 * the same block, and have all but one client wait to get the block from the
 * cache.
 */
private IdLock offsetLock = new IdLock();

/**
 * Blocks read from the load-on-open section, excluding data root index, meta
 * index, and file info.
 */
private List<HFileBlock> loadOnOpenBlocks = new ArrayList<>();

/** Minimum minor version supported by this HFile format */
static final int MIN_MINOR_VERSION = 0;

/** Maximum minor version supported by this HFile format */
// We went to version 2 when we moved to pb'ing fileinfo and the trailer on
// the file. This version can read Writables version 1.
static final int MAX_MINOR_VERSION = 3;

/**
 * We can read files whose major version is v2 IFF their minor version is at least 3.
 */
private static final int MIN_V2_MINOR_VERSION_WITH_PB = 3;

/** Minor versions starting with this number have faked index key */
static final int MINOR_VERSION_WITH_FAKED_KEY = 3;

/**
 * Convenience constructor that delegates to the eight-argument constructor, passing
 * {@code true} for {@code primaryReplicaReader}.
 * NOTE(review): this file names no replacement for this deprecated constructor; presumably
 * callers should use the eight-argument form directly -- confirm.
 */
@VisibleForTesting
@Deprecated
public HFileReaderImpl(Path path, FixedFileTrailer trailer, FSDataInputStreamWrapper fsdis,
    long fileSize, CacheConfig cacheConf, HFileSystem hfs, Configuration conf)
    throws IOException {
  this(path, trailer, fsdis, fileSize, cacheConf, hfs, true, conf);
}

/**
 * Opens a HFile. You must load the index before you can use it by calling
 * {@link #loadFileInfo()}.
 * @param path
 *          Path to HFile.
 * @param trailer
 *          File trailer.
 * @param fsdis
 *          input stream.
 * @param fileSize
 *          Length of the stream.
 * @param cacheConf
 *          Cache configuration.
 * @param hfs
 *          The file system.
184 * @param conf 185 * Configuration 186 */ 187 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD") 188 public HFileReaderImpl(Path path, FixedFileTrailer trailer, FSDataInputStreamWrapper fsdis, 189 long fileSize, CacheConfig cacheConf, HFileSystem hfs, boolean primaryReplicaReader, 190 Configuration conf) throws IOException { 191 this.trailer = trailer; 192 this.compressAlgo = trailer.getCompressionCodec(); 193 this.cacheConf = cacheConf; 194 this.fileSize = fileSize; 195 this.path = path; 196 this.name = path.getName(); 197 this.conf = conf; 198 this.primaryReplicaReader = primaryReplicaReader; 199 checkFileVersion(); 200 this.hfileContext = createHFileContext(fsdis, fileSize, hfs, path, trailer); 201 this.fsBlockReader = new HFileBlock.FSReaderImpl(fsdis, fileSize, hfs, path, hfileContext); 202 203 // Comparator class name is stored in the trailer in version 2. 204 comparator = trailer.createComparator(); 205 dataBlockIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 206 trailer.getNumDataIndexLevels(), this); 207 metaBlockIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1); 208 209 // Parse load-on-open data. 210 211 HFileBlock.BlockIterator blockIter = fsBlockReader.blockRange( 212 trailer.getLoadOnOpenDataOffset(), 213 fileSize - trailer.getTrailerSize()); 214 215 // Data index. We also read statistics about the block index written after 216 // the root level. 217 dataBlockIndexReader.readMultiLevelIndexRoot( 218 blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), 219 trailer.getDataIndexCount()); 220 221 // Meta index. 
222 metaBlockIndexReader.readRootIndex( 223 blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), 224 trailer.getMetaIndexCount()); 225 226 // File info 227 fileInfo = new FileInfo(); 228 fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream()); 229 byte[] creationTimeBytes = fileInfo.get(FileInfo.CREATE_TIME_TS); 230 this.hfileContext.setFileCreateTime(creationTimeBytes == null? 0: 231 Bytes.toLong(creationTimeBytes)); 232 if (fileInfo.get(FileInfo.LASTKEY) != null) { 233 lastKeyCell = new KeyValue.KeyOnlyKeyValue(fileInfo.get(FileInfo.LASTKEY)); 234 } 235 avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN)); 236 avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN)); 237 byte [] keyValueFormatVersion = fileInfo.get(HFileWriterImpl.KEY_VALUE_VERSION); 238 includesMemstoreTS = keyValueFormatVersion != null && 239 Bytes.toInt(keyValueFormatVersion) == HFileWriterImpl.KEY_VALUE_VER_WITH_MEMSTORE; 240 fsBlockReader.setIncludesMemStoreTS(includesMemstoreTS); 241 if (includesMemstoreTS) { 242 decodeMemstoreTS = Bytes.toLong(fileInfo.get(HFileWriterImpl.MAX_MEMSTORE_TS_KEY)) > 0; 243 } 244 245 // Read data block encoding algorithm name from file info. 246 dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo); 247 fsBlockReader.setDataBlockEncoder(dataBlockEncoder); 248 249 // Store all other load-on-open blocks for further consumption. 250 HFileBlock b; 251 while ((b = blockIter.nextBlock()) != null) { 252 loadOnOpenBlocks.add(b); 253 } 254 255 // Prefetch file blocks upon open if requested 256 if (cacheConf.shouldPrefetchOnOpen()) { 257 PrefetchExecutor.request(path, new Runnable() { 258 @Override 259 public void run() { 260 long offset = 0; 261 long end = 0; 262 try { 263 end = getTrailer().getLoadOnOpenDataOffset(); 264 if (LOG.isTraceEnabled()) { 265 LOG.trace("Prefetch start " + getPathOffsetEndStr(path, offset, end)); 266 } 267 // TODO: Could we use block iterator in here? 
Would that get stuff into the cache? 268 HFileBlock prevBlock = null; 269 while (offset < end) { 270 if (Thread.interrupted()) { 271 break; 272 } 273 // Perhaps we got our block from cache? Unlikely as this may be, if it happens, then 274 // the internal-to-hfileblock thread local which holds the overread that gets the 275 // next header, will not have happened...so, pass in the onDiskSize gotten from the 276 // cached block. This 'optimization' triggers extremely rarely I'd say. 277 long onDiskSize = prevBlock != null? prevBlock.getNextBlockOnDiskSize(): -1; 278 HFileBlock block = readBlock(offset, onDiskSize, /*cacheBlock=*/true, 279 /*pread=*/true, false, false, null, null); 280 // Need not update the current block. Ideally here the readBlock won't find the 281 // block in cache. We call this readBlock so that block data is read from FS and 282 // cached in BC. So there is no reference count increment that happens here. 283 // The return will ideally be a noop because the block is not of MemoryType SHARED. 284 returnBlock(block); 285 prevBlock = block; 286 offset += block.getOnDiskSizeWithHeader(); 287 } 288 } catch (IOException e) { 289 // IOExceptions are probably due to region closes (relocation, etc.) 290 if (LOG.isTraceEnabled()) { 291 LOG.trace("Prefetch " + getPathOffsetEndStr(path, offset, end), e); 292 } 293 } catch (NullPointerException e) { 294 LOG.warn("Stream moved/closed or prefetch cancelled?" + 295 getPathOffsetEndStr(path, offset, end), e); 296 } catch (Exception e) { 297 // Other exceptions are interesting 298 LOG.warn("Prefetch " + getPathOffsetEndStr(path, offset, end), e); 299 } finally { 300 PrefetchExecutor.complete(path); 301 } 302 } 303 }); 304 } 305 306 byte[] tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN); 307 // max tag length is not present in the HFile means tags were not at all written to file. 
308 if (tmp != null) { 309 hfileContext.setIncludesTags(true); 310 tmp = fileInfo.get(FileInfo.TAGS_COMPRESSED); 311 if (tmp != null && Bytes.toBoolean(tmp)) { 312 hfileContext.setCompressTags(true); 313 } 314 } 315 } 316 317 private static String getPathOffsetEndStr(final Path path, final long offset, final long end) { 318 return "path=" + path.toString() + ", offset=" + offset + ", end=" + end; 319 } 320 321 /** 322 * File version check is a little sloppy. We read v3 files but can also read v2 files if their 323 * content has been pb'd; files written with 0.98. 324 */ 325 private void checkFileVersion() { 326 int majorVersion = trailer.getMajorVersion(); 327 if (majorVersion == getMajorVersion()) return; 328 int minorVersion = trailer.getMinorVersion(); 329 if (majorVersion == 2 && minorVersion >= MIN_V2_MINOR_VERSION_WITH_PB) return; 330 // We can read v3 or v2 versions of hfile. 331 throw new IllegalArgumentException("Invalid HFile version: major=" + 332 trailer.getMajorVersion() + ", minor=" + trailer.getMinorVersion() + ": expected at least " + 333 "major=2 and minor=" + MAX_MINOR_VERSION + ", path=" + path); 334 } 335 336 @SuppressWarnings("serial") 337 public static class BlockIndexNotLoadedException extends IllegalStateException { 338 public BlockIndexNotLoadedException(Path path) { 339 // Add a message in case anyone relies on it as opposed to class name. 340 super(path + " block index not loaded"); 341 } 342 } 343 344 private Optional<String> toStringFirstKey() { 345 return getFirstKey().map(CellUtil::getCellKeyAsString); 346 } 347 348 private Optional<String> toStringLastKey() { 349 return getLastKey().map(CellUtil::getCellKeyAsString); 350 } 351 352 @Override 353 public String toString() { 354 return "reader=" + path.toString() + 355 (!isFileInfoLoaded()? 
"": 356 ", compression=" + compressAlgo.getName() + 357 ", cacheConf=" + cacheConf + 358 ", firstKey=" + toStringFirstKey() + 359 ", lastKey=" + toStringLastKey()) + 360 ", avgKeyLen=" + avgKeyLen + 361 ", avgValueLen=" + avgValueLen + 362 ", entries=" + trailer.getEntryCount() + 363 ", length=" + fileSize; 364 } 365 366 @Override 367 public long length() { 368 return fileSize; 369 } 370 371 @Override 372 public void returnBlock(HFileBlock block) { 373 BlockCache blockCache = this.cacheConf.getBlockCache(); 374 if (blockCache != null && block != null) { 375 BlockCacheKey cacheKey = new BlockCacheKey(this.getFileContext().getHFileName(), 376 block.getOffset(), this.isPrimaryReplicaReader(), block.getBlockType()); 377 blockCache.returnBlock(cacheKey, block); 378 } 379 } 380 /** 381 * @return the first key in the file. May be null if file has no entries. Note 382 * that this is not the first row key, but rather the byte form of the 383 * first KeyValue. 384 */ 385 @Override 386 public Optional<Cell> getFirstKey() { 387 if (dataBlockIndexReader == null) { 388 throw new BlockIndexNotLoadedException(path); 389 } 390 return dataBlockIndexReader.isEmpty() ? Optional.empty() 391 : Optional.of(dataBlockIndexReader.getRootBlockKey(0)); 392 } 393 394 /** 395 * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's 396 * patch goes in to eliminate {@link KeyValue} here. 397 * 398 * @return the first row key, or null if the file is empty. 399 */ 400 @Override 401 public Optional<byte[]> getFirstRowKey() { 402 // We have to copy the row part to form the row key alone 403 return getFirstKey().map(CellUtil::cloneRow); 404 } 405 406 /** 407 * TODO left from {@link HFile} version 1: move this to StoreFile after 408 * Ryan's patch goes in to eliminate {@link KeyValue} here. 409 * 410 * @return the last row key, or null if the file is empty. 
411 */ 412 @Override 413 public Optional<byte[]> getLastRowKey() { 414 // We have to copy the row part to form the row key alone 415 return getLastKey().map(CellUtil::cloneRow); 416 } 417 418 /** @return number of KV entries in this HFile */ 419 @Override 420 public long getEntries() { 421 return trailer.getEntryCount(); 422 } 423 424 /** @return comparator */ 425 @Override 426 public CellComparator getComparator() { 427 return comparator; 428 } 429 430 /** @return compression algorithm */ 431 @Override 432 public Compression.Algorithm getCompressionAlgorithm() { 433 return compressAlgo; 434 } 435 436 /** 437 * @return the total heap size of data and meta block indexes in bytes. Does 438 * not take into account non-root blocks of a multilevel data index. 439 */ 440 @Override 441 public long indexSize() { 442 return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0) 443 + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() 444 : 0); 445 } 446 447 @Override 448 public String getName() { 449 return name; 450 } 451 452 @Override 453 public HFileBlockIndex.BlockIndexReader getDataBlockIndexReader() { 454 return dataBlockIndexReader; 455 } 456 457 @Override 458 public FixedFileTrailer getTrailer() { 459 return trailer; 460 } 461 462 @Override 463 public boolean isPrimaryReplicaReader() { 464 return primaryReplicaReader; 465 } 466 467 @Override 468 public FileInfo loadFileInfo() throws IOException { 469 return fileInfo; 470 } 471 472 /** 473 * An exception thrown when an operation requiring a scanner to be seeked 474 * is invoked on a scanner that is not seeked. 
*/
@SuppressWarnings("serial")
public static class NotSeekedException extends IllegalStateException {
  /**
   * @param path path of the file the scanner reads; included in the exception message
   */
  public NotSeekedException(Path path) {
    super(path + " not seeked to a key/value");
  }
}

/**
 * Scanner over the blocks of an {@link HFile.Reader}.
 */
protected static class HFileScannerImpl implements HFileScanner {
  // Buffer the scanner reads cells from.
  private ByteBuff blockBuffer;
  protected final boolean cacheBlocks;
  protected final boolean pread;
  protected final boolean isCompaction;
  // Lengths and mvcc value for the current cell.
  private int currKeyLen;
  private int currValueLen;
  private int currMemstoreTSLen;
  private long currMemstoreTS;
  // Updated but never read?
  protected AtomicInteger blockFetches = new AtomicInteger(0);
  protected final HFile.Reader reader;
  private int currTagsLen;
  // buffer backed keyonlyKV
  private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
  // A pair reused in blockSeek() so that we do not create a lot of garbage objects
  final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();

  /**
   * The next indexed key is to keep track of the indexed key of the next data block.
   * If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the
   * current data block is the last data block.
   *
   * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
507 */ 508 protected Cell nextIndexedKey; 509 // Current block being used 510 protected HFileBlock curBlock; 511 // Previous blocks that were used in the course of the read 512 protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>(); 513 514 public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks, 515 final boolean pread, final boolean isCompaction) { 516 this.reader = reader; 517 this.cacheBlocks = cacheBlocks; 518 this.pread = pread; 519 this.isCompaction = isCompaction; 520 } 521 522 void updateCurrBlockRef(HFileBlock block) { 523 if (block != null && this.curBlock != null && 524 block.getOffset() == this.curBlock.getOffset()) { 525 return; 526 } 527 // We don't have to keep ref to EXCLUSIVE type of block 528 if (this.curBlock != null && this.curBlock.usesSharedMemory()) { 529 prevBlocks.add(this.curBlock); 530 } 531 this.curBlock = block; 532 } 533 534 void reset() { 535 // We don't have to keep ref to EXCLUSIVE type of block 536 if (this.curBlock != null && this.curBlock.usesSharedMemory()) { 537 this.prevBlocks.add(this.curBlock); 538 } 539 this.curBlock = null; 540 } 541 542 private void returnBlockToCache(HFileBlock block) { 543 if (LOG.isTraceEnabled()) { 544 LOG.trace("Returning the block : " + block); 545 } 546 this.reader.returnBlock(block); 547 } 548 549 private void returnBlocks(boolean returnAll) { 550 for (int i = 0; i < this.prevBlocks.size(); i++) { 551 returnBlockToCache(this.prevBlocks.get(i)); 552 } 553 this.prevBlocks.clear(); 554 if (returnAll && this.curBlock != null) { 555 returnBlockToCache(this.curBlock); 556 this.curBlock = null; 557 } 558 } 559 @Override 560 public boolean isSeeked(){ 561 return blockBuffer != null; 562 } 563 564 @Override 565 public String toString() { 566 return "HFileScanner for reader " + String.valueOf(getReader()); 567 } 568 569 protected void assertSeeked() { 570 if (!isSeeked()) 571 throw new NotSeekedException(reader.getPath()); 572 } 573 574 @Override 575 public HFile.Reader 
getReader() { 576 return reader; 577 } 578 579 // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile 580 // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous 581 // array/buffer. How many bytes we should wrap to make the KV is what this method returns. 582 private int getKVBufSize() { 583 int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen; 584 if (currTagsLen > 0) { 585 kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen; 586 } 587 return kvBufSize; 588 } 589 590 @Override 591 public void close() { 592 if (!pread) { 593 // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393 594 reader.unbufferStream(); 595 } 596 this.returnBlocks(true); 597 } 598 599 // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current 600 // HFile block's buffer so as to position to the next cell. 601 private int getCurCellSerializedSize() { 602 int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen 603 + currMemstoreTSLen; 604 if (this.reader.getFileContext().isIncludesTags()) { 605 curCellSize += Bytes.SIZEOF_SHORT + currTagsLen; 606 } 607 return curCellSize; 608 } 609 610 protected void readKeyValueLen() { 611 // This is a hot method. We go out of our way to make this method short so it can be 612 // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves 613 // because it is faster than going via range-checked ByteBuffer methods or going through a 614 // byte buffer array a byte at a time. 615 // Get a long at a time rather than read two individual ints. In micro-benchmarking, even 616 // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints. 617 // Trying to imitate what was done - need to profile if this is better or 618 // earlier way is better by doing mark and reset? 
619 // But ensure that you read long instead of two ints 620 long ll = blockBuffer.getLongAfterPosition(0); 621 // Read top half as an int of key length and bottom int as value length 622 this.currKeyLen = (int)(ll >> Integer.SIZE); 623 this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll); 624 checkKeyValueLen(); 625 // Move position past the key and value lengths and then beyond the key and value 626 int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen); 627 if (reader.getFileContext().isIncludesTags()) { 628 // Tags length is a short. 629 this.currTagsLen = blockBuffer.getShortAfterPosition(p); 630 checkTagsLen(); 631 p += (Bytes.SIZEOF_SHORT + currTagsLen); 632 } 633 readMvccVersion(p); 634 } 635 636 private final void checkTagsLen() { 637 if (checkLen(this.currTagsLen)) { 638 throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen + 639 ". Block offset: " + curBlock.getOffset() + ", block length: " + 640 this.blockBuffer.limit() + 641 ", position: " + this.blockBuffer.position() + " (without header)." + 642 " path=" + reader.getPath()); 643 } 644 } 645 646 /** 647 * Read mvcc. Does checks to see if we even need to read the mvcc at all. 648 * @param offsetFromPos 649 */ 650 protected void readMvccVersion(final int offsetFromPos) { 651 // See if we even need to decode mvcc. 652 if (!this.reader.shouldIncludeMemStoreTS()) return; 653 if (!this.reader.isDecodeMemStoreTS()) { 654 currMemstoreTS = 0; 655 currMemstoreTSLen = 1; 656 return; 657 } 658 _readMvccVersion(offsetFromPos); 659 } 660 661 /** 662 * Actually do the mvcc read. Does no checks. 663 * @param offsetFromPos 664 */ 665 private void _readMvccVersion(int offsetFromPos) { 666 // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e. 667 // previous if one-byte vint, we'd redo the vint call to find int size. 668 // Also the method is kept small so can be inlined. 
669 byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos); 670 int len = WritableUtils.decodeVIntSize(firstByte); 671 if (len == 1) { 672 this.currMemstoreTS = firstByte; 673 } else { 674 int remaining = len -1; 675 long i = 0; 676 offsetFromPos++; 677 if (remaining >= Bytes.SIZEOF_INT) { 678 // The int read has to be converted to unsigned long so the & op 679 i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL); 680 remaining -= Bytes.SIZEOF_INT; 681 offsetFromPos += Bytes.SIZEOF_INT; 682 } 683 if (remaining >= Bytes.SIZEOF_SHORT) { 684 short s = blockBuffer.getShortAfterPosition(offsetFromPos); 685 i = i << 16; 686 i = i | (s & 0xFFFF); 687 remaining -= Bytes.SIZEOF_SHORT; 688 offsetFromPos += Bytes.SIZEOF_SHORT; 689 } 690 for (int idx = 0; idx < remaining; idx++) { 691 byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx); 692 i = i << 8; 693 i = i | (b & 0xFF); 694 } 695 currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); 696 } 697 this.currMemstoreTSLen = len; 698 } 699 700 /** 701 * Within a loaded block, seek looking for the last key that is smaller than 702 * (or equal to?) the key we are interested in. 703 * A note on the seekBefore: if you have seekBefore = true, AND the first 704 * key in the block = key, then you'll get thrown exceptions. The caller has 705 * to check for that case and load the previous block as appropriate. 706 * @param key 707 * the key to find 708 * @param seekBefore 709 * find the key before the given key in case of exact match. 710 * @return 0 in case of an exact key match, 1 in case of an inexact match, 711 * -2 in case of an inexact match and furthermore, the input key 712 * less than the first key of current block(e.g. 
using a faked index
     *         key)
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      // klen/vlen/tlen: key, value and tags lengths of the entry under the buffer position.
      int klen, vlen, tlen = 0;
      // Serialized size of the previously visited entry; -1 while still on the first entry
      // examined by this call.
      int lastKeyValueSize = -1;
      // Running offset, relative to the buffer position, inside the entry being examined.
      int offsetFromPos;
      do {
        offsetFromPos = 0;
        // Better to ensure that we use the BB Utils here
        // One long read yields both lengths: key length in the upper int, value length in the
        // lower int.
        long ll = blockBuffer.getLongAfterPosition(offsetFromPos);
        klen = (int)(ll >> Integer.SIZE);
        vlen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException("Invalid klen " + klen + " or vlen "
              + vlen + ". Block offset: "
              + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header)."
              + " path=" + reader.getPath());
        }
        offsetFromPos += Bytes.SIZEOF_LONG;
        // Point the reusable key-only cell at this entry's key bytes and compare it with the
        // sought key (mvcc is ignored by the comparison).
        blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair);
        bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen);
        int comp =
            PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv);
        offsetFromPos += klen + vlen;
        if (this.reader.getFileContext().isIncludesTags()) {
          // Read short as unsigned, high byte first
          tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8)
              ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff);
          if (checkLen(tlen)) {
            throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
                + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
                + blockBuffer.position() + " (without header)."
                + " path=" + reader.getPath());
          }
          // add the two bytes read for the tags.
          offsetFromPos += tlen + (Bytes.SIZEOF_SHORT);
        }
        if (this.reader.shouldIncludeMemStoreTS()) {
          // Directly read the mvcc based on current position
          readMvccVersion(offsetFromPos);
        }
        if (comp == 0) {
          // The entry under the position has exactly the sought key.
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              // No earlier entry was visited in this call, so there is nothing to step back to.
              throw new IllegalStateException("blockSeek with seekBefore "
                  + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key)
                  + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
                  + curBlock.getOnDiskSizeWithHeader()
                  + ", path=" + reader.getPath());
            }
            // Step back onto the previous entry and reload its lengths.
            blockBuffer.moveBack(lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          // Lengths were already decoded above; publish them without re-reading the buffer.
          currKeyLen = klen;
          currValueLen = vlen;
          currTagsLen = tlen;
          return 0; // indicate exact match
        } else if (comp < 0) {
          // The comparator reported the sought key as smaller than this entry: settle on the
          // previous entry when one was visited, otherwise stay on this one.
          if (lastKeyValueSize > 0) {
            blockBuffer.moveBack(lastKeyValueSize);
          }
          readKeyValueLen();
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
            // Sought key is smaller than the very first entry of the block.
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }
        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE;
        // include tag length also if tags included with KV
        if (reader.getFileContext().isIncludesTags()) {
          lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
        }
        blockBuffer.skip(lastKeyValueSize);
      } while (blockBuffer.hasRemaining());

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.moveBack(lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }

    /**
     * @return the value of the {@code nextIndexedKey} field; this method only reads it
     */
    @Override
    public Cell getNextIndexedKey() {
      return nextIndexedKey;
    }

    /**
     * Seeks to the given key, rewinding within the block first; delegates to
     * {@link #seekTo(Cell, boolean)} with {@code rewind = true}.
     */
    @Override
    public int seekTo(Cell key) throws IOException {
      return seekTo(key, true);
    }

    /**
     * Forward-only seek. When the scanner is already positioned and the requested key is not
     * after the current key, nothing moves and the comparison result is returned. When the
     * next indexed key shows the target is still inside the current block, only that block is
     * searched. In every other case this falls back to {@link #seekTo(Cell, boolean)} without
     * rewinding.
     *
     * @param key the key to move to
     * @return the comparison result (&lt; 1) when no move was needed, otherwise the result of
     *         the underlying seek
     */
    @Override
    public int reseekTo(Cell key) throws IOException {
      int compared;
      if (isSeeked()) {
        compared = compareKey(reader.getComparator(), key);
        if (compared < 1) {
          // If the required key is less than or equal to current key, then
          // don't do anything.
          return compared;
        } else {
          // The comparison with no_next_index_key has to be checked
          if (this.nextIndexedKey != null &&
              (this.nextIndexedKey == KeyValueScanner.NO_NEXT_INDEXED_KEY || PrivateCellUtil
                  .compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey) < 0)) {
            // The reader shall continue to scan the current data block instead
            // of querying the
            // block index as long as it knows the target key is strictly
            // smaller than
            // the next indexed key or the current data block is the last data
            // block.
            return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key,
                false);
          }

        }
      }
      // Don't rewind on a reseek operation, because reseek implies that we are
      // always going forward in the file.
      return seekTo(key, false);
    }

    /**
     * An internal API function. Seek to the given key, optionally rewinding to
     * the first key of the block before doing the seek.
     *
     * @param key - a cell representing the key that we need to fetch
     * @param rewind whether to rewind to the first key of the block before
     *        doing the seek. If this is false, we are assuming we never go
     *          back, otherwise the result is undefined.
848 * @return -1 if the key is earlier than the first key of the file, 849 * 0 if we are at the given key, 1 if we are past the given key 850 * -2 if the key is earlier than the first key of the file while 851 * using a faked index key 852 * @throws IOException 853 */ 854 public int seekTo(Cell key, boolean rewind) throws IOException { 855 HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader(); 856 BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock, 857 cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding()); 858 if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) { 859 // This happens if the key e.g. falls before the beginning of the 860 // file. 861 return -1; 862 } 863 return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(), 864 blockWithScanInfo.getNextIndexedKey(), rewind, key, false); 865 } 866 867 @Override 868 public boolean seekBefore(Cell key) throws IOException { 869 HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock, 870 cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction)); 871 if (seekToBlock == null) { 872 return false; 873 } 874 Cell firstKey = getFirstKeyCellInBlock(seekToBlock); 875 if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) { 876 long previousBlockOffset = seekToBlock.getPrevBlockOffset(); 877 // The key we are interested in 878 if (previousBlockOffset == -1) { 879 // we have a 'problem', the key we want is the first of the file. 880 return false; 881 } 882 883 // The first key in the current block 'seekToBlock' is greater than the given 884 // seekBefore key. We will go ahead by reading the next block that satisfies the 885 // given key. Return the current block before reading the next one. 
886 reader.returnBlock(seekToBlock); 887 // It is important that we compute and pass onDiskSize to the block 888 // reader so that it does not have to read the header separately to 889 // figure out the size. Currently, we do not have a way to do this 890 // correctly in the general case however. 891 // TODO: See https://issues.apache.org/jira/browse/HBASE-14576 892 int prevBlockSize = -1; 893 seekToBlock = reader.readBlock(previousBlockOffset, 894 prevBlockSize, cacheBlocks, 895 pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); 896 // TODO shortcut: seek forward in this block to the last key of the 897 // block. 898 } 899 loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true); 900 return true; 901 } 902 903 /** 904 * Scans blocks in the "scanned" section of the {@link HFile} until the next 905 * data block is found. 906 * 907 * @return the next block, or null if there are no more data blocks 908 * @throws IOException 909 */ 910 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH", 911 justification="Yeah, unnecessary null check; could do w/ clean up") 912 protected HFileBlock readNextDataBlock() throws IOException { 913 long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); 914 if (curBlock == null) 915 return null; 916 917 HFileBlock block = this.curBlock; 918 919 do { 920 if (block.getOffset() >= lastDataBlockOffset) { 921 return null; 922 } 923 924 if (block.getOffset() < 0) { 925 throw new IOException( 926 "Invalid block file offset: " + block + ", path=" + reader.getPath()); 927 } 928 929 // We are reading the next block without block type validation, because 930 // it might turn out to be a non-data block. 
931 block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(), 932 block.getNextBlockOnDiskSize(), cacheBlocks, pread, 933 isCompaction, true, null, getEffectiveDataBlockEncoding()); 934 if (block != null && !block.getBlockType().isData()) { // Findbugs: NP_NULL_ON_SOME_PATH 935 // Whatever block we read we will be returning it unless 936 // it is a datablock. Just in case the blocks are non data blocks 937 reader.returnBlock(block); 938 } 939 } while (!block.getBlockType().isData()); 940 941 return block; 942 } 943 944 public DataBlockEncoding getEffectiveDataBlockEncoding() { 945 return this.reader.getEffectiveEncodingInCache(isCompaction); 946 } 947 948 @Override 949 public Cell getCell() { 950 if (!isSeeked()) 951 return null; 952 953 Cell ret; 954 int cellBufSize = getKVBufSize(); 955 long seqId = 0L; 956 if (this.reader.shouldIncludeMemStoreTS()) { 957 seqId = currMemstoreTS; 958 } 959 if (blockBuffer.hasArray()) { 960 // TODO : reduce the varieties of KV here. Check if based on a boolean 961 // we can handle the 'no tags' case. 
962 if (currTagsLen > 0) { 963 ret = new SizeCachedKeyValue(blockBuffer.array(), 964 blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId); 965 } else { 966 ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(), 967 blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId); 968 } 969 } else { 970 ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize); 971 if (buf.isDirect()) { 972 ret = new ByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId); 973 } else { 974 if (currTagsLen > 0) { 975 ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(), 976 cellBufSize, seqId); 977 } else { 978 ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(), 979 cellBufSize, seqId); 980 } 981 } 982 } 983 return ret; 984 } 985 986 @Override 987 public Cell getKey() { 988 assertSeeked(); 989 // Create a new object so that this getKey is cached as firstKey, lastKey 990 ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>(); 991 blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair); 992 ByteBuffer keyBuf = keyPair.getFirst(); 993 if (keyBuf.hasArray()) { 994 return new KeyValue.KeyOnlyKeyValue(keyBuf.array(), keyBuf.arrayOffset() 995 + keyPair.getSecond(), currKeyLen); 996 } else { 997 // Better to do a copy here instead of holding on to this BB so that 998 // we could release the blocks referring to this key. This key is specifically used 999 // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner 1000 // every time. So holding onto the BB (incase of DBB) is not advised here. 1001 byte[] key = new byte[currKeyLen]; 1002 ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen); 1003 return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen); 1004 } 1005 } 1006 1007 @Override 1008 public ByteBuffer getValue() { 1009 assertSeeked(); 1010 // Okie to create new Pair. 
Not used in hot path 1011 ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>(); 1012 this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen, 1013 currValueLen, valuePair); 1014 ByteBuffer valBuf = valuePair.getFirst().duplicate(); 1015 valBuf.position(valuePair.getSecond()); 1016 valBuf.limit(currValueLen + valuePair.getSecond()); 1017 return valBuf.slice(); 1018 } 1019 1020 protected void setNonSeekedState() { 1021 reset(); 1022 blockBuffer = null; 1023 currKeyLen = 0; 1024 currValueLen = 0; 1025 currMemstoreTS = 0; 1026 currMemstoreTSLen = 0; 1027 currTagsLen = 0; 1028 } 1029 1030 /** 1031 * Set the position on current backing blockBuffer. 1032 */ 1033 private void positionThisBlockBuffer() { 1034 try { 1035 blockBuffer.skip(getCurCellSerializedSize()); 1036 } catch (IllegalArgumentException e) { 1037 LOG.error("Current pos = " + blockBuffer.position() 1038 + "; currKeyLen = " + currKeyLen + "; currValLen = " 1039 + currValueLen + "; block limit = " + blockBuffer.limit() 1040 + "; currBlock currBlockOffset = " + this.curBlock.getOffset() 1041 + "; path=" + reader.getPath()); 1042 throw e; 1043 } 1044 } 1045 1046 /** 1047 * Set our selves up for the next 'next' invocation, set up next block. 1048 * @return True is more to read else false if at the end. 1049 * @throws IOException 1050 */ 1051 private boolean positionForNextBlock() throws IOException { 1052 // Methods are small so they get inlined because they are 'hot'. 1053 long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); 1054 if (this.curBlock.getOffset() >= lastDataBlockOffset) { 1055 setNonSeekedState(); 1056 return false; 1057 } 1058 return isNextBlock(); 1059 } 1060 1061 1062 private boolean isNextBlock() throws IOException { 1063 // Methods are small so they get inlined because they are 'hot'. 
1064 HFileBlock nextBlock = readNextDataBlock(); 1065 if (nextBlock == null) { 1066 setNonSeekedState(); 1067 return false; 1068 } 1069 updateCurrentBlock(nextBlock); 1070 return true; 1071 } 1072 1073 private final boolean _next() throws IOException { 1074 // Small method so can be inlined. It is a hot one. 1075 if (blockBuffer.remaining() <= 0) { 1076 return positionForNextBlock(); 1077 } 1078 1079 // We are still in the same block. 1080 readKeyValueLen(); 1081 return true; 1082 } 1083 1084 /** 1085 * Go to the next key/value in the block section. Loads the next block if 1086 * necessary. If successful, {@link #getKey()} and {@link #getValue()} can 1087 * be called. 1088 * 1089 * @return true if successfully navigated to the next key/value 1090 */ 1091 @Override 1092 public boolean next() throws IOException { 1093 // This is a hot method so extreme measures taken to ensure it is small and inlineable. 1094 // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation 1095 assertSeeked(); 1096 positionThisBlockBuffer(); 1097 return _next(); 1098 } 1099 1100 /** 1101 * Positions this scanner at the start of the file. 1102 * 1103 * @return false if empty file; i.e. a call to next would return false and 1104 * the current key and value are undefined. 1105 * @throws IOException 1106 */ 1107 @Override 1108 public boolean seekTo() throws IOException { 1109 if (reader == null) { 1110 return false; 1111 } 1112 1113 if (reader.getTrailer().getEntryCount() == 0) { 1114 // No data blocks. 
1115 return false; 1116 } 1117 1118 long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset(); 1119 if (curBlock != null 1120 && curBlock.getOffset() == firstDataBlockOffset) { 1121 return processFirstDataBlock(); 1122 } 1123 1124 readAndUpdateNewBlock(firstDataBlockOffset); 1125 return true; 1126 } 1127 1128 protected boolean processFirstDataBlock() throws IOException{ 1129 blockBuffer.rewind(); 1130 readKeyValueLen(); 1131 return true; 1132 } 1133 1134 protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException, 1135 CorruptHFileException { 1136 HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread, 1137 isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); 1138 if (newBlock.getOffset() < 0) { 1139 throw new IOException( 1140 "Invalid block offset: " + newBlock.getOffset() + ", path=" + reader.getPath()); 1141 } 1142 updateCurrentBlock(newBlock); 1143 } 1144 1145 protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, 1146 boolean rewind, Cell key, boolean seekBefore) throws IOException { 1147 if (this.curBlock == null 1148 || this.curBlock.getOffset() != seekToBlock.getOffset()) { 1149 updateCurrentBlock(seekToBlock); 1150 } else if (rewind) { 1151 blockBuffer.rewind(); 1152 } 1153 1154 // Update the nextIndexedKey 1155 this.nextIndexedKey = nextIndexedKey; 1156 return blockSeek(key, seekBefore); 1157 } 1158 1159 /** 1160 * @param v 1161 * @return True if v <= 0 or v > current block buffer limit. 1162 */ 1163 protected final boolean checkKeyLen(final int v) { 1164 return v <= 0 || v > this.blockBuffer.limit(); 1165 } 1166 1167 /** 1168 * @param v 1169 * @return True if v < 0 or v > current block buffer limit. 1170 */ 1171 protected final boolean checkLen(final int v) { 1172 return v < 0 || v > this.blockBuffer.limit(); 1173 } 1174 1175 /** 1176 * Check key and value lengths are wholesome. 
1177 */ 1178 protected final void checkKeyValueLen() { 1179 if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) { 1180 throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen 1181 + " or currValueLen " + this.currValueLen + ". Block offset: " 1182 + this.curBlock.getOffset() + ", block length: " 1183 + this.blockBuffer.limit() + ", position: " + this.blockBuffer.position() 1184 + " (without header)." + ", path=" + reader.getPath()); 1185 } 1186 } 1187 1188 /** 1189 * Updates the current block to be the given {@link HFileBlock}. Seeks to 1190 * the the first key/value pair. 1191 * 1192 * @param newBlock the block to make current 1193 */ 1194 protected void updateCurrentBlock(HFileBlock newBlock) throws IOException { 1195 // Set the active block on the reader 1196 // sanity check 1197 if (newBlock.getBlockType() != BlockType.DATA) { 1198 throw new IllegalStateException("ScannerV2 works only on data " + "blocks, got " 1199 + newBlock.getBlockType() + "; " + "HFileName=" + reader.getPath() 1200 + ", " + "dataBlockEncoder=" + reader.getDataBlockEncoding() + ", " + "isCompaction=" 1201 + isCompaction); 1202 } 1203 1204 updateCurrBlockRef(newBlock); 1205 blockBuffer = newBlock.getBufferWithoutHeader(); 1206 readKeyValueLen(); 1207 blockFetches.incrementAndGet(); 1208 1209 // Reset the next indexed key 1210 this.nextIndexedKey = null; 1211 } 1212 1213 protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) { 1214 ByteBuff buffer = curBlock.getBufferWithoutHeader(); 1215 // It is safe to manipulate this buffer because we own the buffer object. 
1216 buffer.rewind(); 1217 int klen = buffer.getInt(); 1218 buffer.skip(Bytes.SIZEOF_INT);// Skip value len part 1219 ByteBuffer keyBuff = buffer.asSubByteBuffer(klen); 1220 if (keyBuff.hasArray()) { 1221 return new KeyValue.KeyOnlyKeyValue(keyBuff.array(), keyBuff.arrayOffset() 1222 + keyBuff.position(), klen); 1223 } else { 1224 return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen); 1225 } 1226 } 1227 1228 @Override 1229 public String getKeyString() { 1230 return CellUtil.toString(getKey(), false); 1231 } 1232 1233 @Override 1234 public String getValueString() { 1235 return ByteBufferUtils.toStringBinary(getValue()); 1236 } 1237 1238 public int compareKey(CellComparator comparator, Cell key) { 1239 blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair); 1240 this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen); 1241 return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv); 1242 } 1243 1244 @Override 1245 public void shipped() throws IOException { 1246 this.returnBlocks(false); 1247 } 1248 } 1249 1250 @Override 1251 public Path getPath() { 1252 return path; 1253 } 1254 1255 @Override 1256 public DataBlockEncoding getDataBlockEncoding() { 1257 return dataBlockEncoder.getDataBlockEncoding(); 1258 } 1259 1260 @Override 1261 public Configuration getConf() { 1262 return conf; 1263 } 1264 1265 @Override 1266 public void setConf(Configuration conf) { 1267 this.conf = conf; 1268 } 1269 1270 /** Minor versions in HFile starting with this number have hbase checksums */ 1271 public static final int MINOR_VERSION_WITH_CHECKSUM = 1; 1272 /** In HFile minor version that does not support checksums */ 1273 public static final int MINOR_VERSION_NO_CHECKSUM = 0; 1274 1275 /** HFile minor version that introduced pbuf filetrailer */ 1276 public static final int PBUF_TRAILER_MINOR_VERSION = 2; 1277 1278 /** 1279 * The size of a (key length, value length) tuple that prefixes 
each entry in
   * a data block.
   */
  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;

  // Value returned by shouldIncludeMemStoreTS().
  private boolean includesMemstoreTS = false;
  // Value returned by isDecodeMemStoreTS().
  protected boolean decodeMemstoreTS = false;


  /** @return the current value of {@code decodeMemstoreTS} */
  @Override
  public boolean isDecodeMemStoreTS() {
    return this.decodeMemstoreTS;
  }

  /** @return the current value of {@code includesMemstoreTS} */
  @Override
  public boolean shouldIncludeMemStoreTS() {
    return includesMemstoreTS;
  }

  /**
   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
   *
   * @param cacheKey key to look the block up under
   * @param cacheBlock passed through to the cache lookup
   * @param useLock passed through to the cache lookup
   * @param isCompaction not read inside this method
   * @param updateCacheMetrics passed through to the cache lookup
   * @param expectedBlockType expected type, checked through {@code validateBlockType}
   * @param expectedDataBlockEncoding expected encoding, or null to skip the encoding check
   * @return the (unpacked) cached block, or null when the block cache is disabled, the block
   *         is not cached, or a cached data block has a different encoding than expected
   */
   private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
       boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
       DataBlockEncoding expectedDataBlockEncoding) throws IOException {
     // Check cache for block. If found return.
     if (cacheConf.isBlockCacheEnabled()) {
       BlockCache cache = cacheConf.getBlockCache();
       HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
         updateCacheMetrics);
       if (cachedBlock != null) {
         if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
           HFileBlock compressedBlock = cachedBlock;
           cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
           // In case of compressed block after unpacking we can return the compressed block
           // (unpack produced a different object, so the cached original is handed back).
          if (compressedBlock != cachedBlock) {
            cache.returnBlock(cacheKey, compressedBlock);
          }
        }
         validateBlockType(cachedBlock, expectedBlockType);

         if (expectedDataBlockEncoding == null) {
           return cachedBlock;
         }
         DataBlockEncoding actualDataBlockEncoding =
                 cachedBlock.getDataBlockEncoding();
         // Block types other than data blocks always have
         // DataBlockEncoding.NONE. To avoid false negative cache misses, only
         // perform this check if cached block is a data block.
         if (cachedBlock.getBlockType().isData() &&
                 !actualDataBlockEncoding.equals(expectedDataBlockEncoding)) {
           // This mismatch may happen if a Scanner, which is used for say a
           // compaction, tries to read an encoded block from the block cache.
           // The reverse might happen when an EncodedScanner tries to read
           // un-encoded blocks which were cached earlier.
           //
           // Because returning a data block with an implicit BlockType mismatch
           // will cause the requesting scanner to throw a disk read should be
           // forced here. This will potentially cause a significant number of
           // cache misses, so update so we should keep track of this as it might
           // justify the work on a CompoundScanner.
           if (!expectedDataBlockEncoding.equals(DataBlockEncoding.NONE) &&
                   !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)) {
             // If the block is encoded but the encoding does not match the
             // expected encoding it is likely the encoding was changed but the
             // block was not yet evicted. Evictions on file close happen async
             // so blocks with the old encoding still linger in cache for some
             // period of time. This event should be rare as it only happens on
             // schema definition change.
             LOG.info("Evicting cached block with key " + cacheKey
                 + " because of a data block encoding mismatch" + "; expected: "
                 + expectedDataBlockEncoding + ", actual: " + actualDataBlockEncoding + ", path="
                 + path);
             // This is an error scenario. so here we need to decrement the
             // count.
             cache.returnBlock(cacheKey, cachedBlock);
             cache.evictBlock(cacheKey);
           }
           // NOTE(review): when exactly one of the two encodings is NONE the block is neither
           // returned to the cache nor evicted before null is returned - confirm intended.
           return null;
         }
         return cachedBlock;
       }
     }
     return null;
   }

  /**
   * Looks up a meta block by name, consulting the block cache first when it is enabled.
   *
   * @param metaBlockName name of the meta block to fetch
   * @param cacheBlock Add block to cache, if found
   * @return the meta block as an {@link HFileBlock}, or null when the trailer reports no meta
   *         blocks or the meta index has no entry for the name.
   *         NOTE(review): an earlier wording promised "a ByteBuffer, with header skipped",
   *         which does not match the declared return type.
   * @throws IOException if the meta block index has not been loaded
   */
  @Override
  public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock)
      throws IOException {
    if (trailer.getMetaIndexCount() == 0) {
      return null; // there are no meta blocks
    }
    if (metaBlockIndexReader == null) {
      throw new IOException(path + " meta index not loaded");
    }

    byte[] mbname = Bytes.toBytes(metaBlockName);
    int block = metaBlockIndexReader.rootBlockContainingKey(mbname,
        0, mbname.length);
    if (block == -1)
      return null;
    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);

    // Per meta key from any given file, synchronize reads for said block. This
    // is OK to do for meta blocks because the meta block index is always
    // single-level.
    synchronized (metaBlockIndexReader
        .getRootBlockKey(block)) {
      // Check cache for block. If found return.
      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
      BlockCacheKey cacheKey = new BlockCacheKey(name, metaBlockOffset,
        this.isPrimaryReplicaReader(), BlockType.META);

      // Only cache when both the caller and the cache configuration ask for it.
      cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
      if (cacheConf.isBlockCacheEnabled()) {
        HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, false, true, true,
          BlockType.META, null);
        if (cachedBlock != null) {
          assert cachedBlock.isUnpacked() : "Packed block leak.";
          // Return a distinct 'shallow copy' of the block,
          // so pos does not get messed by the scanner
          // NOTE(review): no copy is made here; the cached block itself is returned.
          return cachedBlock;
        }
        // Cache Miss, please load.
}

      // Not served from cache: read the block from the filesystem and unpack it.
      HFileBlock metaBlock = fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false).
          unpack(hfileContext, fsBlockReader);

      // Cache the block
      if (cacheBlock) {
        cacheConf.getBlockCache().cacheBlock(cacheKey, metaBlock, cacheConf.isInMemory());
      }

      return metaBlock;
    }
  }

  /**
   * Reads the block at the given offset, serving it from the block cache when possible and
   * otherwise loading it from the filesystem (and caching it when requested).
   *
   * @param dataBlockOffset file offset of the block; must be non-negative and smaller than the
   *          trailer's load-on-open data offset
   * @param onDiskBlockSize on-disk size handed to the filesystem block reader
   * @param cacheBlock whether a block loaded from the filesystem should be added to the cache
   * @param pread passed through to the filesystem block reader
   * @param isCompaction passed to the cache lookup; its negation is passed to the filesystem
   *          block reader
   * @param updateCacheMetrics whether cache metrics and the data block read counter are updated
   * @param expectedBlockType expected block type; validated through {@code validateBlockType}
   * @param expectedDataBlockEncoding expected encoding for cached data blocks
   * @return the unpacked block
   * @throws IOException if the data block index is not loaded, the offset is out of range, or
   *           a cached data block has an encoding different from this reader's encoder
   */
  @Override
  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
      final boolean cacheBlock, boolean pread, final boolean isCompaction,
      boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding)
      throws IOException {
    if (dataBlockIndexReader == null) {
      throw new IOException(path + " block index not loaded");
    }
    long trailerOffset = trailer.getLoadOnOpenDataOffset();
    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
      throw new IOException("Requested block is out of range: " + dataBlockOffset +
        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
        ", trailer.getLoadOnOpenDataOffset: " + trailerOffset +
        ", path=" + path);
    }
    // For any given block from any given file, synchronize reads for said
    // block.
    // Without a cache, this synchronizing is needless overhead, but really
    // the other choice is to duplicate work (which the cache would prevent you
    // from doing).

    BlockCacheKey cacheKey = new BlockCacheKey(name, dataBlockOffset,
      this.isPrimaryReplicaReader(), expectedBlockType);

    // First pass through the loop runs without the offset lock; a cache miss may trigger a
    // second pass with the lock held.
    boolean useLock = false;
    IdLock.Entry lockEntry = null;
    try (TraceScope traceScope = TraceUtil.createTrace("HFileReaderImpl.readBlock")) {
      while (true) {
        // Check cache for block. If found return.
        if (cacheConf.shouldReadBlockFromCache(expectedBlockType)) {
          if (useLock) {
            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
          }
          // Try and get the block from the block cache. If the useLock variable is true then this
          // is the second time through the loop and it should not be counted as a block cache miss.
          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, isCompaction,
            updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding);
          if (cachedBlock != null) {
            if (LOG.isTraceEnabled()) {
              LOG.trace("From Cache " + cachedBlock);
            }
            TraceUtil.addTimelineAnnotation("blockCacheHit");
            assert cachedBlock.isUnpacked() : "Packed block leak.";
            if (cachedBlock.getBlockType().isData()) {
              if (updateCacheMetrics) {
                HFile.DATABLOCK_READ_COUNT.increment();
              }
              // Validate encoding type for data blocks. We include encoding
              // type in the cache key, and we expect it to match on a cache hit.
              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
                throw new IOException("Cached block under key " + cacheKey + " "
                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
                  + dataBlockEncoder.getDataBlockEncoding() + ")"
                  + ", path=" + path);
              }
            }
            // Cache-hit. Return!
            return cachedBlock;
          }

          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
            // check cache again with lock
            useLock = true;
            continue;
          }
          // Carry on, please load.
        }

        TraceUtil.addTimelineAnnotation("blockCacheMiss");
        // Load block from filesystem.
        HFileBlock hfileBlock =
            fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread, !isCompaction);
        validateBlockType(hfileBlock, expectedBlockType);
        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();

        // Cache the block if necessary
        // (the block as read from disk when compressed caching applies to this category,
        // otherwise the unpacked one).
        if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
          cacheConf.getBlockCache().cacheBlock(cacheKey,
            cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
            cacheConf.isInMemory());
        }

        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
          HFile.DATABLOCK_READ_COUNT.increment();
        }

        return unpacked;
      }
    } finally {
      // Release the per-offset lock if the second pass acquired it.
      if (lockEntry != null) {
        offsetLock.releaseLockEntry(lockEntry);
      }
    }
  }

  /** @return true only when memstore timestamps are both included and decoded */
  @Override
  public boolean hasMVCCInfo() {
    return includesMemstoreTS && decodeMemstoreTS;
  }

  /**
   * Compares the actual type of a block retrieved from cache or disk with its
   * expected type and throws an exception in case of a mismatch. Expected
   * block type of {@link BlockType#DATA} is considered to match the actual
   * block type {@link BlockType#ENCODED_DATA} as well.
   * @param block a block retrieved from cache or disk
   * @param expectedBlockType the expected block type, or null to skip the
   *          check
   * @throws IOException if the actual type differs from the expected one
   */
  private void validateBlockType(HFileBlock block,
      BlockType expectedBlockType) throws IOException {
    if (expectedBlockType == null) {
      return;
    }
    BlockType actualBlockType = block.getBlockType();
    if (expectedBlockType.isData() && actualBlockType.isData()) {
      // We consider DATA to match ENCODED_DATA for the purpose of this
      // verification.
1544 return; 1545 } 1546 if (actualBlockType != expectedBlockType) { 1547 throw new IOException("Expected block type " + expectedBlockType + ", " + 1548 "but got " + actualBlockType + ": " + block + ", path=" + path); 1549 } 1550 } 1551 1552 /** 1553 * @return Last key as cell in the file. May be null if file has no entries. Note that 1554 * this is not the last row key, but it is the Cell representation of the last 1555 * key 1556 */ 1557 @Override 1558 public Optional<Cell> getLastKey() { 1559 return dataBlockIndexReader.isEmpty() ? Optional.empty() : Optional.of(lastKeyCell); 1560 } 1561 1562 /** 1563 * @return Midkey for this file. We work with block boundaries only so 1564 * returned midkey is an approximation only. 1565 * @throws IOException 1566 */ 1567 @Override 1568 public Optional<Cell> midKey() throws IOException { 1569 return Optional.ofNullable(dataBlockIndexReader.midkey()); 1570 } 1571 1572 @Override 1573 public void close() throws IOException { 1574 close(cacheConf.shouldEvictOnClose()); 1575 } 1576 1577 @Override 1578 public void close(boolean evictOnClose) throws IOException { 1579 PrefetchExecutor.cancel(path); 1580 if (evictOnClose && cacheConf.isBlockCacheEnabled()) { 1581 int numEvicted = cacheConf.getBlockCache().evictBlocksByHfileName(name); 1582 if (LOG.isTraceEnabled()) { 1583 LOG.trace("On close, file=" + name + " evicted=" + numEvicted 1584 + " block(s)"); 1585 } 1586 } 1587 fsBlockReader.closeStreams(); 1588 } 1589 1590 @Override 1591 public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) { 1592 return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction); 1593 } 1594 1595 /** For testing */ 1596 @Override 1597 public HFileBlock.FSReader getUncachedBlockReader() { 1598 return fsBlockReader; 1599 } 1600 1601 /** 1602 * Scanner that operates on encoded data blocks. 
1603 */ 1604 protected static class EncodedScanner extends HFileScannerImpl { 1605 private final HFileBlockDecodingContext decodingCtx; 1606 private final DataBlockEncoder.EncodedSeeker seeker; 1607 private final DataBlockEncoder dataBlockEncoder; 1608 1609 public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, 1610 boolean pread, boolean isCompaction, HFileContext meta) { 1611 super(reader, cacheBlocks, pread, isCompaction); 1612 DataBlockEncoding encoding = reader.getDataBlockEncoding(); 1613 dataBlockEncoder = encoding.getEncoder(); 1614 decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(meta); 1615 seeker = dataBlockEncoder.createSeeker( 1616 reader.getComparator(), decodingCtx); 1617 } 1618 1619 @Override 1620 public boolean isSeeked(){ 1621 return curBlock != null; 1622 } 1623 1624 @Override 1625 public void setNonSeekedState() { 1626 reset(); 1627 } 1628 1629 /** 1630 * Updates the current block to be the given {@link HFileBlock}. Seeks to 1631 * the the first key/value pair. 
1632 * 1633 * @param newBlock the block to make current 1634 * @throws CorruptHFileException 1635 */ 1636 @Override 1637 protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException { 1638 1639 // sanity checks 1640 if (newBlock.getBlockType() != BlockType.ENCODED_DATA) { 1641 throw new IllegalStateException("EncodedScanner works only on encoded data blocks"); 1642 } 1643 short dataBlockEncoderId = newBlock.getDataBlockEncodingId(); 1644 if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) { 1645 String encoderCls = dataBlockEncoder.getClass().getName(); 1646 throw new CorruptHFileException("Encoder " + encoderCls 1647 + " doesn't support data block encoding " 1648 + DataBlockEncoding.getNameFromId(dataBlockEncoderId) 1649 + ", path=" + reader.getPath()); 1650 } 1651 updateCurrBlockRef(newBlock); 1652 ByteBuff encodedBuffer = getEncodedBuffer(newBlock); 1653 seeker.setCurrentBuffer(encodedBuffer); 1654 blockFetches.incrementAndGet(); 1655 1656 // Reset the next indexed key 1657 this.nextIndexedKey = null; 1658 } 1659 1660 private ByteBuff getEncodedBuffer(HFileBlock newBlock) { 1661 ByteBuff origBlock = newBlock.getBufferReadOnly(); 1662 int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE; 1663 origBlock.position(pos); 1664 origBlock 1665 .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE); 1666 return origBlock.slice(); 1667 } 1668 1669 @Override 1670 protected boolean processFirstDataBlock() throws IOException { 1671 seeker.rewind(); 1672 return true; 1673 } 1674 1675 @Override 1676 public boolean next() throws IOException { 1677 boolean isValid = seeker.next(); 1678 if (!isValid) { 1679 HFileBlock newBlock = readNextDataBlock(); 1680 isValid = newBlock != null; 1681 if (isValid) { 1682 updateCurrentBlock(newBlock); 1683 } else { 1684 setNonSeekedState(); 1685 } 1686 } 1687 return isValid; 1688 } 1689 1690 @Override 1691 public Cell getKey() { 1692 assertValidSeek(); 1693 
return seeker.getKey(); 1694 } 1695 1696 @Override 1697 public ByteBuffer getValue() { 1698 assertValidSeek(); 1699 return seeker.getValueShallowCopy(); 1700 } 1701 1702 @Override 1703 public Cell getCell() { 1704 if (this.curBlock == null) { 1705 return null; 1706 } 1707 return seeker.getCell(); 1708 } 1709 1710 @Override 1711 public String getKeyString() { 1712 return CellUtil.toString(getKey(), true); 1713 } 1714 1715 @Override 1716 public String getValueString() { 1717 ByteBuffer valueBuffer = getValue(); 1718 return ByteBufferUtils.toStringBinary(valueBuffer); 1719 } 1720 1721 private void assertValidSeek() { 1722 if (this.curBlock == null) { 1723 throw new NotSeekedException(reader.getPath()); 1724 } 1725 } 1726 1727 @Override 1728 protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) { 1729 return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock)); 1730 } 1731 1732 @Override 1733 protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, 1734 boolean rewind, Cell key, boolean seekBefore) throws IOException { 1735 if (this.curBlock == null 1736 || this.curBlock.getOffset() != seekToBlock.getOffset()) { 1737 updateCurrentBlock(seekToBlock); 1738 } else if (rewind) { 1739 seeker.rewind(); 1740 } 1741 this.nextIndexedKey = nextIndexedKey; 1742 return seeker.seekToKeyInBlock(key, seekBefore); 1743 } 1744 1745 @Override 1746 public int compareKey(CellComparator comparator, Cell key) { 1747 return seeker.compareKey(comparator, key); 1748 } 1749 } 1750 1751 /** 1752 * Returns a buffer with the Bloom filter metadata. The caller takes 1753 * ownership of the buffer. 
1754 */ 1755 @Override 1756 public DataInput getGeneralBloomFilterMetadata() throws IOException { 1757 return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META); 1758 } 1759 1760 @Override 1761 public DataInput getDeleteBloomFilterMetadata() throws IOException { 1762 return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META); 1763 } 1764 1765 private DataInput getBloomFilterMetadata(BlockType blockType) 1766 throws IOException { 1767 if (blockType != BlockType.GENERAL_BLOOM_META && 1768 blockType != BlockType.DELETE_FAMILY_BLOOM_META) { 1769 throw new RuntimeException("Block Type: " + blockType.toString() + 1770 " is not supported, path=" + path) ; 1771 } 1772 1773 for (HFileBlock b : loadOnOpenBlocks) 1774 if (b.getBlockType() == blockType) 1775 return b.getByteStream(); 1776 return null; 1777 } 1778 1779 public boolean isFileInfoLoaded() { 1780 return true; // We load file info in constructor in version 2. 1781 } 1782 1783 @Override 1784 public HFileContext getFileContext() { 1785 return hfileContext; 1786 } 1787 1788 /** 1789 * Returns false if block prefetching was requested for this file and has 1790 * not completed, true otherwise 1791 */ 1792 @Override 1793 @VisibleForTesting 1794 public boolean prefetchComplete() { 1795 return PrefetchExecutor.isCompleted(path); 1796 } 1797 1798 protected HFileContext createHFileContext(FSDataInputStreamWrapper fsdis, long fileSize, 1799 HFileSystem hfs, Path path, FixedFileTrailer trailer) throws IOException { 1800 HFileContextBuilder builder = new HFileContextBuilder() 1801 .withIncludesMvcc(shouldIncludeMemStoreTS()) 1802 .withHBaseCheckSum(true) 1803 .withHFileName(this.getName()) 1804 .withCompression(this.compressAlgo); 1805 1806 // Check for any key material available 1807 byte[] keyBytes = trailer.getEncryptionKey(); 1808 if (keyBytes != null) { 1809 Encryption.Context cryptoContext = Encryption.newContext(conf); 1810 Key key; 1811 key = EncryptionUtil.unwrapKey(conf, keyBytes); 1812 // Use the 
algorithm the key wants 1813 Cipher cipher = Encryption.getCipher(conf, key.getAlgorithm()); 1814 if (cipher == null) { 1815 throw new IOException("Cipher '" + key.getAlgorithm() + "' is not available" 1816 + ", path=" + path); 1817 } 1818 cryptoContext.setCipher(cipher); 1819 cryptoContext.setKey(key); 1820 builder.withEncryptionContext(cryptoContext); 1821 } 1822 1823 HFileContext context = builder.build(); 1824 1825 if (LOG.isTraceEnabled()) { 1826 LOG.trace("Reader" + (path != null? " for " + path: "") + 1827 " initialized with cacheConf: " + cacheConf + 1828 " comparator: " + comparator.getClass().getSimpleName() + 1829 " fileContext: " + context); 1830 } 1831 1832 return context; 1833 } 1834 1835 /** 1836 * Create a Scanner on this file. No seeks or reads are done on creation. Call 1837 * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is 1838 * nothing to clean up in a Scanner. Letting go of your references to the 1839 * scanner is sufficient. NOTE: Do not use this overload of getScanner for 1840 * compactions. See {@link #getScanner(boolean, boolean, boolean)} 1841 * 1842 * @param cacheBlocks True if we should cache blocks read in by this scanner. 1843 * @param pread Use positional read rather than seek+read if true (pread is 1844 * better for random reads, seek+read is better scanning). 1845 * @return Scanner on this file. 1846 */ 1847 @Override 1848 @VisibleForTesting 1849 public HFileScanner getScanner(boolean cacheBlocks, final boolean pread) { 1850 return getScanner(cacheBlocks, pread, false); 1851 } 1852 1853 /** 1854 * Create a Scanner on this file. No seeks or reads are done on creation. Call 1855 * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is 1856 * nothing to clean up in a Scanner. Letting go of your references to the 1857 * scanner is sufficient. 1858 * @param cacheBlocks 1859 * True if we should cache blocks read in by this scanner. 
* @param pread
   *          Use positional read rather than seek+read if true (pread is better
   *          for random reads, seek+read is better scanning).
   * @param isCompaction
   *          is scanner being used for a compaction?
   * @return Scanner on this file; an {@code EncodedScanner} when the data block encoder
   *         asks for one, a plain {@code HFileScannerImpl} otherwise.
   */
  @Override
  public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
      final boolean isCompaction) {
    return dataBlockEncoder.useEncodedScanner()
        ? new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext)
        : new HFileScannerImpl(this, cacheBlocks, pread, isCompaction);
  }

  /** @return the major HFile version handled by this reader, always 3 */
  public int getMajorVersion() {
    return 3;
  }

  /** Delegates to the block reader's {@code unbufferStream()}. */
  @Override
  public void unbufferStream() {
    fsBlockReader.unbufferStream();
  }
}