/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.Closeable;
import java.io.DataInput;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.LongAdder;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.MetricsIO;
import org.apache.hadoop.hbase.io.MetricsIOWrapperImpl;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.ShipperListener;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;

/**
 * File format for hbase. A file of sorted key/value pairs. Both keys and values are byte arrays.
 * <p>
 * The memory footprint of a HFile includes the following (below is taken from the <a
 * href="https://issues.apache.org/jira/browse/HADOOP-3315">TFile</a> documentation but applies also
 * to HFile):
 * <ul>
 * <li>Some constant overhead of reading or writing a compressed block.
 * <ul>
 * <li>Each compressed block requires one compression/decompression codec for I/O.
 * <li>Temporary space to buffer the key.
 * <li>Temporary space to buffer the value.
 * </ul>
 * <li>HFile index, which is proportional to the total number of Data Blocks. The total amount of
 * memory needed to hold the index can be estimated as (56+AvgKeySize)*NumBlocks.
 * </ul>
 * Suggestions on performance optimization.
 * <ul>
 * <li>Minimum block size. We recommend a setting of minimum block size between 8KB to 1MB for
 * general usage. Larger block size is preferred if files are primarily for sequential access.
 * However, it would lead to inefficient random access (because there are more data to decompress).
 * Smaller blocks are good for random access, but require more memory to hold the block index, and
 * may be slower to create (because we must flush the compressor stream at the conclusion of each
 * data block, which leads to an FS I/O flush). Further, due to the internal caching in Compression
 * codec, the smallest possible block size would be around 20KB-30KB.
 * <li>The current implementation does not offer true multi-threading for reading. The
 * implementation uses FSDataInputStream seek()+read(), which is shown to be much faster than
 * positioned-read call in single thread mode. However, it also means that if multiple threads
 * attempt to access the same HFile (using multiple scanners) simultaneously, the actual I/O is
 * carried out sequentially even if they access different DFS blocks (Reexamine! pread seems to be
 * 10% faster than seek+read in my testing -- stack).
 * <li>Compression codec. Use "none" if the data is not very compressible (by compressible, I mean a
 * compression ratio at least 2:1). Generally, use "lzo" as the starting point for experimenting.
 * "gz" offers slightly better compression ratio over "lzo" but requires 4x CPU to compress and 2x
 * CPU to decompress, comparing to "lzo".
 * </ul>
 * For more on the background behind HFile, see <a
 * href="https://issues.apache.org/jira/browse/HBASE-61">HBASE-61</a>.
 * <p>
 * File is made of data blocks followed by meta data blocks (if any), a fileinfo block, data block
 * index, meta data block index, and a fixed size trailer which records the offsets at which file
 * changes content type.
 *
 * <pre>
 * &lt;data blocks&gt;&lt;meta blocks&gt;&lt;fileinfo&gt;&lt;
 * data index&gt;&lt;meta index&gt;&lt;trailer&gt;
 * </pre>
 *
 * Each block has a bit of magic at its start. Blocks are comprised of key/values. In data blocks,
 * they are both byte arrays. Metadata blocks are a String key and a byte array value. An empty file
 * looks like this:
 *
 * <pre>
 * &lt;fileinfo&gt;&lt;trailer&gt;
 * </pre>
 *
 * That is, there are neither data nor meta blocks present.
 * <p>
 * TODO: Do scanners need to be able to take a start and end row? TODO: Should BlockIndex know the
 * name of its file? Should it have a Path that points at its file say for the case where an index
 * lives apart from an HFile instance?
 */
@InterfaceAudience.Private
public final class HFile {
  // LOG is being used in HFileBlock and CheckSumUtil
  static final Logger LOG = LoggerFactory.getLogger(HFile.class);

  /**
   * Maximum length of key in HFile.
   */
  public static final int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /**
   * Default compression: none.
   */
  public static final Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;

  /** Minimum supported HFile format version */
  public static final int MIN_FORMAT_VERSION = 2;

  /**
   * Maximum supported HFile format version
   */
  public static final int MAX_FORMAT_VERSION = 3;

  /**
   * Minimum HFile format version with support for persisting cell tags
   */
  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;

  /** Default compression name: none. */
  public static final String DEFAULT_COMPRESSION = DEFAULT_COMPRESSION_ALGORITHM.getName();

  /** Meta data block name for bloom filter bits. */
  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";

  /**
   * We assume that HFile path ends with ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at
   * least this many levels of nesting. This is needed for identifying table and CF name from an
   * HFile path.
   */
  public static final int MIN_NUM_HFILE_PATH_LEVELS = 5;

  /**
   * The number of bytes per checksum.
   */
  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;

  // For measuring number of checksum failures
  static final LongAdder CHECKSUM_FAILURES = new LongAdder();

  // For tests. Gets incremented when we read a block whether from HDFS or from Cache.
  public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder();

  /** Static instance for the metrics so that HFileReaders access the same instance */
  static final MetricsIO metrics = new MetricsIO(new MetricsIOWrapperImpl());

  /**
   * Private constructor; this class only exposes static members and nested types.
   */
  private HFile() {
  }

  /**
   * Number of checksum verification failures. It also clears the counter.
   */
  public static final long getAndResetChecksumFailuresCount() {
    return CHECKSUM_FAILURES.sumThenReset();
  }

  /**
   * Number of checksum verification failures. Unlike {@link #getAndResetChecksumFailuresCount()},
   * this does not clear the counter.
   */
  public static final long getChecksumFailuresCount() {
    return CHECKSUM_FAILURES.sum();
  }

  /**
   * Records a filesystem read latency in the shared IO metrics.
   * @param latencyMillis observed latency
   * @param pread         true to record it as a positional read, false as a regular read
   */
  public static final void updateReadLatency(long latencyMillis, boolean pread) {
    if (pread) {
      metrics.updateFsPreadTime(latencyMillis);
    } else {
      metrics.updateFsReadTime(latencyMillis);
    }
  }

  /**
   * Records a filesystem write latency in the shared IO metrics.
   * @param latencyMillis observed latency
   */
  public static final void updateWriteLatency(long latencyMillis) {
    metrics.updateFsWriteTime(latencyMillis);
  }

  /** API required to write an {@link HFile} */
  public interface Writer extends Closeable, CellSink, ShipperListener {
    /** Max memstore (mvcc) timestamp in FileInfo */
    public static final byte[] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");

    /** Add an element to the file info map. */
    void appendFileInfo(byte[] key, byte[] value) throws IOException;

    /** Returns the path to this {@link HFile} */
    Path getPath();

    /**
     * Adds an inline block writer such as a multi-level block index writer or a compound Bloom
     * filter writer.
     */
    void addInlineBlockWriter(InlineBlockWriter bloomWriter);

    // The below three methods take Writables. We'd like to undo Writables but undoing the below
    // would be pretty painful. Could take a byte [] or a Message but we want to be backward
    // compatible around hfiles so would need to map between Message and Writable or byte [] and
    // current Writable serialization. This would be a bit of work to little gain. That's my
    // thinking at moment. St.Ack 20121129

    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);

    /**
     * Store general Bloom filter in the file. This does not deal with Bloom filter internals but is
     * necessary, since Bloom filters are stored differently in HFile version 1 and version 2.
     */
    void addGeneralBloomFilter(BloomFilterWriter bfw);

    /**
     * Store delete family Bloom filter in the file, which is only supported in HFile V2.
     */
    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;

    /**
     * Return the file context for the HFile this writer belongs to
     */
    HFileContext getFileContext();
  }

  /**
   * This variety of ways to construct writers is used throughout the code, and we want to be able
   * to swap writer implementations.
   */
  public static class WriterFactory {
    protected final Configuration conf;
    protected final CacheConfig cacheConf;
    protected FileSystem fs;
    protected Path path;
    protected FSDataOutputStream ostream;
    protected InetSocketAddress[] favoredNodes;
    private HFileContext fileContext;
    protected boolean shouldDropBehind = false;

    WriterFactory(Configuration conf, CacheConfig cacheConf) {
      this.conf = conf;
      this.cacheConf = cacheConf;
    }

    /** Selects a filesystem/path target; {@link #create()} will open the output stream itself. */
    public WriterFactory withPath(FileSystem fs, Path path) {
      Preconditions.checkNotNull(fs);
      Preconditions.checkNotNull(path);
      this.fs = fs;
      this.path = path;
      return this;
    }

    /** Selects an already opened output stream as the target. */
    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
      Preconditions.checkNotNull(ostream);
      this.ostream = ostream;
      return this;
    }

    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
      // Deliberately not checking for null here.
      this.favoredNodes = favoredNodes;
      return this;
    }

    public WriterFactory withFileContext(HFileContext fileContext) {
      this.fileContext = fileContext;
      return this;
    }

    public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
      this.shouldDropBehind = shouldDropBehind;
      return this;
    }

    /**
     * Builds the writer. Exactly one of {@link #withPath(FileSystem, Path)} or
     * {@link #withOutputStream(FSDataOutputStream)} must have been called beforehand.
     * @return a new writer over the configured target
     * @throws IOException    if the output stream cannot be created
     * @throws AssertionError if neither or both of path and output stream were specified
     */
    public Writer create() throws IOException {
      // Both set or both unset is a usage error: exactly one target is required.
      if ((path != null) == (ostream != null)) {
        throw new AssertionError("Please specify exactly one of filesystem/path or output stream");
      }
      if (path != null) {
        ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
        try {
          ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
        } catch (UnsupportedOperationException uoe) {
          // Drop-behind is best effort; a stream that does not support it is still usable.
          LOG.trace("Unable to set drop behind on {}", path, uoe);
          LOG.debug("Unable to set drop behind on {}", path.getName());
        }
      }
      return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext);
    }
  }

  /** The configuration key for HFile version to use for new files */
  public static final String FORMAT_VERSION_KEY = "hfile.format.version";

  /**
   * Reads {@link #FORMAT_VERSION_KEY} from the configuration (defaulting to
   * {@link #MAX_FORMAT_VERSION}) and validates it with {@link #checkFormatVersion(int)}.
   * @throws IllegalArgumentException if the configured version is out of the supported range
   */
  public static int getFormatVersion(Configuration conf) {
    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    checkFormatVersion(version);
    return version;
  }

  /**
   * Returns the factory to be used to create {@link HFile} writers. Disables block cache access for
   * all writers created through the returned factory.
   */
  public static final WriterFactory getWriterFactoryNoCache(Configuration conf) {
    return HFile.getWriterFactory(conf, CacheConfig.DISABLED);
  }

  /**
   * Returns the factory to be used to create {@link HFile} writers
   * @throws IllegalArgumentException if the configured format version is not 3
   */
  public static final WriterFactory getWriterFactory(Configuration conf, CacheConfig cacheConf) {
    int version = getFormatVersion(conf);
    switch (version) {
      case 2:
        throw new IllegalArgumentException("This should never happen. "
          + "Did you change hfile.format.version to read v2? This version of the software writes v3"
          + " hfiles only (but it can read v2 files without having to update hfile.format.version "
          + "in hbase-site.xml)");
      case 3:
        return new HFile.WriterFactory(conf, cacheConf);
      default:
        throw new IllegalArgumentException(
          "Cannot create writer for HFile " + "format version " + version);
    }
  }

  /**
   * An abstraction used by the block index. Implementations will check cache for any asked-for
   * block and return cached block if found. Otherwise, after reading from fs, will try and put
   * block into cache before returning.
   */
  public interface CachingBlockReader {
    /**
     * Read in a file block.
     * @param offset                    offset to read.
     * @param onDiskBlockSize           size of the block
     * @param cacheBlock                presumably whether the block may be added to the block
     *                                  cache; confirm against implementations
     * @param pread                     presumably whether to use positional read; confirm against
     *                                  implementations
     * @param isCompaction              is this block being read as part of a compaction
     * @param updateCacheMetrics        presumably whether cache metrics are updated by this read;
     *                                  confirm against implementations
     * @param expectedBlockType         the block type we are expecting to read with this read
     *                                  operation, or null to read whatever block type is available
     *                                  and avoid checking (that might reduce caching efficiency of
     *                                  encoded data blocks)
     * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
     *                                  to be in, or null to not perform this check and return the
     *                                  block irrespective of the encoding. This check only applies
     *                                  to data blocks and can be set to null when the caller is
     *                                  expecting to read a non-data block and has set
     *                                  expectedBlockType accordingly.
     * @return Block wrapped in a ByteBuffer.
     */
    HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread,
      final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding) throws IOException;

    /**
     * Variant of the eight-argument {@code readBlock} taking an extra {@code cacheOnly} flag. The
     * flag's exact semantics are defined by implementations (NOTE(review): presumably restricts the
     * read to the cache; confirm).
     */
    HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread,
      final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly) throws IOException;
  }

  /** An interface used by clients to open and iterate an {@link HFile}. */
  public interface Reader extends Closeable, CachingBlockReader {
    /**
     * Returns this reader's "name". Usually the last component of the path. Needs to be constant as
     * the file is being moved to support caching on write.
     */
    String getName();

    CellComparator getComparator();

    HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread,
      boolean isCompaction);

    HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;

    Optional<Cell> getLastKey();

    Optional<Cell> midKey() throws IOException;

    long length();

    long getEntries();

    Optional<Cell> getFirstKey();

    long indexSize();

    Optional<byte[]> getFirstRowKey();

    Optional<byte[]> getLastRowKey();

    FixedFileTrailer getTrailer();

    void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader);

    HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader();

    void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader);

    HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader();

    HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread);

    /**
     * Retrieves general Bloom filter metadata as appropriate for each {@link HFile} version. Knows
     * nothing about how that metadata is structured.
     */
    DataInput getGeneralBloomFilterMetadata() throws IOException;

    /**
     * Retrieves delete family Bloom filter metadata as appropriate for each {@link HFile} version.
     * Knows nothing about how that metadata is structured.
     */
    DataInput getDeleteBloomFilterMetadata() throws IOException;

    Path getPath();

    /** Close method with optional evictOnClose */
    void close(boolean evictOnClose) throws IOException;

    DataBlockEncoding getDataBlockEncoding();

    boolean hasMVCCInfo();

    /**
     * Return the file context of the HFile this reader belongs to
     */
    HFileContext getFileContext();

    boolean isPrimaryReplicaReader();

    DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);

    HFileBlock.FSReader getUncachedBlockReader();

    boolean prefetchComplete();

    /**
     * To close the stream's socket. Note: This can be concurrently called from multiple threads and
     * implementation should take care of thread safety.
     */
    void unbufferStream();

    ReaderContext getContext();

    HFileInfo getHFileInfo();

    void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder);
  }

  /**
   * Method returns the reader given the specified arguments. TODO This is a bad abstraction. See
   * HBASE-6635.
   * <p>
   * On any failure the context's input stream wrapper is closed before the exception is thrown;
   * the wrapper is unbuffered on every exit path.
   * @param context   Reader context info
   * @param fileInfo  HFile info
   * @param cacheConf Cache configuration values, cannot be null.
   * @param conf      Configuration
   * @return an appropriate instance of HFileReader
   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SF_SWITCH_FALLTHROUGH",
      justification = "Intentional")
  public static Reader createReader(ReaderContext context, HFileInfo fileInfo,
    CacheConfig cacheConf, Configuration conf) throws IOException {
    try {
      if (context.getReaderType() == ReaderType.STREAM) {
        // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields
        return new HFileStreamReader(context, fileInfo, cacheConf, conf);
      }
      FixedFileTrailer trailer = fileInfo.getTrailer();
      switch (trailer.getMajorVersion()) {
        case 2:
          LOG.debug("Opening HFile v2 with v3 reader");
          // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH
        case 3:
          return new HFilePreadReader(context, fileInfo, cacheConf, conf);
        default:
          throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
      }
    } catch (Throwable t) {
      IOUtils.closeQuietly(context.getInputStreamWrapper(),
        e -> LOG.warn("failed to close input stream wrapper", e));
      throw new CorruptHFileException(
        "Problem reading HFile Trailer from file " + context.getFilePath(), t);
    } finally {
      context.getInputStreamWrapper().unbuffer();
    }
  }

  /**
   * Creates reader with cache configuration disabled
   * @param fs   filesystem
   * @param path Path to file to read
   * @param conf Configuration
   * @return an active Reader instance
   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile
   *                     is corrupt/invalid.
   */
  public static Reader createReader(FileSystem fs, Path path, Configuration conf)
    throws IOException {
    // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not use
    // block cache then it is OK to set it as any value. We use true here.
    return createReader(fs, path, CacheConfig.DISABLED, true, conf);
  }

  /**
   * Opens the file, builds a pread {@link ReaderContext} for it and returns a reader whose meta
   * and index information has been initialized.
   * @param fs                   filesystem
   * @param path                 Path to file to read
   * @param cacheConf            This must not be null.
   * @param primaryReplicaReader true if this is a reader for primary replica
   * @param conf                 Configuration
   * @return an active Reader instance
   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile
   *                     is corrupt/invalid.
   * @see CacheConfig#CacheConfig(Configuration)
   */
  public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf,
    boolean primaryReplicaReader, Configuration conf) throws IOException {
    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
    final long fileSize;
    try {
      fileSize = fs.getFileStatus(path).getLen();
    } catch (IOException | RuntimeException e) {
      // The stream is already open but not yet handed to a context, so nothing else would close
      // it if the status lookup fails; release it here before propagating.
      IOUtils.closeQuietly(stream, ce -> LOG.warn("failed to close input stream wrapper", ce));
      throw e;
    }
    ReaderContext context = new ReaderContextBuilder().withFilePath(path)
      .withInputStreamWrapper(stream).withFileSize(fileSize).withFileSystem(stream.getHfs())
      .withPrimaryReplicaReader(primaryReplicaReader).withReaderType(ReaderType.PREAD).build();
    HFileInfo fileInfo = new HFileInfo(context, conf);
    Reader reader = createReader(context, fileInfo, cacheConf, conf);
    fileInfo.initMetaAndIndex(reader);
    return reader;
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   * @param fs   filesystem
   * @param path Path to file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
    return isHFileFormat(fs, fs.getFileStatus(path));
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   * @param fs         filesystem
   * @param fileStatus the file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
    throws IOException {
    final Path path = fileStatus.getPath();
    final long size = fileStatus.getLen();
    try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      return true;
    } catch (IllegalArgumentException e) {
      // The trailer reader signals an unparseable trailer this way: not an HFile.
      return false;
    }
  }

  /**
   * Get names of supported compression algorithms. The names are acceptable by HFile.Writer.
   * @return Array of strings, each represents a supported compression algorithm. Currently, the
   *         following compression algorithms are supported.
   *         <ul>
   *         <li>"none" - No compression.
   *         <li>"gz" - GZIP compression.
   *         </ul>
   */
  public static String[] getSupportedCompressionAlgorithms() {
    return Compression.getSupportedAlgorithms();
  }

  // Utility methods.
  /*
   * @param l Long to convert to an int.
   * @return <code>l</code> cast as an int.
   */
  static int longToInt(final long l) {
    // Expecting the size() of a block not exceeding 4GB. Assuming the
    // size() will wrap to negative integer if it exceeds 2GB (From tfile).
    return (int) (l & 0x00000000ffffffffL);
  }

  /**
   * Returns all HFiles belonging to the given region directory. Could return an empty list.
   * @param fs        The file system reference.
   * @param regionDir The region directory to scan.
   * @return The list of files found.
   * @throws IOException When scanning the files fails.
   */
  public static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException {
    List<Path> regionHFiles = new ArrayList<>();
    PathFilter dirFilter = new FSUtils.DirFilter(fs);
    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
    for (FileStatus dir : familyDirs) {
      FileStatus[] files = fs.listStatus(dir.getPath());
      for (FileStatus file : files) {
        if (
          !file.isDirectory()
            && (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME))
            && (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))
        ) {
          regionHFiles.add(file.getPath());
        }
      }
    }
    return regionHFiles;
  }

  /**
   * Checks the given {@link HFile} format version, and throws an exception if invalid. Note that if
   * the version number comes from an input file and has not been verified, the caller needs to
   * re-throw an {@link IOException} to indicate that this is not a software error, but corrupted
   * input.
   * @param version an HFile version
   * @throws IllegalArgumentException if the version is invalid
   */
  public static void checkFormatVersion(int version) throws IllegalArgumentException {
    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
      throw new IllegalArgumentException("Invalid HFile version: " + version + " (expected to be "
        + "between " + MIN_FORMAT_VERSION + " and " + MAX_FORMAT_VERSION + ")");
    }
  }

  /**
   * Verifies that the configured {@link #FORMAT_VERSION_KEY} (defaulting to
   * {@link #MAX_FORMAT_VERSION} when unset) is exactly {@link #MAX_FORMAT_VERSION}.
   * @param c configuration to inspect
   * @throws IllegalArgumentException if the configured version differs from
   *                                  {@link #MAX_FORMAT_VERSION}
   */
  public static void checkHFileVersion(final Configuration c) {
    int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    if (version != MAX_FORMAT_VERSION) {
      throw new IllegalArgumentException(
        "The setting for " + FORMAT_VERSION_KEY + " (in your hbase-*.xml files) is " + version
          + " which does not match " + MAX_FORMAT_VERSION
          + "; are you running with a configuration from an older or newer hbase install (an "
          + "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
    }
  }

  public static void main(String[] args) throws Exception {
    // delegate to preserve old behavior
    HFilePrettyPrinter.main(args);
  }
}