/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.Closeable;
import java.io.DataInput;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.LongAdder;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.MetricsIO;
import org.apache.hadoop.hbase.io.MetricsIOWrapperImpl;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.ShipperListener;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;

/**
 * File format for HBase.
 * A file of sorted key/value pairs. Both keys and values are byte arrays.
 * <p>
 * The memory footprint of an HFile includes the following (below is taken from the
 * <a
 * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
 * but applies also to HFile):
 * <ul>
 * <li>Some constant overhead of reading or writing a compressed block.
 * <ul>
 * <li>Each compressed block requires one compression/decompression codec for
 * I/O.
 * <li>Temporary space to buffer the key.
 * <li>Temporary space to buffer the value.
 * </ul>
 * <li>HFile index, which is proportional to the total number of Data Blocks.
 * The total amount of memory needed to hold the index can be estimated as
 * (56+AvgKeySize)*NumBlocks.
 * </ul>
 * Suggestions on performance optimization.
 * <ul>
 * <li>Minimum block size. We recommend a setting of minimum block size between
 * 8KB and 1MB for general usage. Larger block sizes are preferred if files are
 * primarily for sequential access. However, they lead to inefficient random
 * access (because there is more data to decompress). Smaller blocks are good
 * for random access, but require more memory to hold the block index, and may
 * be slower to create (because we must flush the compressor stream at the
 * conclusion of each data block, which leads to an FS I/O flush). Further, due
 * to the internal caching in the Compression codec, the smallest possible block
 * size is around 20KB-30KB.
 * <li>The current implementation does not offer true multi-threading for
 * reading. The implementation uses FSDataInputStream seek()+read(), which has
 * been shown to be much faster than the positioned-read call in single-thread
 * mode. However, it also means that if multiple threads attempt to access the
 * same HFile (using multiple scanners) simultaneously, the actual I/O is carried
 * out sequentially even if they access different DFS blocks (Reexamine! pread
 * seems to be 10% faster than seek+read in my testing -- stack).
 * <li>Compression codec. Use "none" if the data is not very compressible (by
 * compressible, I mean a compression ratio of at least 2:1). Generally, use
 * "lzo" as the starting point for experimenting. "gz" offers a slightly better
 * compression ratio than "lzo" but requires 4x the CPU to compress and 2x the
 * CPU to decompress, compared to "lzo". (The writer sketch below shows where
 * these settings are applied.)
 * </ul>
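 * <p>
 * As a purely illustrative sketch (not a recommendation), the block size and
 * compression choices above are supplied through an HFileContext, built with this
 * package's HFileContextBuilder, and handed to the writer factory; names such as
 * conf, cacheConf, fs and path are assumed to be in scope:
 * <pre>
 *   HFileContext context = new HFileContextBuilder()
 *       .withBlockSize(64 * 1024)
 *       .withCompression(Compression.Algorithm.NONE)
 *       .build();
 *   HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf)
 *       .withPath(fs, path)
 *       .withFileContext(context)
 *       .create();
 *   // append Cells in sorted order, then writer.close()
 * </pre>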
 *
 * For more on the background behind HFile, see <a
 * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
 * <p>
 * The file is made of data blocks followed by meta data blocks (if any), a
 * fileinfo block, a data block index, a meta data block index, and a fixed size
 * trailer which records the offsets at which the file changes content type.
 * <pre><data blocks><meta blocks><fileinfo><
 * data index><meta index><trailer></pre>
 * Each block has a bit of magic at its start. Blocks are made up of key/value
 * pairs. In data blocks, both key and value are byte arrays. In metadata blocks,
 * the key is a String and the value is a byte array. An empty file looks like
 * this: <pre><fileinfo><trailer></pre>. That is, there are no data nor meta
 * blocks present.
 * <p>
 * TODO: Do scanners need to be able to take a start and end row?
 * TODO: Should BlockIndex know the name of its file? Should it have a Path
 * that points at its file say for the case where an index lives apart from
 * an HFile instance?
 */
@InterfaceAudience.Private
public final class HFile {
  // LOG is being used in HFileBlock and CheckSumUtil
  static final Logger LOG = LoggerFactory.getLogger(HFile.class);

  /**
   * Maximum length of key in HFile.
   */
  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /**
   * Default compression: none.
   */
  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;

  /** Minimum supported HFile format version */
  public static final int MIN_FORMAT_VERSION = 2;

  /** Maximum supported HFile format version */
  public static final int MAX_FORMAT_VERSION = 3;

  /**
   * Minimum HFile format version with support for persisting cell tags
   */
  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;

  /** Default compression name: none. */
  public final static String DEFAULT_COMPRESSION =
    DEFAULT_COMPRESSION_ALGORITHM.getName();

  /** Meta data block name for bloom filter bits. */
  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";

  /**
   * We assume that HFile path ends with
   * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
   * many levels of nesting. This is needed for identifying table and CF name
   * from an HFile path.
   */
  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;

  /**
   * The number of bytes per checksum.
   */
  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;

  // For measuring number of checksum failures
  static final LongAdder CHECKSUM_FAILURES = new LongAdder();

  // For tests. Gets incremented when we read a block whether from HDFS or from Cache.
  public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder();

  /** Static instance for the metrics so that HFileReaders access the same instance */
  static final MetricsIO metrics = new MetricsIO(new MetricsIOWrapperImpl());

  /**
   * Private constructor; this is a utility class of static members only and is not
   * meant to be instantiated.
   */
  private HFile() {}

  /**
   * Number of checksum verification failures. It also
   * clears the counter.
   */
  public static final long getAndResetChecksumFailuresCount() {
    return CHECKSUM_FAILURES.sumThenReset();
  }

  /**
   * Number of checksum verification failures. It does not
   * clear the counter.
   */
  public static final long getChecksumFailuresCount() {
    return CHECKSUM_FAILURES.sum();
  }

  public static final void updateReadLatency(long latencyMillis, boolean pread) {
    if (pread) {
      metrics.updateFsPreadTime(latencyMillis);
    } else {
      metrics.updateFsReadTime(latencyMillis);
    }
  }

  public static final void updateWriteLatency(long latencyMillis) {
    metrics.updateFsWriteTime(latencyMillis);
  }

  /** API required to write an {@link HFile} */
  public interface Writer extends Closeable, CellSink, ShipperListener {
    /** Max memstore (mvcc) timestamp in FileInfo */
    public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");

    /** Add an element to the file info map. */
    void appendFileInfo(byte[] key, byte[] value) throws IOException;

    /** @return the path to this {@link HFile} */
    Path getPath();

    /**
     * Adds an inline block writer such as a multi-level block index writer or
     * a compound Bloom filter writer.
     */
    void addInlineBlockWriter(InlineBlockWriter bloomWriter);

    // The below three methods take Writables. We'd like to undo Writables but undoing the below
    // would be pretty painful. Could take a byte [] or a Message but we want to be backward
    // compatible around hfiles so would need to map between Message and Writable or byte [] and
    // current Writable serialization. This would be a bit of work for little gain. That's my
    // thinking at the moment. St.Ack 20121129

    /** Write a metadata block into the file, keyed by the given name. */
    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);

    /**
     * Store general Bloom filter in the file. This does not deal with Bloom filter
     * internals but is necessary, since Bloom filters are stored differently
     * in HFile version 1 and version 2.
     */
    void addGeneralBloomFilter(BloomFilterWriter bfw);

    /**
     * Store delete family Bloom filter in the file, which is only supported in
     * HFile V2.
     */
    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;

    /**
     * Return the file context for the HFile this writer belongs to
     */
    HFileContext getFileContext();
  }

  /**
   * This variety of ways to construct writers is used throughout the code, and
   * we want to be able to swap writer implementations.
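   * <p>
   * For instance, a writer is typically obtained through a chain like the following
   * (an illustrative sketch only; exactly one of a path or an output stream must be
   * supplied, and the remaining options may be omitted):
   * <pre>
   *   HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf)
   *       .withPath(fs, path)              // or .withOutputStream(ostream)
   *       .withFileContext(fileContext)
   *       .withShouldDropCacheBehind(true) // optional
   *       .create();
   * </pre>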
   */
  public static class WriterFactory {
    protected final Configuration conf;
    protected final CacheConfig cacheConf;
    protected FileSystem fs;
    protected Path path;
    protected FSDataOutputStream ostream;
    protected InetSocketAddress[] favoredNodes;
    private HFileContext fileContext;
    protected boolean shouldDropBehind = false;

    WriterFactory(Configuration conf, CacheConfig cacheConf) {
      this.conf = conf;
      this.cacheConf = cacheConf;
    }

    public WriterFactory withPath(FileSystem fs, Path path) {
      Preconditions.checkNotNull(fs);
      Preconditions.checkNotNull(path);
      this.fs = fs;
      this.path = path;
      return this;
    }

    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
      Preconditions.checkNotNull(ostream);
      this.ostream = ostream;
      return this;
    }

    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
      // Deliberately not checking for null here.
      this.favoredNodes = favoredNodes;
      return this;
    }

    public WriterFactory withFileContext(HFileContext fileContext) {
      this.fileContext = fileContext;
      return this;
    }

    public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
      this.shouldDropBehind = shouldDropBehind;
      return this;
    }

    public Writer create() throws IOException {
      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
        throw new AssertionError("Please specify exactly one of " +
          "filesystem/path or output stream");
      }
      if (path != null) {
        ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
        try {
          ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
        } catch (UnsupportedOperationException uoe) {
          LOG.trace("Unable to set drop behind on {}", path, uoe);
          LOG.debug("Unable to set drop behind on {}", path.getName());
        }
      }
      return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext);
    }
  }

  /** The configuration key for HFile version to use for new files */
  public static final String FORMAT_VERSION_KEY = "hfile.format.version";

  public static int getFormatVersion(Configuration conf) {
    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    checkFormatVersion(version);
    return version;
  }

  /**
   * Returns the factory to be used to create {@link HFile} writers.
   * Disables block cache access for all writers created through the
   * returned factory.
   */
  public static final WriterFactory getWriterFactoryNoCache(Configuration conf) {
    return HFile.getWriterFactory(conf, CacheConfig.DISABLED);
  }

  /**
   * Returns the factory to be used to create {@link HFile} writers
   */
  public static final WriterFactory getWriterFactory(Configuration conf,
      CacheConfig cacheConf) {
    int version = getFormatVersion(conf);
    switch (version) {
      case 2:
        throw new IllegalArgumentException("This should never happen. " +
          "Did you change hfile.format.version to read v2? This version of the software writes v3" +
          " hfiles only (but it can read v2 files without having to update hfile.format.version " +
          "in hbase-site.xml)");
      case 3:
        return new HFile.WriterFactory(conf, cacheConf);
      default:
        throw new IllegalArgumentException("Cannot create writer for HFile " +
          "format version " + version);
    }
  }

  /**
   * An abstraction used by the block index.
   * Implementations will check the cache for any asked-for block and return the cached
   * block if found. Otherwise, after reading from the fs, they will try to put the block
   * into the cache before returning.
   */
  public interface CachingBlockReader {
    /**
     * Read in a file block.
     * @param offset offset to read.
     * @param onDiskBlockSize size of the block
     * @param cacheBlock whether the block should be added to the block cache once read
     * @param pread if true, use positioned read rather than seek+read
     * @param isCompaction is this block being read as part of a compaction
     * @param updateCacheMetrics whether cache hit/miss metrics should be updated for this read
     * @param expectedBlockType the block type we are expecting to read with this read operation,
     *   or null to read whatever block type is available and avoid checking (that might reduce
     *   caching efficiency of encoded data blocks)
     * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
     *   to be in, or null to not perform this check and return the block irrespective of the
     *   encoding. This check only applies to data blocks and can be set to null when the caller is
     *   expecting to read a non-data block and has set expectedBlockType accordingly.
     * @return the block that was read, possibly served from the cache.
     */
    HFileBlock readBlock(long offset, long onDiskBlockSize,
        boolean cacheBlock, final boolean pread, final boolean isCompaction,
        final boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException;
  }

  /** An interface used by clients to open and iterate an {@link HFile}. */
  public interface Reader extends Closeable, CachingBlockReader {
    /**
     * Returns this reader's "name". Usually the last component of the path.
     * Needs to stay constant while the file is moved, to support caching on write.
     */
    String getName();

    CellComparator getComparator();

    HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);

    HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;

    Optional<Cell> getLastKey();

    Optional<Cell> midKey() throws IOException;

    long length();

    long getEntries();

    Optional<Cell> getFirstKey();

    long indexSize();

    Optional<byte[]> getFirstRowKey();

    Optional<byte[]> getLastRowKey();

    FixedFileTrailer getTrailer();

    void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader);

    HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader();

    void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader);

    HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader();

    HFileScanner getScanner(boolean cacheBlocks, boolean pread);

    /**
     * Retrieves general Bloom filter metadata as appropriate for each
     * {@link HFile} version.
     * Knows nothing about how that metadata is structured.
     */
    DataInput getGeneralBloomFilterMetadata() throws IOException;

    /**
     * Retrieves delete family Bloom filter metadata as appropriate for each
     * {@link HFile} version.
     * Knows nothing about how that metadata is structured.
     */
    DataInput getDeleteBloomFilterMetadata() throws IOException;

    Path getPath();

    /** Close method with optional evictOnClose */
    void close(boolean evictOnClose) throws IOException;

    DataBlockEncoding getDataBlockEncoding();

    boolean hasMVCCInfo();

    /**
     * Return the file context of the HFile this reader belongs to
     */
    HFileContext getFileContext();

    boolean isPrimaryReplicaReader();

    DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);

    @VisibleForTesting
    HFileBlock.FSReader getUncachedBlockReader();

    @VisibleForTesting
    boolean prefetchComplete();

    /**
     * To close the stream's socket. Note: This can be concurrently called from multiple threads
     * and the implementation should take care of thread safety.
     */
    void unbufferStream();

    ReaderContext getContext();

    HFileInfo getHFileInfo();

    void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder);
  }

  /**
   * Method returns the reader given the specified arguments.
   * TODO This is a bad abstraction. See HBASE-6635.
   *
   * @param context Reader context info
   * @param fileInfo HFile info
   * @param cacheConf Cache configuration values, cannot be null.
   * @param conf Configuration
   * @return an appropriate instance of HFileReader
   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SF_SWITCH_FALLTHROUGH",
      justification="Intentional")
  public static Reader createReader(ReaderContext context, HFileInfo fileInfo,
      CacheConfig cacheConf, Configuration conf) throws IOException {
    try {
      if (context.getReaderType() == ReaderType.STREAM) {
        // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields
        return new HFileStreamReader(context, fileInfo, cacheConf, conf);
      }
      FixedFileTrailer trailer = fileInfo.getTrailer();
      switch (trailer.getMajorVersion()) {
        case 2:
          LOG.debug("Opening HFile v2 with v3 reader");
          // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH
        case 3:
          return new HFilePreadReader(context, fileInfo, cacheConf, conf);
        default:
          throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
      }
    } catch (Throwable t) {
      IOUtils.closeQuietly(context.getInputStreamWrapper());
      throw new CorruptHFileException("Problem reading HFile Trailer from file "
          + context.getFilePath(), t);
    } finally {
      context.getInputStreamWrapper().unbuffer();
    }
  }

  /**
   * Creates a reader with the cache configuration disabled.
   * @param fs filesystem
   * @param path Path to file to read
   * @param conf Configuration
   * @return an active Reader instance
   * @throws IOException Will throw a CorruptHFileException
   *   (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
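   * <p>
   * An illustrative sketch of typical use (error handling elided; fs, path and conf are
   * assumed to be in scope); the scanner methods shown are those declared on
   * {@link HFileScanner}:
   * <pre>
   *   HFile.Reader reader = HFile.createReader(fs, path, conf);
   *   HFileScanner scanner = reader.getScanner(false, true);
   *   if (scanner.seekTo()) {
   *     do {
   *       Cell cell = scanner.getCell();
   *       // ... use cell ...
   *     } while (scanner.next());
   *   }
   *   reader.close();
   * </pre>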
   */
  public static Reader createReader(FileSystem fs, Path path, Configuration conf)
      throws IOException {
    // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not
    // use block cache then it is OK to set it as any value. We use true here.
    return createReader(fs, path, CacheConfig.DISABLED, true, conf);
  }

  /**
   * @param fs filesystem
   * @param path Path to file to read
   * @param cacheConf This must not be null; see
   *   {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}.
   * @param primaryReplicaReader true if this is a reader for a primary replica
   * @param conf Configuration
   * @return an active Reader instance
   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile
   *   is corrupt/invalid.
   */
  public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf,
      boolean primaryReplicaReader, Configuration conf) throws IOException {
    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
    ReaderContext context = new ReaderContextBuilder()
        .withFilePath(path)
        .withInputStreamWrapper(stream)
        .withFileSize(fs.getFileStatus(path).getLen())
        .withFileSystem(stream.getHfs())
        .withPrimaryReplicaReader(primaryReplicaReader)
        .withReaderType(ReaderType.PREAD)
        .build();
    HFileInfo fileInfo = new HFileInfo(context, conf);
    Reader reader = createReader(context, fileInfo, cacheConf, conf);
    fileInfo.initMetaAndIndex(reader);
    return reader;
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   * @param fs filesystem
   * @param path Path to file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
    return isHFileFormat(fs, fs.getFileStatus(path));
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   * @param fs filesystem
   * @param fileStatus the file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
      throws IOException {
    final Path path = fileStatus.getPath();
    final long size = fileStatus.getLen();
    try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      return true;
    } catch (IllegalArgumentException e) {
      return false;
    }
  }

  /**
   * Get names of supported compression algorithms. The names are acceptable by
   * HFile.Writer.
   *
   * @return Array of strings, each represents a supported compression
   *   algorithm. Currently, the following compression algorithms are
   *   supported.
   *   <ul>
   *   <li>"none" - No compression.
   *   <li>"gz" - GZIP compression.
   *   </ul>
   */
  public static String[] getSupportedCompressionAlgorithms() {
    return Compression.getSupportedAlgorithms();
  }

  // Utility methods.
  /*
   * @param l Long to convert to an int.
   * @return <code>l</code> cast as an int.
   */
  static int longToInt(final long l) {
    // Expecting the size() of a block not exceeding 4GB. Assuming the
    // size() will wrap to negative integer if it exceeds 2GB (From tfile).
    return (int)(l & 0x00000000ffffffffL);
  }

  /**
   * Returns all HFiles belonging to the given region directory. May return an
   * empty list.
   *
   * @param fs The file system reference.
   * @param regionDir The region directory to scan.
   * @return The list of files found.
   * @throws IOException When scanning the files fails.
   */
  public static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
      throws IOException {
    List<Path> regionHFiles = new ArrayList<>();
    PathFilter dirFilter = new FSUtils.DirFilter(fs);
    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
    for (FileStatus dir : familyDirs) {
      FileStatus[] files = fs.listStatus(dir.getPath());
      for (FileStatus file : files) {
        if (!file.isDirectory() &&
            (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
            (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
          regionHFiles.add(file.getPath());
        }
      }
    }
    return regionHFiles;
  }

  /**
   * Checks the given {@link HFile} format version, and throws an exception if
   * invalid. Note that if the version number comes from an input file and has
   * not been verified, the caller needs to re-throw an {@link IOException} to
   * indicate that this is not a software error, but corrupted input.
   *
   * @param version an HFile version
   * @throws IllegalArgumentException if the version is invalid
   */
  public static void checkFormatVersion(int version)
      throws IllegalArgumentException {
    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
      throw new IllegalArgumentException("Invalid HFile version: " + version
          + " (expected to be between " + MIN_FORMAT_VERSION + " and "
          + MAX_FORMAT_VERSION + ")");
    }
  }

  public static void checkHFileVersion(final Configuration c) {
    int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
      throw new IllegalArgumentException("The setting for " + FORMAT_VERSION_KEY +
          " (in your hbase-*.xml files) is " + version + " which does not match " +
          MAX_FORMAT_VERSION +
          "; are you running with a configuration from an older or newer hbase install (an " +
          "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
    }
  }

  public static void main(String[] args) throws Exception {
    // delegate to preserve old behavior
    HFilePrettyPrinter.main(args);
  }
}