001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.io.Closeable; 021import java.io.DataInput; 022import java.io.IOException; 023import java.net.InetSocketAddress; 024import java.util.ArrayList; 025import java.util.List; 026import java.util.Optional; 027import java.util.concurrent.atomic.LongAdder; 028import org.apache.commons.io.IOUtils; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FSDataOutputStream; 031import org.apache.hadoop.fs.FileStatus; 032import org.apache.hadoop.fs.FileSystem; 033import org.apache.hadoop.fs.Path; 034import org.apache.hadoop.fs.PathFilter; 035import org.apache.hadoop.hbase.Cell; 036import org.apache.hadoop.hbase.CellComparator; 037import org.apache.hadoop.hbase.HConstants; 038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 039import org.apache.hadoop.hbase.io.MetricsIO; 040import org.apache.hadoop.hbase.io.MetricsIOWrapperImpl; 041import org.apache.hadoop.hbase.io.compress.Compression; 042import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 043import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType; 044import org.apache.hadoop.hbase.regionserver.CellSink; 045import org.apache.hadoop.hbase.regionserver.ShipperListener; 046import org.apache.hadoop.hbase.util.BloomFilterWriter; 047import org.apache.hadoop.hbase.util.Bytes; 048import org.apache.hadoop.hbase.util.FSUtils; 049import org.apache.hadoop.io.Writable; 050import org.apache.yetus.audience.InterfaceAudience; 051import org.slf4j.Logger; 052import org.slf4j.LoggerFactory; 053 054import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 055 056/** 057 * File format for hbase. A file of sorted key/value pairs. Both keys and values are byte arrays. 058 * <p> 059 * The memory footprint of a HFile includes the following (below is taken from the <a 060 * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation but applies also 061 * to HFile): 062 * <ul> 063 * <li>Some constant overhead of reading or writing a compressed block. 064 * <ul> 065 * <li>Each compressed block requires one compression/decompression codec for I/O. 066 * <li>Temporary space to buffer the key. 067 * <li>Temporary space to buffer the value. 068 * </ul> 069 * <li>HFile index, which is proportional to the total number of Data Blocks. The total amount of 070 * memory needed to hold the index can be estimated as (56+AvgKeySize)*NumBlocks. 071 * </ul> 072 * Suggestions on performance optimization. 073 * <ul> 074 * <li>Minimum block size. We recommend a setting of minimum block size between 8KB to 1MB for 075 * general usage. Larger block size is preferred if files are primarily for sequential access. 076 * However, it would lead to inefficient random access (because there are more data to decompress). 077 * Smaller blocks are good for random access, but require more memory to hold the block index, and 078 * may be slower to create (because we must flush the compressor stream at the conclusion of each 079 * data block, which leads to an FS I/O flush). Further, due to the internal caching in Compression 080 * codec, the smallest possible block size would be around 20KB-30KB. 081 * <li>The current implementation does not offer true multi-threading for reading. The 082 * implementation uses FSDataInputStream seek()+read(), which is shown to be much faster than 083 * positioned-read call in single thread mode. However, it also means that if multiple threads 084 * attempt to access the same HFile (using multiple scanners) simultaneously, the actual I/O is 085 * carried out sequentially even if they access different DFS blocks (Reexamine! pread seems to be 086 * 10% faster than seek+read in my testing -- stack). 087 * <li>Compression codec. Use "none" if the data is not very compressable (by compressable, I mean a 088 * compression ratio at least 2:1). Generally, use "lzo" as the starting point for experimenting. 089 * "gz" overs slightly better compression ratio over "lzo" but requires 4x CPU to compress and 2x 090 * CPU to decompress, comparing to "lzo". 091 * </ul> 092 * For more on the background behind HFile, see <a 093 * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>. 094 * <p> 095 * File is made of data blocks followed by meta data blocks (if any), a fileinfo block, data block 096 * index, meta data block index, and a fixed size trailer which records the offsets at which file 097 * changes content type. 098 * 099 * <pre> 100 * <data blocks><meta blocks><fileinfo>< 101 * data index><meta index><trailer> 102 * </pre> 103 * 104 * Each block has a bit of magic at its start. Block are comprised of key/values. In data blocks, 105 * they are both byte arrays. Metadata blocks are a String key and a byte array value. An empty file 106 * looks like this: 107 * 108 * <pre> 109 * <fileinfo><trailer> 110 * </pre> 111 * 112 * . That is, there are not data nor meta blocks present. 113 * <p> 114 * TODO: Do scanners need to be able to take a start and end row? TODO: Should BlockIndex know the 115 * name of its file? Should it have a Path that points at its file say for the case where an index 116 * lives apart from an HFile instance? 117 */ 118@InterfaceAudience.Private 119public final class HFile { 120 // LOG is being used in HFileBlock and CheckSumUtil 121 static final Logger LOG = LoggerFactory.getLogger(HFile.class); 122 123 /** 124 * Maximum length of key in HFile. 125 */ 126 public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE; 127 128 /** 129 * Default compression: none. 130 */ 131 public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM = 132 Compression.Algorithm.NONE; 133 134 /** Minimum supported HFile format version */ 135 public static final int MIN_FORMAT_VERSION = 2; 136 137 /** 138 * Maximum supported HFile format version 139 */ 140 public static final int MAX_FORMAT_VERSION = 3; 141 142 /** 143 * Minimum HFile format version with support for persisting cell tags 144 */ 145 public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3; 146 147 /** Default compression name: none. */ 148 public final static String DEFAULT_COMPRESSION = DEFAULT_COMPRESSION_ALGORITHM.getName(); 149 150 /** Meta data block name for bloom filter bits. */ 151 public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA"; 152 153 /** 154 * We assume that HFile path ends with ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at 155 * least this many levels of nesting. This is needed for identifying table and CF name from an 156 * HFile path. 157 */ 158 public final static int MIN_NUM_HFILE_PATH_LEVELS = 5; 159 160 /** 161 * The number of bytes per checksum. 162 */ 163 public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024; 164 165 // For measuring number of checksum failures 166 static final LongAdder CHECKSUM_FAILURES = new LongAdder(); 167 168 // For tests. Gets incremented when we read a block whether from HDFS or from Cache. 169 public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder(); 170 171 /** Static instance for the metrics so that HFileReaders access the same instance */ 172 static final MetricsIO metrics = new MetricsIO(new MetricsIOWrapperImpl()); 173 174 /** 175 * Shutdown constructor. 176 */ 177 private HFile() { 178 } 179 180 /** 181 * Number of checksum verification failures. It also clears the counter. 182 */ 183 public static final long getAndResetChecksumFailuresCount() { 184 return CHECKSUM_FAILURES.sumThenReset(); 185 } 186 187 /** 188 * Number of checksum verification failures. It also clears the counter. 189 */ 190 public static final long getChecksumFailuresCount() { 191 return CHECKSUM_FAILURES.sum(); 192 } 193 194 public static final void updateReadLatency(long latencyMillis, boolean pread) { 195 if (pread) { 196 metrics.updateFsPreadTime(latencyMillis); 197 } else { 198 metrics.updateFsReadTime(latencyMillis); 199 } 200 } 201 202 public static final void updateWriteLatency(long latencyMillis) { 203 metrics.updateFsWriteTime(latencyMillis); 204 } 205 206 /** API required to write an {@link HFile} */ 207 public interface Writer extends Closeable, CellSink, ShipperListener { 208 /** Max memstore (mvcc) timestamp in FileInfo */ 209 public static final byte[] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY"); 210 211 /** Add an element to the file info map. */ 212 void appendFileInfo(byte[] key, byte[] value) throws IOException; 213 214 /** Returns the path to this {@link HFile} */ 215 Path getPath(); 216 217 /** 218 * Adds an inline block writer such as a multi-level block index writer or a compound Bloom 219 * filter writer. 220 */ 221 void addInlineBlockWriter(InlineBlockWriter bloomWriter); 222 223 // The below three methods take Writables. We'd like to undo Writables but undoing the below 224 // would be pretty painful. Could take a byte [] or a Message but we want to be backward 225 // compatible around hfiles so would need to map between Message and Writable or byte [] and 226 // current Writable serialization. This would be a bit of work to little gain. Thats my 227 // thinking at moment. St.Ack 20121129 228 229 void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter); 230 231 /** 232 * Store general Bloom filter in the file. This does not deal with Bloom filter internals but is 233 * necessary, since Bloom filters are stored differently in HFile version 1 and version 2. 234 */ 235 void addGeneralBloomFilter(BloomFilterWriter bfw); 236 237 /** 238 * Store delete family Bloom filter in the file, which is only supported in HFile V2. 239 */ 240 void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException; 241 242 /** 243 * Return the file context for the HFile this writer belongs to 244 */ 245 HFileContext getFileContext(); 246 } 247 248 /** 249 * This variety of ways to construct writers is used throughout the code, and we want to be able 250 * to swap writer implementations. 251 */ 252 public static class WriterFactory { 253 protected final Configuration conf; 254 protected final CacheConfig cacheConf; 255 protected FileSystem fs; 256 protected Path path; 257 protected FSDataOutputStream ostream; 258 protected InetSocketAddress[] favoredNodes; 259 private HFileContext fileContext; 260 protected boolean shouldDropBehind = false; 261 262 WriterFactory(Configuration conf, CacheConfig cacheConf) { 263 this.conf = conf; 264 this.cacheConf = cacheConf; 265 } 266 267 public WriterFactory withPath(FileSystem fs, Path path) { 268 Preconditions.checkNotNull(fs); 269 Preconditions.checkNotNull(path); 270 this.fs = fs; 271 this.path = path; 272 return this; 273 } 274 275 public WriterFactory withOutputStream(FSDataOutputStream ostream) { 276 Preconditions.checkNotNull(ostream); 277 this.ostream = ostream; 278 return this; 279 } 280 281 public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) { 282 // Deliberately not checking for null here. 283 this.favoredNodes = favoredNodes; 284 return this; 285 } 286 287 public WriterFactory withFileContext(HFileContext fileContext) { 288 this.fileContext = fileContext; 289 return this; 290 } 291 292 public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) { 293 this.shouldDropBehind = shouldDropBehind; 294 return this; 295 } 296 297 public Writer create() throws IOException { 298 if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) { 299 throw new AssertionError("Please specify exactly one of " + "filesystem/path or path"); 300 } 301 if (path != null) { 302 ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes); 303 try { 304 ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction()); 305 } catch (UnsupportedOperationException uoe) { 306 LOG.trace("Unable to set drop behind on {}", path, uoe); 307 LOG.debug("Unable to set drop behind on {}", path.getName()); 308 } 309 } 310 return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext); 311 } 312 } 313 314 /** The configuration key for HFile version to use for new files */ 315 public static final String FORMAT_VERSION_KEY = "hfile.format.version"; 316 317 public static int getFormatVersion(Configuration conf) { 318 int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION); 319 checkFormatVersion(version); 320 return version; 321 } 322 323 /** 324 * Returns the factory to be used to create {@link HFile} writers. Disables block cache access for 325 * all writers created through the returned factory. 326 */ 327 public static final WriterFactory getWriterFactoryNoCache(Configuration conf) { 328 return HFile.getWriterFactory(conf, CacheConfig.DISABLED); 329 } 330 331 /** 332 * Returns the factory to be used to create {@link HFile} writers 333 */ 334 public static final WriterFactory getWriterFactory(Configuration conf, CacheConfig cacheConf) { 335 int version = getFormatVersion(conf); 336 switch (version) { 337 case 2: 338 throw new IllegalArgumentException("This should never happen. " 339 + "Did you change hfile.format.version to read v2? This version of the software writes v3" 340 + " hfiles only (but it can read v2 files without having to update hfile.format.version " 341 + "in hbase-site.xml)"); 342 case 3: 343 return new HFile.WriterFactory(conf, cacheConf); 344 default: 345 throw new IllegalArgumentException( 346 "Cannot create writer for HFile " + "format version " + version); 347 } 348 } 349 350 /** 351 * An abstraction used by the block index. Implementations will check cache for any asked-for 352 * block and return cached block if found. Otherwise, after reading from fs, will try and put 353 * block into cache before returning. 354 */ 355 public interface CachingBlockReader { 356 /** 357 * Read in a file block. 358 * @param offset offset to read. 359 * @param onDiskBlockSize size of the block 360 * @param isCompaction is this block being read as part of a compaction 361 * @param expectedBlockType the block type we are expecting to read with this read 362 * operation, or null to read whatever block type is available 363 * and avoid checking (that might reduce caching efficiency of 364 * encoded data blocks) 365 * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks 366 * to be in, or null to not perform this check and return the 367 * block irrespective of the encoding. This check only applies 368 * to data blocks and can be set to null when the caller is 369 * expecting to read a non-data block and has set 370 * expectedBlockType accordingly. 371 * @return Block wrapped in a ByteBuffer. 372 */ 373 HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread, 374 final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType, 375 DataBlockEncoding expectedDataBlockEncoding) throws IOException; 376 } 377 378 /** An interface used by clients to open and iterate an {@link HFile}. */ 379 public interface Reader extends Closeable, CachingBlockReader { 380 /** 381 * Returns this reader's "name". Usually the last component of the path. Needs to be constant as 382 * the file is being moved to support caching on write. 383 */ 384 String getName(); 385 386 CellComparator getComparator(); 387 388 HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread, 389 boolean isCompaction); 390 391 HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException; 392 393 Optional<Cell> getLastKey(); 394 395 Optional<Cell> midKey() throws IOException; 396 397 long length(); 398 399 long getEntries(); 400 401 Optional<Cell> getFirstKey(); 402 403 long indexSize(); 404 405 Optional<byte[]> getFirstRowKey(); 406 407 Optional<byte[]> getLastRowKey(); 408 409 FixedFileTrailer getTrailer(); 410 411 void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader); 412 413 HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader(); 414 415 void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader); 416 417 HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader(); 418 419 HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread); 420 421 /** 422 * Retrieves general Bloom filter metadata as appropriate for each {@link HFile} version. Knows 423 * nothing about how that metadata is structured. 424 */ 425 DataInput getGeneralBloomFilterMetadata() throws IOException; 426 427 /** 428 * Retrieves delete family Bloom filter metadata as appropriate for each {@link HFile} version. 429 * Knows nothing about how that metadata is structured. 430 */ 431 DataInput getDeleteBloomFilterMetadata() throws IOException; 432 433 Path getPath(); 434 435 /** Close method with optional evictOnClose */ 436 void close(boolean evictOnClose) throws IOException; 437 438 DataBlockEncoding getDataBlockEncoding(); 439 440 boolean hasMVCCInfo(); 441 442 /** 443 * Return the file context of the HFile this reader belongs to 444 */ 445 HFileContext getFileContext(); 446 447 boolean isPrimaryReplicaReader(); 448 449 DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction); 450 451 HFileBlock.FSReader getUncachedBlockReader(); 452 453 boolean prefetchComplete(); 454 455 /** 456 * To close the stream's socket. Note: This can be concurrently called from multiple threads and 457 * implementation should take care of thread safety. 458 */ 459 void unbufferStream(); 460 461 ReaderContext getContext(); 462 463 HFileInfo getHFileInfo(); 464 465 void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder); 466 } 467 468 /** 469 * Method returns the reader given the specified arguments. TODO This is a bad abstraction. See 470 * HBASE-6635. 471 * @param context Reader context info 472 * @param fileInfo HFile info 473 * @param cacheConf Cache configuation values, cannot be null. 474 * @param conf Configuration 475 * @return an appropriate instance of HFileReader 476 * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException 477 */ 478 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SF_SWITCH_FALLTHROUGH", 479 justification = "Intentional") 480 public static Reader createReader(ReaderContext context, HFileInfo fileInfo, 481 CacheConfig cacheConf, Configuration conf) throws IOException { 482 try { 483 if (context.getReaderType() == ReaderType.STREAM) { 484 // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields 485 return new HFileStreamReader(context, fileInfo, cacheConf, conf); 486 } 487 FixedFileTrailer trailer = fileInfo.getTrailer(); 488 switch (trailer.getMajorVersion()) { 489 case 2: 490 LOG.debug("Opening HFile v2 with v3 reader"); 491 // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH 492 case 3: 493 return new HFilePreadReader(context, fileInfo, cacheConf, conf); 494 default: 495 throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion()); 496 } 497 } catch (Throwable t) { 498 IOUtils.closeQuietly(context.getInputStreamWrapper(), 499 e -> LOG.warn("failed to close input stream wrapper", e)); 500 throw new CorruptHFileException( 501 "Problem reading HFile Trailer from file " + context.getFilePath(), t); 502 } finally { 503 context.getInputStreamWrapper().unbuffer(); 504 } 505 } 506 507 /** 508 * Creates reader with cache configuration disabled 509 * @param fs filesystem 510 * @param path Path to file to read 511 * @param conf Configuration 512 * @return an active Reader instance 513 * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile 514 * is corrupt/invalid. 515 */ 516 public static Reader createReader(FileSystem fs, Path path, Configuration conf) 517 throws IOException { 518 // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not use 519 // block cache then it is OK to set it as any value. We use true here. 520 return createReader(fs, path, CacheConfig.DISABLED, true, conf); 521 } 522 523 /** 524 * @param fs filesystem 525 * @param path Path to file to read 526 * @param cacheConf This must not be null. 527 * @param primaryReplicaReader true if this is a reader for primary replica 528 * @param conf Configuration 529 * @return an active Reader instance 530 * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile 531 * is corrupt/invalid. 532 * @see CacheConfig#CacheConfig(Configuration) 533 */ 534 public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf, 535 boolean primaryReplicaReader, Configuration conf) throws IOException { 536 Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf"); 537 FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path); 538 ReaderContext context = 539 new ReaderContextBuilder().withFilePath(path).withInputStreamWrapper(stream) 540 .withFileSize(fs.getFileStatus(path).getLen()).withFileSystem(stream.getHfs()) 541 .withPrimaryReplicaReader(primaryReplicaReader).withReaderType(ReaderType.PREAD).build(); 542 HFileInfo fileInfo = new HFileInfo(context, conf); 543 Reader reader = createReader(context, fileInfo, cacheConf, conf); 544 fileInfo.initMetaAndIndex(reader); 545 return reader; 546 } 547 548 /** 549 * Returns true if the specified file has a valid HFile Trailer. 550 * @param fs filesystem 551 * @param path Path to file to verify 552 * @return true if the file has a valid HFile Trailer, otherwise false 553 * @throws IOException if failed to read from the underlying stream 554 */ 555 public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException { 556 return isHFileFormat(fs, fs.getFileStatus(path)); 557 } 558 559 /** 560 * Returns true if the specified file has a valid HFile Trailer. 561 * @param fs filesystem 562 * @param fileStatus the file to verify 563 * @return true if the file has a valid HFile Trailer, otherwise false 564 * @throws IOException if failed to read from the underlying stream 565 */ 566 public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus) 567 throws IOException { 568 final Path path = fileStatus.getPath(); 569 final long size = fileStatus.getLen(); 570 try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) { 571 boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum(); 572 assert !isHBaseChecksum; // Initially we must read with FS checksum. 573 FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size); 574 return true; 575 } catch (IllegalArgumentException e) { 576 return false; 577 } 578 } 579 580 /** 581 * Get names of supported compression algorithms. The names are acceptable by HFile.Writer. 582 * @return Array of strings, each represents a supported compression algorithm. Currently, the 583 * following compression algorithms are supported. 584 * <ul> 585 * <li>"none" - No compression. 586 * <li>"gz" - GZIP compression. 587 * </ul> 588 */ 589 public static String[] getSupportedCompressionAlgorithms() { 590 return Compression.getSupportedAlgorithms(); 591 } 592 593 // Utility methods. 594 /* 595 * @param l Long to convert to an int. 596 * @return <code>l</code> cast as an int. 597 */ 598 static int longToInt(final long l) { 599 // Expecting the size() of a block not exceeding 4GB. Assuming the 600 // size() will wrap to negative integer if it exceeds 2GB (From tfile). 601 return (int) (l & 0x00000000ffffffffL); 602 } 603 604 /** 605 * Returns all HFiles belonging to the given region directory. Could return an empty list. 606 * @param fs The file system reference. 607 * @param regionDir The region directory to scan. 608 * @return The list of files found. 609 * @throws IOException When scanning the files fails. 610 */ 611 public static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException { 612 List<Path> regionHFiles = new ArrayList<>(); 613 PathFilter dirFilter = new FSUtils.DirFilter(fs); 614 FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter); 615 for (FileStatus dir : familyDirs) { 616 FileStatus[] files = fs.listStatus(dir.getPath()); 617 for (FileStatus file : files) { 618 if ( 619 !file.isDirectory() 620 && (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) 621 && (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR)) 622 ) { 623 regionHFiles.add(file.getPath()); 624 } 625 } 626 } 627 return regionHFiles; 628 } 629 630 /** 631 * Checks the given {@link HFile} format version, and throws an exception if invalid. Note that if 632 * the version number comes from an input file and has not been verified, the caller needs to 633 * re-throw an {@link IOException} to indicate that this is not a software error, but corrupted 634 * input. 635 * @param version an HFile version 636 * @throws IllegalArgumentException if the version is invalid 637 */ 638 public static void checkFormatVersion(int version) throws IllegalArgumentException { 639 if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) { 640 throw new IllegalArgumentException("Invalid HFile version: " + version + " (expected to be " 641 + "between " + MIN_FORMAT_VERSION + " and " + MAX_FORMAT_VERSION + ")"); 642 } 643 } 644 645 public static void checkHFileVersion(final Configuration c) { 646 int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION); 647 if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) { 648 throw new IllegalArgumentException( 649 "The setting for " + FORMAT_VERSION_KEY + " (in your hbase-*.xml files) is " + version 650 + " which does not match " + MAX_FORMAT_VERSION 651 + "; are you running with a configuration from an older or newer hbase install (an " 652 + "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?"); 653 } 654 } 655 656 public static void main(String[] args) throws Exception { 657 // delegate to preserve old behavior 658 HFilePrettyPrinter.main(args); 659 } 660}