001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.io.Closeable; 021import java.io.DataInput; 022import java.io.IOException; 023import java.net.InetSocketAddress; 024import java.util.ArrayList; 025import java.util.List; 026import java.util.Optional; 027import java.util.concurrent.atomic.LongAdder; 028import org.apache.commons.io.IOUtils; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FSDataOutputStream; 031import org.apache.hadoop.fs.FileStatus; 032import org.apache.hadoop.fs.FileSystem; 033import org.apache.hadoop.fs.Path; 034import org.apache.hadoop.fs.PathFilter; 035import org.apache.hadoop.hbase.CellComparator; 036import org.apache.hadoop.hbase.ExtendedCell; 037import org.apache.hadoop.hbase.HConstants; 038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 039import org.apache.hadoop.hbase.io.MetricsIO; 040import org.apache.hadoop.hbase.io.compress.Compression; 041import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 042import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType; 043import org.apache.hadoop.hbase.ipc.RpcServer; 044import org.apache.hadoop.hbase.regionserver.CellSink; 045import org.apache.hadoop.hbase.regionserver.ShipperListener; 046import org.apache.hadoop.hbase.util.BloomFilterWriter; 047import org.apache.hadoop.hbase.util.Bytes; 048import org.apache.hadoop.hbase.util.FSUtils; 049import org.apache.hadoop.io.Writable; 050import org.apache.yetus.audience.InterfaceAudience; 051import org.slf4j.Logger; 052import org.slf4j.LoggerFactory; 053 054import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 055 056/** 057 * File format for hbase. A file of sorted key/value pairs. Both keys and values are byte arrays. 058 * <p> 059 * The memory footprint of a HFile includes the following (below is taken from the <a 060 * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation but applies also 061 * to HFile): 062 * <ul> 063 * <li>Some constant overhead of reading or writing a compressed block. 064 * <ul> 065 * <li>Each compressed block requires one compression/decompression codec for I/O. 066 * <li>Temporary space to buffer the key. 067 * <li>Temporary space to buffer the value. 068 * </ul> 069 * <li>HFile index, which is proportional to the total number of Data Blocks. The total amount of 070 * memory needed to hold the index can be estimated as (56+AvgKeySize)*NumBlocks. 071 * </ul> 072 * Suggestions on performance optimization. 073 * <ul> 074 * <li>Minimum block size. We recommend a setting of minimum block size between 8KB to 1MB for 075 * general usage. Larger block size is preferred if files are primarily for sequential access. 076 * However, it would lead to inefficient random access (because there are more data to decompress). 077 * Smaller blocks are good for random access, but require more memory to hold the block index, and 078 * may be slower to create (because we must flush the compressor stream at the conclusion of each 079 * data block, which leads to an FS I/O flush). Further, due to the internal caching in Compression 080 * codec, the smallest possible block size would be around 20KB-30KB. 081 * <li>The current implementation does not offer true multi-threading for reading. The 082 * implementation uses FSDataInputStream seek()+read(), which is shown to be much faster than 083 * positioned-read call in single thread mode. However, it also means that if multiple threads 084 * attempt to access the same HFile (using multiple scanners) simultaneously, the actual I/O is 085 * carried out sequentially even if they access different DFS blocks (Reexamine! pread seems to be 086 * 10% faster than seek+read in my testing -- stack). 087 * <li>Compression codec. Use "none" if the data is not very compressable (by compressable, I mean a 088 * compression ratio at least 2:1). Generally, use "lzo" as the starting point for experimenting. 089 * "gz" overs slightly better compression ratio over "lzo" but requires 4x CPU to compress and 2x 090 * CPU to decompress, comparing to "lzo". 091 * </ul> 092 * For more on the background behind HFile, see <a 093 * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>. 094 * <p> 095 * File is made of data blocks followed by meta data blocks (if any), a fileinfo block, data block 096 * index, meta data block index, and a fixed size trailer which records the offsets at which file 097 * changes content type. 098 * 099 * <pre> 100 * <data blocks><meta blocks><fileinfo>< 101 * data index><meta index><trailer> 102 * </pre> 103 * 104 * Each block has a bit of magic at its start. Block are comprised of key/values. In data blocks, 105 * they are both byte arrays. Metadata blocks are a String key and a byte array value. An empty file 106 * looks like this: 107 * 108 * <pre> 109 * <fileinfo><trailer> 110 * </pre> 111 * 112 * . That is, there are not data nor meta blocks present. 113 * <p> 114 * TODO: Do scanners need to be able to take a start and end row? TODO: Should BlockIndex know the 115 * name of its file? Should it have a Path that points at its file say for the case where an index 116 * lives apart from an HFile instance? 117 */ 118@InterfaceAudience.Private 119public final class HFile { 120 // LOG is being used in HFileBlock and CheckSumUtil 121 static final Logger LOG = LoggerFactory.getLogger(HFile.class); 122 123 /** 124 * Maximum length of key in HFile. 125 */ 126 public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE; 127 128 /** 129 * Default compression: none. 130 */ 131 public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM = 132 Compression.Algorithm.NONE; 133 134 /** Minimum supported HFile format version */ 135 public static final int MIN_FORMAT_VERSION = 2; 136 137 /** 138 * Maximum supported HFile format version 139 */ 140 public static final int MAX_FORMAT_VERSION = 3; 141 142 /** 143 * Minimum HFile format version with support for persisting cell tags 144 */ 145 public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3; 146 147 /** Default compression name: none. */ 148 public final static String DEFAULT_COMPRESSION = DEFAULT_COMPRESSION_ALGORITHM.getName(); 149 150 /** Meta data block name for bloom filter bits. */ 151 public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA"; 152 153 /** 154 * We assume that HFile path ends with ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at 155 * least this many levels of nesting. This is needed for identifying table and CF name from an 156 * HFile path. 157 */ 158 public final static int MIN_NUM_HFILE_PATH_LEVELS = 5; 159 160 /** 161 * The number of bytes per checksum. 162 */ 163 public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024; 164 165 // For measuring number of checksum failures 166 static final LongAdder CHECKSUM_FAILURES = new LongAdder(); 167 168 // For tests. Gets incremented when we read a block whether from HDFS or from Cache. 169 public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder(); 170 171 /** 172 * Shutdown constructor. 173 */ 174 private HFile() { 175 } 176 177 /** 178 * Number of checksum verification failures. It also clears the counter. 179 */ 180 public static final long getAndResetChecksumFailuresCount() { 181 return CHECKSUM_FAILURES.sumThenReset(); 182 } 183 184 /** 185 * Number of checksum verification failures. It also clears the counter. 186 */ 187 public static final long getChecksumFailuresCount() { 188 return CHECKSUM_FAILURES.sum(); 189 } 190 191 public static final void updateReadLatency(long latencyMillis, boolean pread, boolean tooSlow) { 192 RpcServer.getCurrentCall().ifPresent(call -> call.updateFsReadTime(latencyMillis)); 193 if (pread) { 194 MetricsIO.getInstance().updateFsPreadTime(latencyMillis); 195 } else { 196 MetricsIO.getInstance().updateFsReadTime(latencyMillis); 197 } 198 if (tooSlow) { 199 MetricsIO.getInstance().incrSlowFsRead(); 200 } 201 } 202 203 public static final void updateWriteLatency(long latencyMillis) { 204 MetricsIO.getInstance().updateFsWriteTime(latencyMillis); 205 } 206 207 /** API required to write an {@link HFile} */ 208 public interface Writer extends Closeable, CellSink, ShipperListener { 209 /** Max memstore (mvcc) timestamp in FileInfo */ 210 public static final byte[] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY"); 211 212 /** Add an element to the file info map. */ 213 void appendFileInfo(byte[] key, byte[] value) throws IOException; 214 215 /** Returns the path to this {@link HFile} */ 216 Path getPath(); 217 218 /** 219 * Adds an inline block writer such as a multi-level block index writer or a compound Bloom 220 * filter writer. 221 */ 222 void addInlineBlockWriter(InlineBlockWriter bloomWriter); 223 224 // The below three methods take Writables. We'd like to undo Writables but undoing the below 225 // would be pretty painful. Could take a byte [] or a Message but we want to be backward 226 // compatible around hfiles so would need to map between Message and Writable or byte [] and 227 // current Writable serialization. This would be a bit of work to little gain. Thats my 228 // thinking at moment. St.Ack 20121129 229 230 void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter); 231 232 /** 233 * Store general Bloom filter in the file. This does not deal with Bloom filter internals but is 234 * necessary, since Bloom filters are stored differently in HFile version 1 and version 2. 235 */ 236 void addGeneralBloomFilter(BloomFilterWriter bfw); 237 238 /** 239 * Store delete family Bloom filter in the file, which is only supported in HFile V2. 240 */ 241 void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException; 242 243 /** 244 * Return the file context for the HFile this writer belongs to 245 */ 246 HFileContext getFileContext(); 247 } 248 249 /** 250 * This variety of ways to construct writers is used throughout the code, and we want to be able 251 * to swap writer implementations. 252 */ 253 public static class WriterFactory { 254 protected final Configuration conf; 255 protected final CacheConfig cacheConf; 256 protected FileSystem fs; 257 protected Path path; 258 protected FSDataOutputStream ostream; 259 protected InetSocketAddress[] favoredNodes; 260 private HFileContext fileContext; 261 protected boolean shouldDropBehind = false; 262 263 WriterFactory(Configuration conf, CacheConfig cacheConf) { 264 this.conf = conf; 265 this.cacheConf = cacheConf; 266 } 267 268 public WriterFactory withPath(FileSystem fs, Path path) { 269 Preconditions.checkNotNull(fs); 270 Preconditions.checkNotNull(path); 271 this.fs = fs; 272 this.path = path; 273 return this; 274 } 275 276 public WriterFactory withOutputStream(FSDataOutputStream ostream) { 277 Preconditions.checkNotNull(ostream); 278 this.ostream = ostream; 279 return this; 280 } 281 282 public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) { 283 // Deliberately not checking for null here. 284 this.favoredNodes = favoredNodes; 285 return this; 286 } 287 288 public WriterFactory withFileContext(HFileContext fileContext) { 289 this.fileContext = fileContext; 290 return this; 291 } 292 293 public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) { 294 this.shouldDropBehind = shouldDropBehind; 295 return this; 296 } 297 298 public Writer create() throws IOException { 299 if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) { 300 throw new AssertionError("Please specify exactly one of " + "filesystem/path or path"); 301 } 302 if (path != null) { 303 ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes); 304 try { 305 ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction()); 306 } catch (UnsupportedOperationException uoe) { 307 LOG.trace("Unable to set drop behind on {}", path, uoe); 308 LOG.debug("Unable to set drop behind on {}", path.getName()); 309 } 310 } 311 return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext); 312 } 313 } 314 315 /** The configuration key for HFile version to use for new files */ 316 public static final String FORMAT_VERSION_KEY = "hfile.format.version"; 317 318 public static int getFormatVersion(Configuration conf) { 319 int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION); 320 checkFormatVersion(version); 321 return version; 322 } 323 324 /** 325 * Returns the factory to be used to create {@link HFile} writers. Disables block cache access for 326 * all writers created through the returned factory. 327 */ 328 public static final WriterFactory getWriterFactoryNoCache(Configuration conf) { 329 return HFile.getWriterFactory(conf, CacheConfig.DISABLED); 330 } 331 332 /** 333 * Returns the factory to be used to create {@link HFile} writers 334 */ 335 public static final WriterFactory getWriterFactory(Configuration conf, CacheConfig cacheConf) { 336 int version = getFormatVersion(conf); 337 switch (version) { 338 case 2: 339 throw new IllegalArgumentException("This should never happen. " 340 + "Did you change hfile.format.version to read v2? This version of the software writes v3" 341 + " hfiles only (but it can read v2 files without having to update hfile.format.version " 342 + "in hbase-site.xml)"); 343 case 3: 344 return new HFile.WriterFactory(conf, cacheConf); 345 default: 346 throw new IllegalArgumentException( 347 "Cannot create writer for HFile " + "format version " + version); 348 } 349 } 350 351 /** 352 * An abstraction used by the block index. Implementations will check cache for any asked-for 353 * block and return cached block if found. Otherwise, after reading from fs, will try and put 354 * block into cache before returning. 355 */ 356 public interface CachingBlockReader { 357 /** 358 * Read in a file block. 359 * @param offset offset to read. 360 * @param onDiskBlockSize size of the block 361 * @param isCompaction is this block being read as part of a compaction 362 * @param expectedBlockType the block type we are expecting to read with this read 363 * operation, or null to read whatever block type is available 364 * and avoid checking (that might reduce caching efficiency of 365 * encoded data blocks) 366 * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks 367 * to be in, or null to not perform this check and return the 368 * block irrespective of the encoding. This check only applies 369 * to data blocks and can be set to null when the caller is 370 * expecting to read a non-data block and has set 371 * expectedBlockType accordingly. 372 * @return Block wrapped in a ByteBuffer. 373 */ 374 HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread, 375 final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType, 376 DataBlockEncoding expectedDataBlockEncoding) throws IOException; 377 378 HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread, 379 final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType, 380 DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly) throws IOException; 381 } 382 383 /** An interface used by clients to open and iterate an {@link HFile}. */ 384 public interface Reader extends Closeable, CachingBlockReader { 385 /** 386 * Returns this reader's "name". Usually the last component of the path. Needs to be constant as 387 * the file is being moved to support caching on write. 388 */ 389 String getName(); 390 391 CellComparator getComparator(); 392 393 HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread, 394 boolean isCompaction); 395 396 HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException; 397 398 Optional<ExtendedCell> getLastKey(); 399 400 Optional<ExtendedCell> midKey() throws IOException; 401 402 long length(); 403 404 long getEntries(); 405 406 Optional<ExtendedCell> getFirstKey(); 407 408 long indexSize(); 409 410 Optional<byte[]> getFirstRowKey(); 411 412 Optional<byte[]> getLastRowKey(); 413 414 FixedFileTrailer getTrailer(); 415 416 void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader); 417 418 HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader(); 419 420 void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader); 421 422 HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader(); 423 424 HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread); 425 426 /** 427 * Retrieves general Bloom filter metadata as appropriate for each {@link HFile} version. Knows 428 * nothing about how that metadata is structured. 429 */ 430 DataInput getGeneralBloomFilterMetadata() throws IOException; 431 432 /** 433 * Retrieves delete family Bloom filter metadata as appropriate for each {@link HFile} version. 434 * Knows nothing about how that metadata is structured. 435 */ 436 DataInput getDeleteBloomFilterMetadata() throws IOException; 437 438 Path getPath(); 439 440 /** Close method with optional evictOnClose */ 441 void close(boolean evictOnClose) throws IOException; 442 443 DataBlockEncoding getDataBlockEncoding(); 444 445 boolean hasMVCCInfo(); 446 447 /** 448 * Return the file context of the HFile this reader belongs to 449 */ 450 HFileContext getFileContext(); 451 452 boolean isPrimaryReplicaReader(); 453 454 DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction); 455 456 HFileBlock.FSReader getUncachedBlockReader(); 457 458 boolean prefetchComplete(); 459 460 boolean prefetchStarted(); 461 462 /** 463 * To close the stream's socket. Note: This can be concurrently called from multiple threads and 464 * implementation should take care of thread safety. 465 */ 466 void unbufferStream(); 467 468 ReaderContext getContext(); 469 470 HFileInfo getHFileInfo(); 471 472 void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder); 473 } 474 475 /** 476 * Method returns the reader given the specified arguments. TODO This is a bad abstraction. See 477 * HBASE-6635. 478 * @param context Reader context info 479 * @param fileInfo HFile info 480 * @param cacheConf Cache configuation values, cannot be null. 481 * @param conf Configuration 482 * @return an appropriate instance of HFileReader 483 * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException 484 */ 485 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SF_SWITCH_FALLTHROUGH", 486 justification = "Intentional") 487 public static Reader createReader(ReaderContext context, HFileInfo fileInfo, 488 CacheConfig cacheConf, Configuration conf) throws IOException { 489 try { 490 if (context.getReaderType() == ReaderType.STREAM) { 491 // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields 492 return new HFileStreamReader(context, fileInfo, cacheConf, conf); 493 } 494 FixedFileTrailer trailer = fileInfo.getTrailer(); 495 switch (trailer.getMajorVersion()) { 496 case 2: 497 LOG.debug("Opening HFile v2 with v3 reader"); 498 // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH 499 case 3: 500 return new HFilePreadReader(context, fileInfo, cacheConf, conf); 501 default: 502 throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion()); 503 } 504 } catch (Throwable t) { 505 IOUtils.closeQuietly(context.getInputStreamWrapper(), 506 e -> LOG.warn("failed to close input stream wrapper", e)); 507 throw new CorruptHFileException( 508 "Problem reading HFile Trailer from file " + context.getFilePath(), t); 509 } finally { 510 context.getInputStreamWrapper().unbuffer(); 511 } 512 } 513 514 /** 515 * Creates reader with cache configuration disabled 516 * @param fs filesystem 517 * @param path Path to file to read 518 * @param conf Configuration 519 * @return an active Reader instance 520 * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile 521 * is corrupt/invalid. 522 */ 523 public static Reader createReader(FileSystem fs, Path path, Configuration conf) 524 throws IOException { 525 // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not use 526 // block cache then it is OK to set it as any value. We use true here. 527 return createReader(fs, path, CacheConfig.DISABLED, true, conf); 528 } 529 530 /** 531 * @param fs filesystem 532 * @param path Path to file to read 533 * @param cacheConf This must not be null. 534 * @param primaryReplicaReader true if this is a reader for primary replica 535 * @param conf Configuration 536 * @return an active Reader instance 537 * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile 538 * is corrupt/invalid. 539 * @see CacheConfig#CacheConfig(Configuration) 540 */ 541 public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf, 542 boolean primaryReplicaReader, Configuration conf) throws IOException { 543 Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf"); 544 FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path); 545 ReaderContext context = 546 new ReaderContextBuilder().withFilePath(path).withInputStreamWrapper(stream) 547 .withFileSize(fs.getFileStatus(path).getLen()).withFileSystem(stream.getHfs()) 548 .withPrimaryReplicaReader(primaryReplicaReader).withReaderType(ReaderType.PREAD).build(); 549 HFileInfo fileInfo = new HFileInfo(context, conf); 550 Reader reader = createReader(context, fileInfo, cacheConf, conf); 551 fileInfo.initMetaAndIndex(reader); 552 return reader; 553 } 554 555 /** 556 * Returns true if the specified file has a valid HFile Trailer. 557 * @param fs filesystem 558 * @param path Path to file to verify 559 * @return true if the file has a valid HFile Trailer, otherwise false 560 * @throws IOException if failed to read from the underlying stream 561 */ 562 public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException { 563 return isHFileFormat(fs, fs.getFileStatus(path)); 564 } 565 566 /** 567 * Returns true if the specified file has a valid HFile Trailer. 568 * @param fs filesystem 569 * @param fileStatus the file to verify 570 * @return true if the file has a valid HFile Trailer, otherwise false 571 * @throws IOException if failed to read from the underlying stream 572 */ 573 public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus) 574 throws IOException { 575 final Path path = fileStatus.getPath(); 576 final long size = fileStatus.getLen(); 577 try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) { 578 boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum(); 579 assert !isHBaseChecksum; // Initially we must read with FS checksum. 580 FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size); 581 return true; 582 } catch (IllegalArgumentException e) { 583 return false; 584 } 585 } 586 587 /** 588 * Get names of supported compression algorithms. The names are acceptable by HFile.Writer. 589 * @return Array of strings, each represents a supported compression algorithm. Currently, the 590 * following compression algorithms are supported. 591 * <ul> 592 * <li>"none" - No compression. 593 * <li>"gz" - GZIP compression. 594 * </ul> 595 */ 596 public static String[] getSupportedCompressionAlgorithms() { 597 return Compression.getSupportedAlgorithms(); 598 } 599 600 // Utility methods. 601 /* 602 * @param l Long to convert to an int. 603 * @return <code>l</code> cast as an int. 604 */ 605 static int longToInt(final long l) { 606 // Expecting the size() of a block not exceeding 4GB. Assuming the 607 // size() will wrap to negative integer if it exceeds 2GB (From tfile). 608 return (int) (l & 0x00000000ffffffffL); 609 } 610 611 /** 612 * Returns all HFiles belonging to the given region directory. Could return an empty list. 613 * @param fs The file system reference. 614 * @param regionDir The region directory to scan. 615 * @return The list of files found. 616 * @throws IOException When scanning the files fails. 617 */ 618 public static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException { 619 List<Path> regionHFiles = new ArrayList<>(); 620 PathFilter dirFilter = new FSUtils.DirFilter(fs); 621 FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter); 622 for (FileStatus dir : familyDirs) { 623 FileStatus[] files = fs.listStatus(dir.getPath()); 624 for (FileStatus file : files) { 625 if ( 626 !file.isDirectory() 627 && (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) 628 && (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR)) 629 ) { 630 regionHFiles.add(file.getPath()); 631 } 632 } 633 } 634 return regionHFiles; 635 } 636 637 /** 638 * Checks the given {@link HFile} format version, and throws an exception if invalid. Note that if 639 * the version number comes from an input file and has not been verified, the caller needs to 640 * re-throw an {@link IOException} to indicate that this is not a software error, but corrupted 641 * input. 642 * @param version an HFile version 643 * @throws IllegalArgumentException if the version is invalid 644 */ 645 public static void checkFormatVersion(int version) throws IllegalArgumentException { 646 if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) { 647 throw new IllegalArgumentException("Invalid HFile version: " + version + " (expected to be " 648 + "between " + MIN_FORMAT_VERSION + " and " + MAX_FORMAT_VERSION + ")"); 649 } 650 } 651 652 public static void checkHFileVersion(final Configuration c) { 653 int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION); 654 if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) { 655 throw new IllegalArgumentException( 656 "The setting for " + FORMAT_VERSION_KEY + " (in your hbase-*.xml files) is " + version 657 + " which does not match " + MAX_FORMAT_VERSION 658 + "; are you running with a configuration from an older or newer hbase install (an " 659 + "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?"); 660 } 661 } 662 663 public static void main(String[] args) throws Exception { 664 // delegate to preserve old behavior 665 HFilePrettyPrinter.main(args); 666 } 667}