001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.io.Closeable; 021import java.io.DataInput; 022import java.io.IOException; 023import java.net.InetSocketAddress; 024import java.util.ArrayList; 025import java.util.List; 026import java.util.Optional; 027import java.util.concurrent.atomic.LongAdder; 028import org.apache.commons.io.IOUtils; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FSDataOutputStream; 031import org.apache.hadoop.fs.FileStatus; 032import org.apache.hadoop.fs.FileSystem; 033import org.apache.hadoop.fs.Path; 034import org.apache.hadoop.fs.PathFilter; 035import org.apache.hadoop.hbase.CellComparator; 036import org.apache.hadoop.hbase.ExtendedCell; 037import org.apache.hadoop.hbase.HConstants; 038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 039import org.apache.hadoop.hbase.io.MetricsIO; 040import org.apache.hadoop.hbase.io.compress.Compression; 041import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 042import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType; 043import org.apache.hadoop.hbase.ipc.RpcServer; 044import org.apache.hadoop.hbase.regionserver.CellSink; 045import org.apache.hadoop.hbase.regionserver.ShipperListener; 046import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; 047import org.apache.hadoop.hbase.util.BloomFilterWriter; 048import org.apache.hadoop.hbase.util.Bytes; 049import org.apache.hadoop.hbase.util.FSUtils; 050import org.apache.hadoop.io.Writable; 051import org.apache.yetus.audience.InterfaceAudience; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 056 057/** 058 * File format for hbase. A file of sorted key/value pairs. Both keys and values are byte arrays. 059 * <p> 060 * The memory footprint of a HFile includes the following (below is taken from the <a 061 * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation but applies also 062 * to HFile): 063 * <ul> 064 * <li>Some constant overhead of reading or writing a compressed block. 065 * <ul> 066 * <li>Each compressed block requires one compression/decompression codec for I/O. 067 * <li>Temporary space to buffer the key. 068 * <li>Temporary space to buffer the value. 069 * </ul> 070 * <li>HFile index, which is proportional to the total number of Data Blocks. The total amount of 071 * memory needed to hold the index can be estimated as (56+AvgKeySize)*NumBlocks. 072 * </ul> 073 * Suggestions on performance optimization. 074 * <ul> 075 * <li>Minimum block size. We recommend a setting of minimum block size between 8KB to 1MB for 076 * general usage. Larger block size is preferred if files are primarily for sequential access. 077 * However, it would lead to inefficient random access (because there are more data to decompress). 078 * Smaller blocks are good for random access, but require more memory to hold the block index, and 079 * may be slower to create (because we must flush the compressor stream at the conclusion of each 080 * data block, which leads to an FS I/O flush). Further, due to the internal caching in Compression 081 * codec, the smallest possible block size would be around 20KB-30KB. 082 * <li>The current implementation does not offer true multi-threading for reading. The 083 * implementation uses FSDataInputStream seek()+read(), which is shown to be much faster than 084 * positioned-read call in single thread mode. However, it also means that if multiple threads 085 * attempt to access the same HFile (using multiple scanners) simultaneously, the actual I/O is 086 * carried out sequentially even if they access different DFS blocks (Reexamine! pread seems to be 087 * 10% faster than seek+read in my testing -- stack). 088 * <li>Compression codec. Use "none" if the data is not very compressable (by compressable, I mean a 089 * compression ratio at least 2:1). Generally, use "lzo" as the starting point for experimenting. 090 * "gz" overs slightly better compression ratio over "lzo" but requires 4x CPU to compress and 2x 091 * CPU to decompress, comparing to "lzo". 092 * </ul> 093 * For more on the background behind HFile, see <a 094 * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>. 095 * <p> 096 * File is made of data blocks followed by meta data blocks (if any), a fileinfo block, data block 097 * index, meta data block index, and a fixed size trailer which records the offsets at which file 098 * changes content type. 099 * 100 * <pre> 101 * <data blocks><meta blocks><fileinfo>< 102 * data index><meta index><trailer> 103 * </pre> 104 * 105 * Each block has a bit of magic at its start. Block are comprised of key/values. In data blocks, 106 * they are both byte arrays. Metadata blocks are a String key and a byte array value. An empty file 107 * looks like this: 108 * 109 * <pre> 110 * <fileinfo><trailer> 111 * </pre> 112 * 113 * . That is, there are not data nor meta blocks present. 114 * <p> 115 * TODO: Do scanners need to be able to take a start and end row? TODO: Should BlockIndex know the 116 * name of its file? Should it have a Path that points at its file say for the case where an index 117 * lives apart from an HFile instance? 118 */ 119@InterfaceAudience.Private 120public final class HFile { 121 // LOG is being used in HFileBlock and CheckSumUtil 122 static final Logger LOG = LoggerFactory.getLogger(HFile.class); 123 124 /** 125 * Maximum length of key in HFile. 126 */ 127 public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE; 128 129 /** 130 * Default compression: none. 131 */ 132 public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM = 133 Compression.Algorithm.NONE; 134 135 /** Minimum supported HFile format version */ 136 public static final int MIN_FORMAT_VERSION = 2; 137 138 /** 139 * Maximum supported HFile format version 140 */ 141 public static final int MAX_FORMAT_VERSION = 3; 142 143 /** 144 * Minimum HFile format version with support for persisting cell tags 145 */ 146 public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3; 147 148 /** Default compression name: none. */ 149 public final static String DEFAULT_COMPRESSION = DEFAULT_COMPRESSION_ALGORITHM.getName(); 150 151 /** Meta data block name for bloom filter bits. */ 152 public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA"; 153 154 /** 155 * We assume that HFile path ends with ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at 156 * least this many levels of nesting. This is needed for identifying table and CF name from an 157 * HFile path. 158 */ 159 public final static int MIN_NUM_HFILE_PATH_LEVELS = 5; 160 161 /** 162 * The number of bytes per checksum. 163 */ 164 public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024; 165 166 // For measuring number of checksum failures 167 static final LongAdder CHECKSUM_FAILURES = new LongAdder(); 168 169 // For tests. Gets incremented when we read a block whether from HDFS or from Cache. 170 public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder(); 171 172 /** 173 * Shutdown constructor. 174 */ 175 private HFile() { 176 } 177 178 /** 179 * Number of checksum verification failures. It also clears the counter. 180 */ 181 public static final long getAndResetChecksumFailuresCount() { 182 return CHECKSUM_FAILURES.sumThenReset(); 183 } 184 185 /** 186 * Number of checksum verification failures. It also clears the counter. 187 */ 188 public static final long getChecksumFailuresCount() { 189 return CHECKSUM_FAILURES.sum(); 190 } 191 192 public static final void updateReadLatency(long latencyMillis, boolean pread, boolean tooSlow) { 193 RpcServer.getCurrentCall().ifPresent(call -> call.updateFsReadTime(latencyMillis)); 194 if (pread) { 195 MetricsIO.getInstance().updateFsPreadTime(latencyMillis); 196 } else { 197 MetricsIO.getInstance().updateFsReadTime(latencyMillis); 198 } 199 if (tooSlow) { 200 MetricsIO.getInstance().incrSlowFsRead(); 201 } 202 } 203 204 public static final void updateWriteLatency(long latencyMillis) { 205 MetricsIO.getInstance().updateFsWriteTime(latencyMillis); 206 } 207 208 /** API required to write an {@link HFile} */ 209 public interface Writer extends Closeable, CellSink, ShipperListener { 210 /** Max memstore (mvcc) timestamp in FileInfo */ 211 public static final byte[] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY"); 212 213 /** Add an element to the file info map. */ 214 void appendFileInfo(byte[] key, byte[] value) throws IOException; 215 216 /** 217 * Add TimestampRange and earliest put timestamp to Metadata 218 */ 219 void appendTrackedTimestampsToMetadata() throws IOException; 220 221 /** 222 * Add Custom cell timestamp to Metadata 223 */ 224 public void appendCustomCellTimestampsToMetadata(TimeRangeTracker timeRangeTracker) 225 throws IOException; 226 227 /** Returns the path to this {@link HFile} */ 228 Path getPath(); 229 230 /** 231 * Adds an inline block writer such as a multi-level block index writer or a compound Bloom 232 * filter writer. 233 */ 234 void addInlineBlockWriter(InlineBlockWriter bloomWriter); 235 236 // The below three methods take Writables. We'd like to undo Writables but undoing the below 237 // would be pretty painful. Could take a byte [] or a Message but we want to be backward 238 // compatible around hfiles so would need to map between Message and Writable or byte [] and 239 // current Writable serialization. This would be a bit of work to little gain. Thats my 240 // thinking at moment. St.Ack 20121129 241 242 void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter); 243 244 /** 245 * Store general Bloom filter in the file. This does not deal with Bloom filter internals but is 246 * necessary, since Bloom filters are stored differently in HFile version 1 and version 2. 247 */ 248 void addGeneralBloomFilter(BloomFilterWriter bfw); 249 250 /** 251 * Store delete family Bloom filter in the file, which is only supported in HFile V2. 252 */ 253 void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException; 254 255 /** 256 * Return the file context for the HFile this writer belongs to 257 */ 258 HFileContext getFileContext(); 259 } 260 261 /** 262 * This variety of ways to construct writers is used throughout the code, and we want to be able 263 * to swap writer implementations. 264 */ 265 public static class WriterFactory { 266 protected final Configuration conf; 267 protected final CacheConfig cacheConf; 268 protected FileSystem fs; 269 protected Path path; 270 protected FSDataOutputStream ostream; 271 protected InetSocketAddress[] favoredNodes; 272 private HFileContext fileContext; 273 protected boolean shouldDropBehind = false; 274 275 WriterFactory(Configuration conf, CacheConfig cacheConf) { 276 this.conf = conf; 277 this.cacheConf = cacheConf; 278 } 279 280 public WriterFactory withPath(FileSystem fs, Path path) { 281 Preconditions.checkNotNull(fs); 282 Preconditions.checkNotNull(path); 283 this.fs = fs; 284 this.path = path; 285 return this; 286 } 287 288 public WriterFactory withOutputStream(FSDataOutputStream ostream) { 289 Preconditions.checkNotNull(ostream); 290 this.ostream = ostream; 291 return this; 292 } 293 294 public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) { 295 // Deliberately not checking for null here. 296 this.favoredNodes = favoredNodes; 297 return this; 298 } 299 300 public WriterFactory withFileContext(HFileContext fileContext) { 301 this.fileContext = fileContext; 302 return this; 303 } 304 305 public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) { 306 this.shouldDropBehind = shouldDropBehind; 307 return this; 308 } 309 310 public Writer create() throws IOException { 311 if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) { 312 throw new AssertionError("Please specify exactly one of " + "filesystem/path or path"); 313 } 314 if (path != null) { 315 ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes); 316 try { 317 ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction()); 318 } catch (UnsupportedOperationException uoe) { 319 LOG.trace("Unable to set drop behind on {}", path, uoe); 320 LOG.debug("Unable to set drop behind on {}", path.getName()); 321 } 322 } 323 return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext); 324 } 325 } 326 327 /** The configuration key for HFile version to use for new files */ 328 public static final String FORMAT_VERSION_KEY = "hfile.format.version"; 329 330 public static int getFormatVersion(Configuration conf) { 331 int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION); 332 checkFormatVersion(version); 333 return version; 334 } 335 336 /** 337 * Returns the factory to be used to create {@link HFile} writers. Disables block cache access for 338 * all writers created through the returned factory. 339 */ 340 public static final WriterFactory getWriterFactoryNoCache(Configuration conf) { 341 return HFile.getWriterFactory(conf, CacheConfig.DISABLED); 342 } 343 344 /** 345 * Returns the factory to be used to create {@link HFile} writers 346 */ 347 public static final WriterFactory getWriterFactory(Configuration conf, CacheConfig cacheConf) { 348 int version = getFormatVersion(conf); 349 switch (version) { 350 case 2: 351 throw new IllegalArgumentException("This should never happen. " 352 + "Did you change hfile.format.version to read v2? This version of the software writes v3" 353 + " hfiles only (but it can read v2 files without having to update hfile.format.version " 354 + "in hbase-site.xml)"); 355 case 3: 356 return new HFile.WriterFactory(conf, cacheConf); 357 default: 358 throw new IllegalArgumentException( 359 "Cannot create writer for HFile " + "format version " + version); 360 } 361 } 362 363 /** 364 * An abstraction used by the block index. Implementations will check cache for any asked-for 365 * block and return cached block if found. Otherwise, after reading from fs, will try and put 366 * block into cache before returning. 367 */ 368 public interface CachingBlockReader { 369 /** 370 * Read in a file block. 371 * @param offset offset to read. 372 * @param onDiskBlockSize size of the block 373 * @param isCompaction is this block being read as part of a compaction 374 * @param expectedBlockType the block type we are expecting to read with this read 375 * operation, or null to read whatever block type is available 376 * and avoid checking (that might reduce caching efficiency of 377 * encoded data blocks) 378 * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks 379 * to be in, or null to not perform this check and return the 380 * block irrespective of the encoding. This check only applies 381 * to data blocks and can be set to null when the caller is 382 * expecting to read a non-data block and has set 383 * expectedBlockType accordingly. 384 * @return Block wrapped in a ByteBuffer. 385 */ 386 HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread, 387 final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType, 388 DataBlockEncoding expectedDataBlockEncoding) throws IOException; 389 390 HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread, 391 final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType, 392 DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly) throws IOException; 393 } 394 395 /** An interface used by clients to open and iterate an {@link HFile}. */ 396 public interface Reader extends Closeable, CachingBlockReader { 397 /** 398 * Returns this reader's "name". Usually the last component of the path. Needs to be constant as 399 * the file is being moved to support caching on write. 400 */ 401 String getName(); 402 403 CellComparator getComparator(); 404 405 HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread, 406 boolean isCompaction); 407 408 HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException; 409 410 Optional<ExtendedCell> getLastKey(); 411 412 Optional<ExtendedCell> midKey() throws IOException; 413 414 long length(); 415 416 long getEntries(); 417 418 Optional<ExtendedCell> getFirstKey(); 419 420 long indexSize(); 421 422 Optional<byte[]> getFirstRowKey(); 423 424 Optional<byte[]> getLastRowKey(); 425 426 FixedFileTrailer getTrailer(); 427 428 void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader); 429 430 HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader(); 431 432 void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader); 433 434 HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader(); 435 436 HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread); 437 438 /** 439 * Retrieves general Bloom filter metadata as appropriate for each {@link HFile} version. Knows 440 * nothing about how that metadata is structured. 441 */ 442 DataInput getGeneralBloomFilterMetadata() throws IOException; 443 444 /** 445 * Retrieves delete family Bloom filter metadata as appropriate for each {@link HFile} version. 446 * Knows nothing about how that metadata is structured. 447 */ 448 DataInput getDeleteBloomFilterMetadata() throws IOException; 449 450 Path getPath(); 451 452 /** Close method with optional evictOnClose */ 453 void close(boolean evictOnClose) throws IOException; 454 455 DataBlockEncoding getDataBlockEncoding(); 456 457 boolean hasMVCCInfo(); 458 459 /** 460 * Return the file context of the HFile this reader belongs to 461 */ 462 HFileContext getFileContext(); 463 464 boolean isPrimaryReplicaReader(); 465 466 DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction); 467 468 HFileBlock.FSReader getUncachedBlockReader(); 469 470 boolean prefetchComplete(); 471 472 boolean prefetchStarted(); 473 474 /** 475 * To close the stream's socket. Note: This can be concurrently called from multiple threads and 476 * implementation should take care of thread safety. 477 */ 478 void unbufferStream(); 479 480 ReaderContext getContext(); 481 482 HFileInfo getHFileInfo(); 483 484 void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder); 485 } 486 487 /** 488 * Method returns the reader given the specified arguments. TODO This is a bad abstraction. See 489 * HBASE-6635. 490 * @param context Reader context info 491 * @param fileInfo HFile info 492 * @param cacheConf Cache configuation values, cannot be null. 493 * @param conf Configuration 494 * @return an appropriate instance of HFileReader 495 * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException 496 */ 497 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SF_SWITCH_FALLTHROUGH", 498 justification = "Intentional") 499 public static Reader createReader(ReaderContext context, HFileInfo fileInfo, 500 CacheConfig cacheConf, Configuration conf) throws IOException { 501 try { 502 if (context.getReaderType() == ReaderType.STREAM) { 503 // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields 504 return new HFileStreamReader(context, fileInfo, cacheConf, conf); 505 } 506 FixedFileTrailer trailer = fileInfo.getTrailer(); 507 switch (trailer.getMajorVersion()) { 508 case 2: 509 LOG.debug("Opening HFile v2 with v3 reader"); 510 // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH 511 case 3: 512 return new HFilePreadReader(context, fileInfo, cacheConf, conf); 513 default: 514 throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion()); 515 } 516 } catch (Throwable t) { 517 IOUtils.closeQuietly(context.getInputStreamWrapper(), 518 e -> LOG.warn("failed to close input stream wrapper", e)); 519 throw new CorruptHFileException( 520 "Problem reading HFile Trailer from file " + context.getFilePath(), t); 521 } finally { 522 context.getInputStreamWrapper().unbuffer(); 523 } 524 } 525 526 /** 527 * Creates reader with cache configuration disabled 528 * @param fs filesystem 529 * @param path Path to file to read 530 * @param conf Configuration 531 * @return an active Reader instance 532 * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile 533 * is corrupt/invalid. 534 */ 535 public static Reader createReader(FileSystem fs, Path path, Configuration conf) 536 throws IOException { 537 // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not use 538 // block cache then it is OK to set it as any value. We use true here. 539 return createReader(fs, path, CacheConfig.DISABLED, true, conf); 540 } 541 542 /** 543 * @param fs filesystem 544 * @param path Path to file to read 545 * @param cacheConf This must not be null. 546 * @param primaryReplicaReader true if this is a reader for primary replica 547 * @param conf Configuration 548 * @return an active Reader instance 549 * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile 550 * is corrupt/invalid. 551 * @see CacheConfig#CacheConfig(Configuration) 552 */ 553 public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf, 554 boolean primaryReplicaReader, Configuration conf) throws IOException { 555 Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf"); 556 FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path); 557 ReaderContext context = 558 new ReaderContextBuilder().withFilePath(path).withInputStreamWrapper(stream) 559 .withFileSize(fs.getFileStatus(path).getLen()).withFileSystem(stream.getHfs()) 560 .withPrimaryReplicaReader(primaryReplicaReader).withReaderType(ReaderType.PREAD).build(); 561 HFileInfo fileInfo = new HFileInfo(context, conf); 562 Reader reader = createReader(context, fileInfo, cacheConf, conf); 563 fileInfo.initMetaAndIndex(reader); 564 return reader; 565 } 566 567 /** 568 * Returns true if the specified file has a valid HFile Trailer. 569 * @param fs filesystem 570 * @param path Path to file to verify 571 * @return true if the file has a valid HFile Trailer, otherwise false 572 * @throws IOException if failed to read from the underlying stream 573 */ 574 public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException { 575 return isHFileFormat(fs, fs.getFileStatus(path)); 576 } 577 578 /** 579 * Returns true if the specified file has a valid HFile Trailer. 580 * @param fs filesystem 581 * @param fileStatus the file to verify 582 * @return true if the file has a valid HFile Trailer, otherwise false 583 * @throws IOException if failed to read from the underlying stream 584 */ 585 public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus) 586 throws IOException { 587 final Path path = fileStatus.getPath(); 588 final long size = fileStatus.getLen(); 589 try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) { 590 boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum(); 591 assert !isHBaseChecksum; // Initially we must read with FS checksum. 592 FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size); 593 return true; 594 } catch (IllegalArgumentException e) { 595 return false; 596 } 597 } 598 599 /** 600 * Get names of supported compression algorithms. The names are acceptable by HFile.Writer. 601 * @return Array of strings, each represents a supported compression algorithm. Currently, the 602 * following compression algorithms are supported. 603 * <ul> 604 * <li>"none" - No compression. 605 * <li>"gz" - GZIP compression. 606 * </ul> 607 */ 608 public static String[] getSupportedCompressionAlgorithms() { 609 return Compression.getSupportedAlgorithms(); 610 } 611 612 // Utility methods. 613 /* 614 * @param l Long to convert to an int. 615 * @return <code>l</code> cast as an int. 616 */ 617 static int longToInt(final long l) { 618 // Expecting the size() of a block not exceeding 4GB. Assuming the 619 // size() will wrap to negative integer if it exceeds 2GB (From tfile). 620 return (int) (l & 0x00000000ffffffffL); 621 } 622 623 /** 624 * Returns all HFiles belonging to the given region directory. Could return an empty list. 625 * @param fs The file system reference. 626 * @param regionDir The region directory to scan. 627 * @return The list of files found. 628 * @throws IOException When scanning the files fails. 629 */ 630 public static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException { 631 List<Path> regionHFiles = new ArrayList<>(); 632 PathFilter dirFilter = new FSUtils.DirFilter(fs); 633 FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter); 634 for (FileStatus dir : familyDirs) { 635 FileStatus[] files = fs.listStatus(dir.getPath()); 636 for (FileStatus file : files) { 637 if ( 638 !file.isDirectory() 639 && (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) 640 && (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR)) 641 ) { 642 regionHFiles.add(file.getPath()); 643 } 644 } 645 } 646 return regionHFiles; 647 } 648 649 /** 650 * Checks the given {@link HFile} format version, and throws an exception if invalid. Note that if 651 * the version number comes from an input file and has not been verified, the caller needs to 652 * re-throw an {@link IOException} to indicate that this is not a software error, but corrupted 653 * input. 654 * @param version an HFile version 655 * @throws IllegalArgumentException if the version is invalid 656 */ 657 public static void checkFormatVersion(int version) throws IllegalArgumentException { 658 if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) { 659 throw new IllegalArgumentException("Invalid HFile version: " + version + " (expected to be " 660 + "between " + MIN_FORMAT_VERSION + " and " + MAX_FORMAT_VERSION + ")"); 661 } 662 } 663 664 public static void checkHFileVersion(final Configuration c) { 665 int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION); 666 if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) { 667 throw new IllegalArgumentException( 668 "The setting for " + FORMAT_VERSION_KEY + " (in your hbase-*.xml files) is " + version 669 + " which does not match " + MAX_FORMAT_VERSION 670 + "; are you running with a configuration from an older or newer hbase install (an " 671 + "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?"); 672 } 673 } 674 675 public static void main(String[] args) throws Exception { 676 // delegate to preserve old behavior 677 HFilePrettyPrinter.main(args); 678 } 679}