001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.io.Closeable;
021import java.io.DataInput;
022import java.io.IOException;
023import java.net.InetSocketAddress;
024import java.util.ArrayList;
025import java.util.List;
026import java.util.Optional;
027import java.util.concurrent.atomic.LongAdder;
028import org.apache.commons.io.IOUtils;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataOutputStream;
031import org.apache.hadoop.fs.FileStatus;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.fs.PathFilter;
035import org.apache.hadoop.hbase.Cell;
036import org.apache.hadoop.hbase.CellComparator;
037import org.apache.hadoop.hbase.HConstants;
038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
039import org.apache.hadoop.hbase.io.MetricsIO;
040import org.apache.hadoop.hbase.io.MetricsIOWrapperImpl;
041import org.apache.hadoop.hbase.io.compress.Compression;
042import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
043import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
044import org.apache.hadoop.hbase.regionserver.CellSink;
045import org.apache.hadoop.hbase.regionserver.ShipperListener;
046import org.apache.hadoop.hbase.util.BloomFilterWriter;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.FSUtils;
049import org.apache.hadoop.io.Writable;
050import org.apache.yetus.audience.InterfaceAudience;
051import org.slf4j.Logger;
052import org.slf4j.LoggerFactory;
053
054import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
055
056/**
057 * File format for hbase. A file of sorted key/value pairs. Both keys and values are byte arrays.
058 * <p>
059 * The memory footprint of a HFile includes the following (below is taken from the <a
060 * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation but applies also
061 * to HFile):
062 * <ul>
063 * <li>Some constant overhead of reading or writing a compressed block.
064 * <ul>
065 * <li>Each compressed block requires one compression/decompression codec for I/O.
066 * <li>Temporary space to buffer the key.
067 * <li>Temporary space to buffer the value.
068 * </ul>
069 * <li>HFile index, which is proportional to the total number of Data Blocks. The total amount of
070 * memory needed to hold the index can be estimated as (56+AvgKeySize)*NumBlocks.
071 * </ul>
072 * Suggestions on performance optimization.
073 * <ul>
074 * <li>Minimum block size. We recommend a setting of minimum block size between 8KB to 1MB for
075 * general usage. Larger block size is preferred if files are primarily for sequential access.
076 * However, it would lead to inefficient random access (because there are more data to decompress).
077 * Smaller blocks are good for random access, but require more memory to hold the block index, and
078 * may be slower to create (because we must flush the compressor stream at the conclusion of each
079 * data block, which leads to an FS I/O flush). Further, due to the internal caching in Compression
080 * codec, the smallest possible block size would be around 20KB-30KB.
081 * <li>The current implementation does not offer true multi-threading for reading. The
082 * implementation uses FSDataInputStream seek()+read(), which is shown to be much faster than
083 * positioned-read call in single thread mode. However, it also means that if multiple threads
084 * attempt to access the same HFile (using multiple scanners) simultaneously, the actual I/O is
085 * carried out sequentially even if they access different DFS blocks (Reexamine! pread seems to be
086 * 10% faster than seek+read in my testing -- stack).
 * <li>Compression codec. Use "none" if the data is not very compressible (by compressible, I mean a
 * compression ratio at least 2:1). Generally, use "lzo" as the starting point for experimenting.
 * "gz" offers slightly better compression ratio over "lzo" but requires 4x CPU to compress and 2x
 * CPU to decompress, compared to "lzo".
091 * </ul>
092 * For more on the background behind HFile, see <a
093 * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
094 * <p>
095 * File is made of data blocks followed by meta data blocks (if any), a fileinfo block, data block
096 * index, meta data block index, and a fixed size trailer which records the offsets at which file
097 * changes content type.
098 *
099 * <pre>
100 * &lt;data blocks&gt;&lt;meta blocks&gt;&lt;fileinfo&gt;&lt;
101 * data index&gt;&lt;meta index&gt;&lt;trailer&gt;
102 * </pre>
103 *
 * Each block has a bit of magic at its start. Blocks are comprised of key/values. In data blocks,
 * they are both byte arrays. Metadata blocks are a String key and a byte array value. An empty file
 * looks like this:
107 *
108 * <pre>
109 * &lt;fileinfo&gt;&lt;trailer&gt;
110 * </pre>
111 *
 * That is, there are neither data nor meta blocks present.
113 * <p>
114 * TODO: Do scanners need to be able to take a start and end row? TODO: Should BlockIndex know the
115 * name of its file? Should it have a Path that points at its file say for the case where an index
116 * lives apart from an HFile instance?
117 */
118@InterfaceAudience.Private
119public final class HFile {
  // LOG is being used in HFileBlock and CheckSumUtil
  // (which is why it is package-private rather than private).
  static final Logger LOG = LoggerFactory.getLogger(HFile.class);

  /**
   * Maximum length of key in HFile. Set to {@link Integer#MAX_VALUE}.
   */
  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /**
   * Default compression: none.
   */
  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;

  /** Minimum supported HFile format version */
  public static final int MIN_FORMAT_VERSION = 2;

  /**
   * Maximum supported HFile format version. Also the default used when
   * {@link #FORMAT_VERSION_KEY} is not set in the configuration.
   */
  public static final int MAX_FORMAT_VERSION = 3;

  /**
   * Minimum HFile format version with support for persisting cell tags
   */
  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;

  /** Default compression name: the name of {@link #DEFAULT_COMPRESSION_ALGORITHM} (none). */
  public final static String DEFAULT_COMPRESSION = DEFAULT_COMPRESSION_ALGORITHM.getName();

  /** Meta data block name for bloom filter bits. */
  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";

  /**
   * We assume that HFile path ends with ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at
   * least this many levels of nesting. This is needed for identifying table and CF name from an
   * HFile path.
   */
  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;

  /**
   * The default number of bytes per checksum (16 KB).
   */
  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;

  // For measuring number of checksum failures. Read through getChecksumFailuresCount() and
  // drained through getAndResetChecksumFailuresCount().
  static final LongAdder CHECKSUM_FAILURES = new LongAdder();

  // For tests. Gets incremented when we read a block whether from HDFS or from Cache.
  public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder();

  /**
   * Static instance for the metrics so that HFileReaders access the same instance. The
   * updateReadLatency/updateWriteLatency helpers of this class report into it.
   */
  static final MetricsIO metrics = new MetricsIO(new MetricsIOWrapperImpl());
173
  /**
   * Private no-op constructor. {@code HFile} is a final holder of constants, nested types and
   * static helpers, so this prevents instantiation from outside the class.
   */
  private HFile() {
  }
179
  /**
   * Number of checksum verification failures. It also clears the counter.
   * <p>
   * Implemented with {@link LongAdder#sumThenReset()}, which is not an atomic snapshot: failures
   * recorded concurrently with this call are not guaranteed to be included in the returned value.
   * @return the number of checksum failures counted up to this call
   */
  public static final long getAndResetChecksumFailuresCount() {
    return CHECKSUM_FAILURES.sumThenReset();
  }
186
  /**
   * Number of checksum verification failures. Unlike {@link #getAndResetChecksumFailuresCount()},
   * this only reads the counter and does NOT clear it.
   * @return the current number of checksum failures
   */
  public static final long getChecksumFailuresCount() {
    return CHECKSUM_FAILURES.sum();
  }
193
194  public static final void updateReadLatency(long latencyMillis, boolean pread) {
195    if (pread) {
196      metrics.updateFsPreadTime(latencyMillis);
197    } else {
198      metrics.updateFsReadTime(latencyMillis);
199    }
200  }
201
  /**
   * Reports the latency of one filesystem write to the shared IO metrics.
   * @param latencyMillis how long the write took, in milliseconds
   */
  public static final void updateWriteLatency(long latencyMillis) {
    metrics.updateFsWriteTime(latencyMillis);
  }
205
  /**
   * API required to write an {@link HFile}. A writer is {@link Closeable} and is also a
   * {@link CellSink} and a {@link ShipperListener}.
   */
  public interface Writer extends Closeable, CellSink, ShipperListener {
    /** Max memstore (mvcc) timestamp in FileInfo */
    public static final byte[] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");

    /**
     * Add an element to the file info map.
     * @param key   file info key
     * @param value file info value
     * @throws IOException if the entry cannot be added
     */
    void appendFileInfo(byte[] key, byte[] value) throws IOException;

    /** Returns the path to this {@link HFile} */
    Path getPath();

    /**
     * Adds an inline block writer such as a multi-level block index writer or a compound Bloom
     * filter writer.
     * @param bloomWriter the inline block writer to add (despite the parameter name, per the
     *                    description above it is not necessarily a Bloom filter writer)
     */
    void addInlineBlockWriter(InlineBlockWriter bloomWriter);

    // The below three methods take Writables. We'd like to undo Writables but undoing the below
    // would be pretty painful. Could take a byte [] or a Message but we want to be backward
    // compatible around hfiles so would need to map between Message and Writable or byte [] and
    // current Writable serialization. This would be a bit of work to little gain. Thats my
    // thinking at moment. St.Ack 20121129

    /**
     * Appends a meta block to the file.
     * @param bloomFilterMetaKey name of the meta block (NOTE(review): the parameter name suggests
     *                           Bloom filter metadata, but nothing in this interface restricts it
     *                           to that -- confirm against the implementation)
     * @param metaWriter         the {@link Writable} that supplies the block content
     */
    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);

    /**
     * Store general Bloom filter in the file. This does not deal with Bloom filter internals but is
     * necessary, since Bloom filters are stored differently in HFile version 1 and version 2.
     * @param bfw the Bloom filter writer to store
     */
    void addGeneralBloomFilter(BloomFilterWriter bfw);

    /**
     * Store delete family Bloom filter in the file, which is only supported in HFile V2.
     * @param bfw the delete family Bloom filter writer to store
     * @throws IOException if the Bloom filter cannot be stored
     */
    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;

    /**
     * Return the file context for the HFile this writer belongs to
     */
    HFileContext getFileContext();
  }
247
248  /**
249   * This variety of ways to construct writers is used throughout the code, and we want to be able
250   * to swap writer implementations.
251   */
252  public static class WriterFactory {
253    protected final Configuration conf;
254    protected final CacheConfig cacheConf;
255    protected FileSystem fs;
256    protected Path path;
257    protected FSDataOutputStream ostream;
258    protected InetSocketAddress[] favoredNodes;
259    private HFileContext fileContext;
260    protected boolean shouldDropBehind = false;
261
262    WriterFactory(Configuration conf, CacheConfig cacheConf) {
263      this.conf = conf;
264      this.cacheConf = cacheConf;
265    }
266
267    public WriterFactory withPath(FileSystem fs, Path path) {
268      Preconditions.checkNotNull(fs);
269      Preconditions.checkNotNull(path);
270      this.fs = fs;
271      this.path = path;
272      return this;
273    }
274
275    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
276      Preconditions.checkNotNull(ostream);
277      this.ostream = ostream;
278      return this;
279    }
280
281    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
282      // Deliberately not checking for null here.
283      this.favoredNodes = favoredNodes;
284      return this;
285    }
286
287    public WriterFactory withFileContext(HFileContext fileContext) {
288      this.fileContext = fileContext;
289      return this;
290    }
291
292    public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
293      this.shouldDropBehind = shouldDropBehind;
294      return this;
295    }
296
297    public Writer create() throws IOException {
298      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
299        throw new AssertionError("Please specify exactly one of " + "filesystem/path or path");
300      }
301      if (path != null) {
302        ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
303        try {
304          ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
305        } catch (UnsupportedOperationException uoe) {
306          LOG.trace("Unable to set drop behind on {}", path, uoe);
307          LOG.debug("Unable to set drop behind on {}", path.getName());
308        }
309      }
310      return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext);
311    }
312  }
313
  /** The configuration key for HFile version to use for new files */
  public static final String FORMAT_VERSION_KEY = "hfile.format.version";

  /**
   * Reads the HFile format version from {@link #FORMAT_VERSION_KEY}, defaulting to
   * {@link #MAX_FORMAT_VERSION} when the key is not set, and validates it.
   * @param conf configuration to read the version from
   * @return the configured format version
   * @throws IllegalArgumentException if the version is rejected by
   *                                  {@link #checkFormatVersion(int)}
   */
  public static int getFormatVersion(Configuration conf) {
    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    checkFormatVersion(version);
    return version;
  }
322
  /**
   * Returns the factory to be used to create {@link HFile} writers. Disables block cache access for
   * all writers created through the returned factory.
   * <p>
   * Equivalent to {@link #getWriterFactory(Configuration, CacheConfig)} called with
   * {@code CacheConfig.DISABLED}.
   * @param conf configuration, also used to determine the HFile format version
   * @return a writer factory that uses the disabled cache configuration
   */
  public static final WriterFactory getWriterFactoryNoCache(Configuration conf) {
    return HFile.getWriterFactory(conf, CacheConfig.DISABLED);
  }
330
331  /**
332   * Returns the factory to be used to create {@link HFile} writers
333   */
334  public static final WriterFactory getWriterFactory(Configuration conf, CacheConfig cacheConf) {
335    int version = getFormatVersion(conf);
336    switch (version) {
337      case 2:
338        throw new IllegalArgumentException("This should never happen. "
339          + "Did you change hfile.format.version to read v2? This version of the software writes v3"
340          + " hfiles only (but it can read v2 files without having to update hfile.format.version "
341          + "in hbase-site.xml)");
342      case 3:
343        return new HFile.WriterFactory(conf, cacheConf);
344      default:
345        throw new IllegalArgumentException(
346          "Cannot create writer for HFile " + "format version " + version);
347    }
348  }
349
  /**
   * An abstraction used by the block index. Implementations will check cache for any asked-for
   * block and return cached block if found. Otherwise, after reading from fs, will try and put
   * block into cache before returning.
   */
  public interface CachingBlockReader {
    /**
     * Read in a file block.
     * @param offset                    offset to read.
     * @param onDiskBlockSize           size of the block
     * @param cacheBlock                presumably whether the block read should be put into the
     *                                  block cache -- confirm against the implementation
     * @param pread                     presumably whether to use a positional read rather than
     *                                  seek+read -- confirm against the implementation
     * @param isCompaction              is this block being read as part of a compaction
     * @param updateCacheMetrics        presumably whether cache hit/miss metrics are updated for
     *                                  this read -- confirm against the implementation
     * @param expectedBlockType         the block type we are expecting to read with this read
     *                                  operation, or null to read whatever block type is available
     *                                  and avoid checking (that might reduce caching efficiency of
     *                                  encoded data blocks)
     * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
     *                                  to be in, or null to not perform this check and return the
     *                                  block irrespective of the encoding. This check only applies
     *                                  to data blocks and can be set to null when the caller is
     *                                  expecting to read a non-data block and has set
     *                                  expectedBlockType accordingly.
     * @return the block that was read, as an {@link HFileBlock}
     * @throws IOException if the block cannot be read
     */
    HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread,
      final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding) throws IOException;
  }
377
  /**
   * An interface used by clients to open and iterate an {@link HFile}. A reader is
   * {@link Closeable} and is also a {@link CachingBlockReader}.
   */
  public interface Reader extends Closeable, CachingBlockReader {
    /**
     * Returns this reader's "name". Usually the last component of the path. Needs to be constant as
     * the file is being moved to support caching on write.
     */
    String getName();

    /** Returns the cell comparator associated with this reader. */
    CellComparator getComparator();

    /**
     * Creates a scanner over this file.
     * @param conf         configuration
     * @param cacheBlocks  presumably whether blocks read by the scanner are cached -- confirm
     * @param pread        presumably whether the scanner uses positional reads -- confirm
     * @param isCompaction presumably whether the scanner is used by a compaction -- confirm
     */
    HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread,
      boolean isCompaction);

    /**
     * Returns the meta block with the given name.
     * @param metaBlockName name of the meta block
     * @param cacheBlock    presumably whether the block read should be cached -- confirm
     * @throws IOException if the block cannot be read
     */
    HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;

    /** Returns the last key of the file as an {@link Optional}, which may be empty. */
    Optional<Cell> getLastKey();

    /** Returns the mid key of the file as an {@link Optional}, which may be empty. */
    Optional<Cell> midKey() throws IOException;

    long length();

    long getEntries();

    /** Returns the first key of the file as an {@link Optional}, which may be empty. */
    Optional<Cell> getFirstKey();

    long indexSize();

    /** Returns the first row key of the file as an {@link Optional}, which may be empty. */
    Optional<byte[]> getFirstRowKey();

    /** Returns the last row key of the file as an {@link Optional}, which may be empty. */
    Optional<byte[]> getLastRowKey();

    FixedFileTrailer getTrailer();

    void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader);

    HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader();

    void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader);

    HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader();

    /**
     * Creates a scanner over this file; the overload without the {@code isCompaction} argument.
     */
    HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread);

    /**
     * Retrieves general Bloom filter metadata as appropriate for each {@link HFile} version. Knows
     * nothing about how that metadata is structured.
     */
    DataInput getGeneralBloomFilterMetadata() throws IOException;

    /**
     * Retrieves delete family Bloom filter metadata as appropriate for each {@link HFile} version.
     * Knows nothing about how that metadata is structured.
     */
    DataInput getDeleteBloomFilterMetadata() throws IOException;

    Path getPath();

    /** Close method with optional evictOnClose */
    void close(boolean evictOnClose) throws IOException;

    DataBlockEncoding getDataBlockEncoding();

    boolean hasMVCCInfo();

    /**
     * Return the file context of the HFile this reader belongs to
     */
    HFileContext getFileContext();

    boolean isPrimaryReplicaReader();

    DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);

    HFileBlock.FSReader getUncachedBlockReader();

    boolean prefetchComplete();

    /**
     * To close the stream's socket. Note: This can be concurrently called from multiple threads and
     * implementation should take care of thread safety.
     */
    void unbufferStream();

    /** Returns the {@link ReaderContext} of this reader. */
    ReaderContext getContext();

    /** Returns the {@link HFileInfo} of this reader. */
    HFileInfo getHFileInfo();

    void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder);
  }
467
468  /**
469   * Method returns the reader given the specified arguments. TODO This is a bad abstraction. See
470   * HBASE-6635.
471   * @param context   Reader context info
472   * @param fileInfo  HFile info
473   * @param cacheConf Cache configuation values, cannot be null.
474   * @param conf      Configuration
475   * @return an appropriate instance of HFileReader
476   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
477   */
478  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SF_SWITCH_FALLTHROUGH",
479      justification = "Intentional")
480  public static Reader createReader(ReaderContext context, HFileInfo fileInfo,
481    CacheConfig cacheConf, Configuration conf) throws IOException {
482    try {
483      if (context.getReaderType() == ReaderType.STREAM) {
484        // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields
485        return new HFileStreamReader(context, fileInfo, cacheConf, conf);
486      }
487      FixedFileTrailer trailer = fileInfo.getTrailer();
488      switch (trailer.getMajorVersion()) {
489        case 2:
490          LOG.debug("Opening HFile v2 with v3 reader");
491          // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH
492        case 3:
493          return new HFilePreadReader(context, fileInfo, cacheConf, conf);
494        default:
495          throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
496      }
497    } catch (Throwable t) {
498      IOUtils.closeQuietly(context.getInputStreamWrapper(),
499        e -> LOG.warn("failed to close input stream wrapper", e));
500      throw new CorruptHFileException(
501        "Problem reading HFile Trailer from file " + context.getFilePath(), t);
502    } finally {
503      context.getInputStreamWrapper().unbuffer();
504    }
505  }
506
  /**
   * Creates reader with cache configuration disabled. Delegates to
   * {@link #createReader(FileSystem, Path, CacheConfig, boolean, Configuration)} with
   * {@code CacheConfig.DISABLED} and {@code primaryReplicaReader} set to true.
   * @param fs   filesystem
   * @param path Path to file to read
   * @param conf Configuration
   * @return an active Reader instance
   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile
   *                     is corrupt/invalid.
   */
  public static Reader createReader(FileSystem fs, Path path, Configuration conf)
    throws IOException {
    // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not use
    // block cache then it is OK to set it as any value. We use true here.
    return createReader(fs, path, CacheConfig.DISABLED, true, conf);
  }
522
523  /**
524   * @param fs                   filesystem
525   * @param path                 Path to file to read
526   * @param cacheConf            This must not be null.
527   * @param primaryReplicaReader true if this is a reader for primary replica
528   * @param conf                 Configuration
529   * @return an active Reader instance
530   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile
531   *                     is corrupt/invalid.
532   * @see CacheConfig#CacheConfig(Configuration)
533   */
534  public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf,
535    boolean primaryReplicaReader, Configuration conf) throws IOException {
536    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
537    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
538    ReaderContext context =
539      new ReaderContextBuilder().withFilePath(path).withInputStreamWrapper(stream)
540        .withFileSize(fs.getFileStatus(path).getLen()).withFileSystem(stream.getHfs())
541        .withPrimaryReplicaReader(primaryReplicaReader).withReaderType(ReaderType.PREAD).build();
542    HFileInfo fileInfo = new HFileInfo(context, conf);
543    Reader reader = createReader(context, fileInfo, cacheConf, conf);
544    fileInfo.initMetaAndIndex(reader);
545    return reader;
546  }
547
  /**
   * Returns true if the specified file has a valid HFile Trailer. Looks up the file status for
   * {@code path} and delegates to {@link #isHFileFormat(FileSystem, FileStatus)}.
   * @param fs   filesystem
   * @param path Path to file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
    return isHFileFormat(fs, fs.getFileStatus(path));
  }
558
559  /**
560   * Returns true if the specified file has a valid HFile Trailer.
561   * @param fs         filesystem
562   * @param fileStatus the file to verify
563   * @return true if the file has a valid HFile Trailer, otherwise false
564   * @throws IOException if failed to read from the underlying stream
565   */
566  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
567    throws IOException {
568    final Path path = fileStatus.getPath();
569    final long size = fileStatus.getLen();
570    try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) {
571      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
572      assert !isHBaseChecksum; // Initially we must read with FS checksum.
573      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
574      return true;
575    } catch (IllegalArgumentException e) {
576      return false;
577    }
578  }
579
  /**
   * Get names of supported compression algorithms. The names are acceptable by HFile.Writer.
   * <p>
   * The result is whatever {@code Compression.getSupportedAlgorithms()} reports.
   * NOTE(review): the list below comes from the original documentation and may not be exhaustive.
   * @return Array of strings, each represents a supported compression algorithm. Currently, the
   *         following compression algorithms are supported.
   *         <ul>
   *         <li>"none" - No compression.
   *         <li>"gz" - GZIP compression.
   *         </ul>
   */
  public static String[] getSupportedCompressionAlgorithms() {
    return Compression.getSupportedAlgorithms();
  }
592
593  // Utility methods.
594  /*
595   * @param l Long to convert to an int.
596   * @return <code>l</code> cast as an int.
597   */
598  static int longToInt(final long l) {
599    // Expecting the size() of a block not exceeding 4GB. Assuming the
600    // size() will wrap to negative integer if it exceeds 2GB (From tfile).
601    return (int) (l & 0x00000000ffffffffL);
602  }
603
604  /**
605   * Returns all HFiles belonging to the given region directory. Could return an empty list.
606   * @param fs        The file system reference.
607   * @param regionDir The region directory to scan.
608   * @return The list of files found.
609   * @throws IOException When scanning the files fails.
610   */
611  public static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException {
612    List<Path> regionHFiles = new ArrayList<>();
613    PathFilter dirFilter = new FSUtils.DirFilter(fs);
614    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
615    for (FileStatus dir : familyDirs) {
616      FileStatus[] files = fs.listStatus(dir.getPath());
617      for (FileStatus file : files) {
618        if (
619          !file.isDirectory()
620            && (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME))
621            && (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))
622        ) {
623          regionHFiles.add(file.getPath());
624        }
625      }
626    }
627    return regionHFiles;
628  }
629
630  /**
631   * Checks the given {@link HFile} format version, and throws an exception if invalid. Note that if
632   * the version number comes from an input file and has not been verified, the caller needs to
633   * re-throw an {@link IOException} to indicate that this is not a software error, but corrupted
634   * input.
635   * @param version an HFile version
636   * @throws IllegalArgumentException if the version is invalid
637   */
638  public static void checkFormatVersion(int version) throws IllegalArgumentException {
639    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
640      throw new IllegalArgumentException("Invalid HFile version: " + version + " (expected to be "
641        + "between " + MIN_FORMAT_VERSION + " and " + MAX_FORMAT_VERSION + ")");
642    }
643  }
644
645  public static void checkHFileVersion(final Configuration c) {
646    int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
647    if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
648      throw new IllegalArgumentException(
649        "The setting for " + FORMAT_VERSION_KEY + " (in your hbase-*.xml files) is " + version
650          + " which does not match " + MAX_FORMAT_VERSION
651          + "; are you running with a configuration from an older or newer hbase install (an "
652          + "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
653    }
654  }
655
  /**
   * Command-line entry point; forwards the arguments unchanged to {@code HFilePrettyPrinter.main}.
   * @param args command-line arguments
   * @throws Exception whatever the delegate throws
   */
  public static void main(String[] args) throws Exception {
    // delegate to preserve old behavior
    HFilePrettyPrinter.main(args);
  }
660}