001/*
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.io.hfile;
020
021import java.io.Closeable;
022import java.io.DataInput;
023import java.io.IOException;
024import java.net.InetSocketAddress;
025import java.util.ArrayList;
026import java.util.List;
027import java.util.Optional;
028import java.util.concurrent.atomic.LongAdder;
029import org.apache.commons.io.IOUtils;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.fs.FSDataOutputStream;
032import org.apache.hadoop.fs.FileStatus;
033import org.apache.hadoop.fs.FileSystem;
034import org.apache.hadoop.fs.Path;
035import org.apache.hadoop.fs.PathFilter;
036import org.apache.hadoop.hbase.Cell;
037import org.apache.hadoop.hbase.CellComparator;
038import org.apache.hadoop.hbase.HConstants;
039import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
040import org.apache.hadoop.hbase.io.MetricsIO;
041import org.apache.hadoop.hbase.io.MetricsIOWrapperImpl;
042import org.apache.hadoop.hbase.io.compress.Compression;
043import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
044import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
045import org.apache.hadoop.hbase.regionserver.CellSink;
046import org.apache.hadoop.hbase.regionserver.ShipperListener;
047import org.apache.hadoop.hbase.util.BloomFilterWriter;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.FSUtils;
050import org.apache.hadoop.io.Writable;
051import org.apache.yetus.audience.InterfaceAudience;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
055import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
056
057/**
058 * File format for hbase.
059 * A file of sorted key/value pairs. Both keys and values are byte arrays.
060 * <p>
061 * The memory footprint of a HFile includes the following (below is taken from the
062 * <a
063 * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
064 * but applies also to HFile):
065 * <ul>
066 * <li>Some constant overhead of reading or writing a compressed block.
067 * <ul>
068 * <li>Each compressed block requires one compression/decompression codec for
069 * I/O.
070 * <li>Temporary space to buffer the key.
071 * <li>Temporary space to buffer the value.
072 * </ul>
073 * <li>HFile index, which is proportional to the total number of Data Blocks.
074 * The total amount of memory needed to hold the index can be estimated as
075 * (56+AvgKeySize)*NumBlocks.
076 * </ul>
077 * Suggestions on performance optimization.
078 * <ul>
079 * <li>Minimum block size. We recommend a setting of minimum block size between
080 * 8KB to 1MB for general usage. Larger block size is preferred if files are
081 * primarily for sequential access. However, it would lead to inefficient random
082 * access (because there are more data to decompress). Smaller blocks are good
083 * for random access, but require more memory to hold the block index, and may
084 * be slower to create (because we must flush the compressor stream at the
085 * conclusion of each data block, which leads to an FS I/O flush). Further, due
086 * to the internal caching in Compression codec, the smallest possible block
087 * size would be around 20KB-30KB.
088 * <li>The current implementation does not offer true multi-threading for
089 * reading. The implementation uses FSDataInputStream seek()+read(), which is
090 * shown to be much faster than positioned-read call in single thread mode.
091 * However, it also means that if multiple threads attempt to access the same
092 * HFile (using multiple scanners) simultaneously, the actual I/O is carried out
093 * sequentially even if they access different DFS blocks (Reexamine! pread seems
094 * to be 10% faster than seek+read in my testing -- stack).
 * <li>Compression codec. Use "none" if the data is not very compressible (by
 * compressible, I mean a compression ratio at least 2:1). Generally, use "lzo"
 * as the starting point for experimenting. "gz" offers slightly better
 * compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
 * decompress, compared to "lzo".
100 * </ul>
101 *
102 * For more on the background behind HFile, see <a
103 * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
104 * <p>
105 * File is made of data blocks followed by meta data blocks (if any), a fileinfo
106 * block, data block index, meta data block index, and a fixed size trailer
107 * which records the offsets at which file changes content type.
108 * <pre>&lt;data blocks&gt;&lt;meta blocks&gt;&lt;fileinfo&gt;&lt;
109 * data index&gt;&lt;meta index&gt;&lt;trailer&gt;</pre>
 * Each block has a bit of magic at its start.  Blocks are comprised of
 * key/values.  In data blocks, they are both byte arrays.  Metadata blocks are
 * a String key and a byte array value.  An empty file looks like this:
 * <pre>&lt;fileinfo&gt;&lt;trailer&gt;</pre>.  That is, there are neither data nor meta
 * blocks present.
115 * <p>
116 * TODO: Do scanners need to be able to take a start and end row?
117 * TODO: Should BlockIndex know the name of its file?  Should it have a Path
118 * that points at its file say for the case where an index lives apart from
119 * an HFile instance?
120 */
121@InterfaceAudience.Private
122public final class HFile {
123  // LOG is being used in HFileBlock and CheckSumUtil
124  static final Logger LOG = LoggerFactory.getLogger(HFile.class);
125
126  /**
127   * Maximum length of key in HFile.
128   */
129  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;
130
131  /**
132   * Default compression: none.
133   */
134  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
135    Compression.Algorithm.NONE;
136
137  /** Minimum supported HFile format version */
138  public static final int MIN_FORMAT_VERSION = 2;
139
140  /** Maximum supported HFile format version
141   */
142  public static final int MAX_FORMAT_VERSION = 3;
143
144  /**
145   * Minimum HFile format version with support for persisting cell tags
146   */
147  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;
148
149  /** Default compression name: none. */
150  public final static String DEFAULT_COMPRESSION =
151    DEFAULT_COMPRESSION_ALGORITHM.getName();
152
153  /** Meta data block name for bloom filter bits. */
154  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
155
156  /**
157   * We assume that HFile path ends with
158   * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
159   * many levels of nesting. This is needed for identifying table and CF name
160   * from an HFile path.
161   */
162  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;
163
164  /**
165   * The number of bytes per checksum.
166   */
167  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
168
169  // For measuring number of checksum failures
170  static final LongAdder CHECKSUM_FAILURES = new LongAdder();
171
172  // For tests. Gets incremented when we read a block whether from HDFS or from Cache.
173  public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder();
174
175  /** Static instance for the metrics so that HFileReaders access the same instance */
176  static final MetricsIO metrics = new MetricsIO(new MetricsIOWrapperImpl());
177
178  /**
179   * Shutdown constructor.
180   */
181  private HFile() {}
182
183  /**
184   * Number of checksum verification failures. It also
185   * clears the counter.
186   */
187  public static final long getAndResetChecksumFailuresCount() {
188    return CHECKSUM_FAILURES.sumThenReset();
189  }
190
191  /**
192   * Number of checksum verification failures. It also
193   * clears the counter.
194   */
195  public static final long getChecksumFailuresCount() {
196    return CHECKSUM_FAILURES.sum();
197  }
198
199  public static final void updateReadLatency(long latencyMillis, boolean pread) {
200    if (pread) {
201      metrics.updateFsPreadTime(latencyMillis);
202    } else {
203      metrics.updateFsReadTime(latencyMillis);
204    }
205  }
206
207  public static final void updateWriteLatency(long latencyMillis) {
208    metrics.updateFsWriteTime(latencyMillis);
209  }
210
211  /** API required to write an {@link HFile} */
212  public interface Writer extends Closeable, CellSink, ShipperListener {
213    /** Max memstore (mvcc) timestamp in FileInfo */
214    public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");
215
216    /** Add an element to the file info map. */
217    void appendFileInfo(byte[] key, byte[] value) throws IOException;
218
219    /** @return the path to this {@link HFile} */
220    Path getPath();
221
222    /**
223     * Adds an inline block writer such as a multi-level block index writer or
224     * a compound Bloom filter writer.
225     */
226    void addInlineBlockWriter(InlineBlockWriter bloomWriter);
227
228    // The below three methods take Writables.  We'd like to undo Writables but undoing the below
229    // would be pretty painful.  Could take a byte [] or a Message but we want to be backward
230    // compatible around hfiles so would need to map between Message and Writable or byte [] and
231    // current Writable serialization.  This would be a bit of work to little gain.  Thats my
232    // thinking at moment.  St.Ack 20121129
233
234    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);
235
236    /**
237     * Store general Bloom filter in the file. This does not deal with Bloom filter
238     * internals but is necessary, since Bloom filters are stored differently
239     * in HFile version 1 and version 2.
240     */
241    void addGeneralBloomFilter(BloomFilterWriter bfw);
242
243    /**
244     * Store delete family Bloom filter in the file, which is only supported in
245     * HFile V2.
246     */
247    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;
248
249    /**
250     * Return the file context for the HFile this writer belongs to
251     */
252    HFileContext getFileContext();
253  }
254
255  /**
256   * This variety of ways to construct writers is used throughout the code, and
257   * we want to be able to swap writer implementations.
258   */
259  public static class WriterFactory {
260    protected final Configuration conf;
261    protected final CacheConfig cacheConf;
262    protected FileSystem fs;
263    protected Path path;
264    protected FSDataOutputStream ostream;
265    protected InetSocketAddress[] favoredNodes;
266    private HFileContext fileContext;
267    protected boolean shouldDropBehind = false;
268
269    WriterFactory(Configuration conf, CacheConfig cacheConf) {
270      this.conf = conf;
271      this.cacheConf = cacheConf;
272    }
273
274    public WriterFactory withPath(FileSystem fs, Path path) {
275      Preconditions.checkNotNull(fs);
276      Preconditions.checkNotNull(path);
277      this.fs = fs;
278      this.path = path;
279      return this;
280    }
281
282    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
283      Preconditions.checkNotNull(ostream);
284      this.ostream = ostream;
285      return this;
286    }
287
288    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
289      // Deliberately not checking for null here.
290      this.favoredNodes = favoredNodes;
291      return this;
292    }
293
294    public WriterFactory withFileContext(HFileContext fileContext) {
295      this.fileContext = fileContext;
296      return this;
297    }
298
299    public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
300      this.shouldDropBehind = shouldDropBehind;
301      return this;
302    }
303
304
305    public Writer create() throws IOException {
306      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
307        throw new AssertionError("Please specify exactly one of " +
308            "filesystem/path or path");
309      }
310      if (path != null) {
311        ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
312        try {
313          ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
314        } catch (UnsupportedOperationException uoe) {
315          LOG.trace("Unable to set drop behind on {}", path, uoe);
316          LOG.debug("Unable to set drop behind on {}", path.getName());
317        }
318      }
319      return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext);
320    }
321  }
322
323  /** The configuration key for HFile version to use for new files */
324  public static final String FORMAT_VERSION_KEY = "hfile.format.version";
325
326  public static int getFormatVersion(Configuration conf) {
327    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
328    checkFormatVersion(version);
329    return version;
330  }
331
332  /**
333   * Returns the factory to be used to create {@link HFile} writers.
334   * Disables block cache access for all writers created through the
335   * returned factory.
336   */
337  public static final WriterFactory getWriterFactoryNoCache(Configuration
338       conf) {
339    return HFile.getWriterFactory(conf, CacheConfig.DISABLED);
340  }
341
342  /**
343   * Returns the factory to be used to create {@link HFile} writers
344   */
345  public static final WriterFactory getWriterFactory(Configuration conf,
346      CacheConfig cacheConf) {
347    int version = getFormatVersion(conf);
348    switch (version) {
349      case 2:
350        throw new IllegalArgumentException("This should never happen. " +
351          "Did you change hfile.format.version to read v2? This version of the software writes v3" +
352          " hfiles only (but it can read v2 files without having to update hfile.format.version " +
353          "in hbase-site.xml)");
354      case 3:
355        return new HFile.WriterFactory(conf, cacheConf);
356      default:
357        throw new IllegalArgumentException("Cannot create writer for HFile " +
358            "format version " + version);
359    }
360  }
361
362  /**
363   * An abstraction used by the block index.
364   * Implementations will check cache for any asked-for block and return cached block if found.
365   * Otherwise, after reading from fs, will try and put block into cache before returning.
366   */
367  public interface CachingBlockReader {
368    /**
369     * Read in a file block.
370     * @param offset offset to read.
371     * @param onDiskBlockSize size of the block
372     * @param isCompaction is this block being read as part of a compaction
373     * @param expectedBlockType the block type we are expecting to read with this read operation,
374     *   or null to read whatever block type is available and avoid checking (that might reduce
375     *   caching efficiency of encoded data blocks)
376     * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
377     *   to be in, or null to not perform this check and return the block irrespective of the
378     *   encoding. This check only applies to data blocks and can be set to null when the caller is
379     *   expecting to read a non-data block and has set expectedBlockType accordingly.
380     * @return Block wrapped in a ByteBuffer.
381     */
382    HFileBlock readBlock(long offset, long onDiskBlockSize,
383        boolean cacheBlock, final boolean pread, final boolean isCompaction,
384        final boolean updateCacheMetrics, BlockType expectedBlockType,
385        DataBlockEncoding expectedDataBlockEncoding)
386        throws IOException;
387  }
388
389  /** An interface used by clients to open and iterate an {@link HFile}. */
390  public interface Reader extends Closeable, CachingBlockReader {
391    /**
392     * Returns this reader's "name". Usually the last component of the path.
393     * Needs to be constant as the file is being moved to support caching on
394     * write.
395     */
396    String getName();
397
398    CellComparator getComparator();
399
400    HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);
401
402    HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;
403
404    Optional<Cell> getLastKey();
405
406    Optional<Cell> midKey() throws IOException;
407
408    long length();
409
410    long getEntries();
411
412    Optional<Cell> getFirstKey();
413
414    long indexSize();
415
416    Optional<byte[]> getFirstRowKey();
417
418    Optional<byte[]> getLastRowKey();
419
420    FixedFileTrailer getTrailer();
421
422    void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader);
423    HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader();
424
425    void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader);
426    HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader();
427
428    HFileScanner getScanner(boolean cacheBlocks, boolean pread);
429
430    /**
431     * Retrieves general Bloom filter metadata as appropriate for each
432     * {@link HFile} version.
433     * Knows nothing about how that metadata is structured.
434     */
435    DataInput getGeneralBloomFilterMetadata() throws IOException;
436
437    /**
438     * Retrieves delete family Bloom filter metadata as appropriate for each
439     * {@link HFile}  version.
440     * Knows nothing about how that metadata is structured.
441     */
442    DataInput getDeleteBloomFilterMetadata() throws IOException;
443
444    Path getPath();
445
446    /** Close method with optional evictOnClose */
447    void close(boolean evictOnClose) throws IOException;
448
449    DataBlockEncoding getDataBlockEncoding();
450
451    boolean hasMVCCInfo();
452
453    /**
454     * Return the file context of the HFile this reader belongs to
455     */
456    HFileContext getFileContext();
457
458    boolean isPrimaryReplicaReader();
459
460    DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);
461
462    @VisibleForTesting
463    HFileBlock.FSReader getUncachedBlockReader();
464
465    @VisibleForTesting
466    boolean prefetchComplete();
467
468    /**
469     * To close the stream's socket. Note: This can be concurrently called from multiple threads and
470     * implementation should take care of thread safety.
471     */
472    void unbufferStream();
473
474    ReaderContext getContext();
475    HFileInfo getHFileInfo();
476    void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder);
477  }
478
479  /**
480   * Method returns the reader given the specified arguments.
481   * TODO This is a bad abstraction.  See HBASE-6635.
482   *
483   * @param context Reader context info
484   * @param fileInfo HFile info
485   * @param cacheConf Cache configuation values, cannot be null.
486   * @param conf Configuration
487   * @return an appropriate instance of HFileReader
488   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
489   */
490  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SF_SWITCH_FALLTHROUGH",
491      justification="Intentional")
492  public static Reader createReader(ReaderContext context, HFileInfo fileInfo,
493      CacheConfig cacheConf, Configuration conf) throws IOException {
494    try {
495      if (context.getReaderType() == ReaderType.STREAM) {
496        // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields
497        return new HFileStreamReader(context, fileInfo, cacheConf, conf);
498      }
499      FixedFileTrailer trailer = fileInfo.getTrailer();
500      switch (trailer.getMajorVersion()) {
501        case 2:
502          LOG.debug("Opening HFile v2 with v3 reader");
503          // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH
504        case 3:
505          return new HFilePreadReader(context, fileInfo, cacheConf, conf);
506        default:
507          throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
508      }
509    } catch (Throwable t) {
510      IOUtils.closeQuietly(context.getInputStreamWrapper());
511      throw new CorruptHFileException("Problem reading HFile Trailer from file "
512          + context.getFilePath(), t);
513    } finally {
514      context.getInputStreamWrapper().unbuffer();
515    }
516  }
517
518  /**
519   * Creates reader with cache configuration disabled
520   * @param fs filesystem
521   * @param path Path to file to read
522   * @param conf Configuration
523   * @return an active Reader instance
524   * @throws IOException Will throw a CorruptHFileException
525   *   (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
526   */
527  public static Reader createReader(FileSystem fs, Path path, Configuration conf)
528      throws IOException {
529    // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not use
530    // block cache then it is OK to set it as any value. We use true here.
531    return createReader(fs, path, CacheConfig.DISABLED, true, conf);
532  }
533
534  /**
535   * @param fs filesystem
536   * @param path Path to file to read
537   * @param cacheConf This must not be null. @see
538   *          {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
539   * @param primaryReplicaReader true if this is a reader for primary replica
540   * @param conf Configuration
541   * @return an active Reader instance
542   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile
543   *           is corrupt/invalid.
544   */
545  public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf,
546      boolean primaryReplicaReader, Configuration conf) throws IOException {
547    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
548    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
549    ReaderContext context = new ReaderContextBuilder()
550        .withFilePath(path)
551        .withInputStreamWrapper(stream)
552        .withFileSize(fs.getFileStatus(path).getLen())
553        .withFileSystem(stream.getHfs())
554        .withPrimaryReplicaReader(primaryReplicaReader)
555        .withReaderType(ReaderType.PREAD)
556        .build();
557    HFileInfo fileInfo = new HFileInfo(context, conf);
558    Reader reader = createReader(context, fileInfo, cacheConf, conf);
559    fileInfo.initMetaAndIndex(reader);
560    return reader;
561  }
562
563  /**
564   * Returns true if the specified file has a valid HFile Trailer.
565   * @param fs filesystem
566   * @param path Path to file to verify
567   * @return true if the file has a valid HFile Trailer, otherwise false
568   * @throws IOException if failed to read from the underlying stream
569   */
570  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
571    return isHFileFormat(fs, fs.getFileStatus(path));
572  }
573
574  /**
575   * Returns true if the specified file has a valid HFile Trailer.
576   * @param fs filesystem
577   * @param fileStatus the file to verify
578   * @return true if the file has a valid HFile Trailer, otherwise false
579   * @throws IOException if failed to read from the underlying stream
580   */
581  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
582      throws IOException {
583    final Path path = fileStatus.getPath();
584    final long size = fileStatus.getLen();
585    try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) {
586      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
587      assert !isHBaseChecksum; // Initially we must read with FS checksum.
588      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
589      return true;
590    } catch (IllegalArgumentException e) {
591      return false;
592    }
593  }
594
595  /**
596   * Get names of supported compression algorithms. The names are acceptable by
597   * HFile.Writer.
598   *
599   * @return Array of strings, each represents a supported compression
600   *         algorithm. Currently, the following compression algorithms are
601   *         supported.
602   *         <ul>
603   *         <li>"none" - No compression.
604   *         <li>"gz" - GZIP compression.
605   *         </ul>
606   */
607  public static String[] getSupportedCompressionAlgorithms() {
608    return Compression.getSupportedAlgorithms();
609  }
610
611  // Utility methods.
612  /*
613   * @param l Long to convert to an int.
614   * @return <code>l</code> cast as an int.
615   */
616  static int longToInt(final long l) {
617    // Expecting the size() of a block not exceeding 4GB. Assuming the
618    // size() will wrap to negative integer if it exceeds 2GB (From tfile).
619    return (int)(l & 0x00000000ffffffffL);
620  }
621
622  /**
623   * Returns all HFiles belonging to the given region directory. Could return an
624   * empty list.
625   *
626   * @param fs  The file system reference.
627   * @param regionDir  The region directory to scan.
628   * @return The list of files found.
629   * @throws IOException When scanning the files fails.
630   */
631  public static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
632      throws IOException {
633    List<Path> regionHFiles = new ArrayList<>();
634    PathFilter dirFilter = new FSUtils.DirFilter(fs);
635    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
636    for(FileStatus dir : familyDirs) {
637      FileStatus[] files = fs.listStatus(dir.getPath());
638      for (FileStatus file : files) {
639        if (!file.isDirectory() &&
640            (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
641            (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
642          regionHFiles.add(file.getPath());
643        }
644      }
645    }
646    return regionHFiles;
647  }
648
649  /**
650   * Checks the given {@link HFile} format version, and throws an exception if
651   * invalid. Note that if the version number comes from an input file and has
652   * not been verified, the caller needs to re-throw an {@link IOException} to
653   * indicate that this is not a software error, but corrupted input.
654   *
655   * @param version an HFile version
656   * @throws IllegalArgumentException if the version is invalid
657   */
658  public static void checkFormatVersion(int version)
659      throws IllegalArgumentException {
660    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
661      throw new IllegalArgumentException("Invalid HFile version: " + version
662          + " (expected to be " + "between " + MIN_FORMAT_VERSION + " and "
663          + MAX_FORMAT_VERSION + ")");
664    }
665  }
666
667
668  public static void checkHFileVersion(final Configuration c) {
669    int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
670    if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
671      throw new IllegalArgumentException("The setting for " + FORMAT_VERSION_KEY +
672        " (in your hbase-*.xml files) is " + version + " which does not match " +
673        MAX_FORMAT_VERSION +
674        "; are you running with a configuration from an older or newer hbase install (an " +
675        "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
676    }
677  }
678
679  public static void main(String[] args) throws Exception {
680    // delegate to preserve old behavior
681    HFilePrettyPrinter.main(args);
682  }
683}