001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.io.Closeable;
021import java.io.DataInput;
022import java.io.IOException;
023import java.net.InetSocketAddress;
024import java.util.ArrayList;
025import java.util.List;
026import java.util.Optional;
027import java.util.concurrent.atomic.LongAdder;
028import org.apache.commons.io.IOUtils;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataOutputStream;
031import org.apache.hadoop.fs.FileStatus;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.fs.PathFilter;
035import org.apache.hadoop.hbase.CellComparator;
036import org.apache.hadoop.hbase.ExtendedCell;
037import org.apache.hadoop.hbase.HConstants;
038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
039import org.apache.hadoop.hbase.io.MetricsIO;
040import org.apache.hadoop.hbase.io.compress.Compression;
041import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
042import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
043import org.apache.hadoop.hbase.ipc.RpcServer;
044import org.apache.hadoop.hbase.regionserver.CellSink;
045import org.apache.hadoop.hbase.regionserver.ShipperListener;
046import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
047import org.apache.hadoop.hbase.util.BloomFilterWriter;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.FSUtils;
050import org.apache.hadoop.io.Writable;
051import org.apache.yetus.audience.InterfaceAudience;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
056
057/**
058 * File format for hbase. A file of sorted key/value pairs. Both keys and values are byte arrays.
059 * <p>
060 * The memory footprint of a HFile includes the following (below is taken from the <a
061 * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation but applies also
062 * to HFile):
063 * <ul>
064 * <li>Some constant overhead of reading or writing a compressed block.
065 * <ul>
066 * <li>Each compressed block requires one compression/decompression codec for I/O.
067 * <li>Temporary space to buffer the key.
068 * <li>Temporary space to buffer the value.
069 * </ul>
070 * <li>HFile index, which is proportional to the total number of Data Blocks. The total amount of
071 * memory needed to hold the index can be estimated as (56+AvgKeySize)*NumBlocks.
072 * </ul>
073 * Suggestions on performance optimization.
074 * <ul>
075 * <li>Minimum block size. We recommend a setting of minimum block size between 8KB to 1MB for
076 * general usage. Larger block size is preferred if files are primarily for sequential access.
077 * However, it would lead to inefficient random access (because there are more data to decompress).
078 * Smaller blocks are good for random access, but require more memory to hold the block index, and
079 * may be slower to create (because we must flush the compressor stream at the conclusion of each
080 * data block, which leads to an FS I/O flush). Further, due to the internal caching in Compression
081 * codec, the smallest possible block size would be around 20KB-30KB.
082 * <li>The current implementation does not offer true multi-threading for reading. The
083 * implementation uses FSDataInputStream seek()+read(), which is shown to be much faster than
084 * positioned-read call in single thread mode. However, it also means that if multiple threads
085 * attempt to access the same HFile (using multiple scanners) simultaneously, the actual I/O is
086 * carried out sequentially even if they access different DFS blocks (Reexamine! pread seems to be
087 * 10% faster than seek+read in my testing -- stack).
 * <li>Compression codec. Use "none" if the data is not very compressible (by compressible, I mean a
 * compression ratio at least 2:1). Generally, use "lzo" as the starting point for experimenting.
 * "gz" offers a slightly better compression ratio over "lzo" but requires 4x CPU to compress and 2x
 * CPU to decompress, compared to "lzo".
092 * </ul>
093 * For more on the background behind HFile, see <a
094 * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
095 * <p>
096 * File is made of data blocks followed by meta data blocks (if any), a fileinfo block, data block
097 * index, meta data block index, and a fixed size trailer which records the offsets at which file
098 * changes content type.
099 *
100 * <pre>
101 * &lt;data blocks&gt;&lt;meta blocks&gt;&lt;fileinfo&gt;&lt;
102 * data index&gt;&lt;meta index&gt;&lt;trailer&gt;
103 * </pre>
104 *
 * Each block has a bit of magic at its start. Blocks are comprised of key/values. In data blocks,
 * they are both byte arrays. Metadata blocks are a String key and a byte array value. An empty file
 * looks like this:
108 *
109 * <pre>
110 * &lt;fileinfo&gt;&lt;trailer&gt;
111 * </pre>
112 *
 * That is, there are neither data nor meta blocks present.
114 * <p>
115 * TODO: Do scanners need to be able to take a start and end row? TODO: Should BlockIndex know the
116 * name of its file? Should it have a Path that points at its file say for the case where an index
117 * lives apart from an HFile instance?
118 */
119@InterfaceAudience.Private
120public final class HFile {
121  // LOG is being used in HFileBlock and CheckSumUtil
122  static final Logger LOG = LoggerFactory.getLogger(HFile.class);
123
124  /**
125   * Maximum length of key in HFile.
126   */
127  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;
128
129  /**
130   * Default compression: none.
131   */
132  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
133    Compression.Algorithm.NONE;
134
135  /** Minimum supported HFile format version */
136  public static final int MIN_FORMAT_VERSION = 2;
137
138  /**
139   * Maximum supported HFile format version
140   */
141  public static final int MAX_FORMAT_VERSION = 3;
142
143  /**
144   * Minimum HFile format version with support for persisting cell tags
145   */
146  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;
147
148  /** Default compression name: none. */
149  public final static String DEFAULT_COMPRESSION = DEFAULT_COMPRESSION_ALGORITHM.getName();
150
151  /** Meta data block name for bloom filter bits. */
152  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
153
154  /**
155   * We assume that HFile path ends with ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at
156   * least this many levels of nesting. This is needed for identifying table and CF name from an
157   * HFile path.
158   */
159  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;
160
161  /**
162   * The number of bytes per checksum.
163   */
164  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
165
166  // For measuring number of checksum failures
167  static final LongAdder CHECKSUM_FAILURES = new LongAdder();
168
169  // For tests. Gets incremented when we read a block whether from HDFS or from Cache.
170  public static final LongAdder DATABLOCK_READ_COUNT = new LongAdder();
171
172  /**
173   * Shutdown constructor.
174   */
175  private HFile() {
176  }
177
178  /**
179   * Number of checksum verification failures. It also clears the counter.
180   */
181  public static final long getAndResetChecksumFailuresCount() {
182    return CHECKSUM_FAILURES.sumThenReset();
183  }
184
185  /**
186   * Number of checksum verification failures. It also clears the counter.
187   */
188  public static final long getChecksumFailuresCount() {
189    return CHECKSUM_FAILURES.sum();
190  }
191
192  public static final void updateReadLatency(long latencyMillis, boolean pread, boolean tooSlow) {
193    RpcServer.getCurrentCall().ifPresent(call -> call.updateFsReadTime(latencyMillis));
194    if (pread) {
195      MetricsIO.getInstance().updateFsPreadTime(latencyMillis);
196    } else {
197      MetricsIO.getInstance().updateFsReadTime(latencyMillis);
198    }
199    if (tooSlow) {
200      MetricsIO.getInstance().incrSlowFsRead();
201    }
202  }
203
204  public static final void updateWriteLatency(long latencyMillis) {
205    MetricsIO.getInstance().updateFsWriteTime(latencyMillis);
206  }
207
208  /** API required to write an {@link HFile} */
209  public interface Writer extends Closeable, CellSink, ShipperListener {
210    /** Max memstore (mvcc) timestamp in FileInfo */
211    public static final byte[] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");
212
213    /** Add an element to the file info map. */
214    void appendFileInfo(byte[] key, byte[] value) throws IOException;
215
216    /**
217     * Add TimestampRange and earliest put timestamp to Metadata
218     */
219    void appendTrackedTimestampsToMetadata() throws IOException;
220
221    /**
222     * Add Custom cell timestamp to Metadata
223     */
224    public void appendCustomCellTimestampsToMetadata(TimeRangeTracker timeRangeTracker)
225      throws IOException;
226
227    /** Returns the path to this {@link HFile} */
228    Path getPath();
229
230    /**
231     * Adds an inline block writer such as a multi-level block index writer or a compound Bloom
232     * filter writer.
233     */
234    void addInlineBlockWriter(InlineBlockWriter bloomWriter);
235
236    // The below three methods take Writables. We'd like to undo Writables but undoing the below
237    // would be pretty painful. Could take a byte [] or a Message but we want to be backward
238    // compatible around hfiles so would need to map between Message and Writable or byte [] and
239    // current Writable serialization. This would be a bit of work to little gain. Thats my
240    // thinking at moment. St.Ack 20121129
241
242    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);
243
244    /**
245     * Store general Bloom filter in the file. This does not deal with Bloom filter internals but is
246     * necessary, since Bloom filters are stored differently in HFile version 1 and version 2.
247     */
248    void addGeneralBloomFilter(BloomFilterWriter bfw);
249
250    /**
251     * Store delete family Bloom filter in the file, which is only supported in HFile V2.
252     */
253    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;
254
255    /**
256     * Return the file context for the HFile this writer belongs to
257     */
258    HFileContext getFileContext();
259  }
260
261  /**
262   * This variety of ways to construct writers is used throughout the code, and we want to be able
263   * to swap writer implementations.
264   */
265  public static class WriterFactory {
266    protected final Configuration conf;
267    protected final CacheConfig cacheConf;
268    protected FileSystem fs;
269    protected Path path;
270    protected FSDataOutputStream ostream;
271    protected InetSocketAddress[] favoredNodes;
272    private HFileContext fileContext;
273    protected boolean shouldDropBehind = false;
274
275    WriterFactory(Configuration conf, CacheConfig cacheConf) {
276      this.conf = conf;
277      this.cacheConf = cacheConf;
278    }
279
280    public WriterFactory withPath(FileSystem fs, Path path) {
281      Preconditions.checkNotNull(fs);
282      Preconditions.checkNotNull(path);
283      this.fs = fs;
284      this.path = path;
285      return this;
286    }
287
288    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
289      Preconditions.checkNotNull(ostream);
290      this.ostream = ostream;
291      return this;
292    }
293
294    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
295      // Deliberately not checking for null here.
296      this.favoredNodes = favoredNodes;
297      return this;
298    }
299
300    public WriterFactory withFileContext(HFileContext fileContext) {
301      this.fileContext = fileContext;
302      return this;
303    }
304
305    public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
306      this.shouldDropBehind = shouldDropBehind;
307      return this;
308    }
309
310    public Writer create() throws IOException {
311      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
312        throw new AssertionError("Please specify exactly one of " + "filesystem/path or path");
313      }
314      if (path != null) {
315        ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
316        try {
317          ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
318        } catch (UnsupportedOperationException uoe) {
319          LOG.trace("Unable to set drop behind on {}", path, uoe);
320          LOG.debug("Unable to set drop behind on {}", path.getName());
321        }
322      }
323      return new HFileWriterImpl(conf, cacheConf, path, ostream, fileContext);
324    }
325  }
326
327  /** The configuration key for HFile version to use for new files */
328  public static final String FORMAT_VERSION_KEY = "hfile.format.version";
329
330  public static int getFormatVersion(Configuration conf) {
331    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
332    checkFormatVersion(version);
333    return version;
334  }
335
336  /**
337   * Returns the factory to be used to create {@link HFile} writers. Disables block cache access for
338   * all writers created through the returned factory.
339   */
340  public static final WriterFactory getWriterFactoryNoCache(Configuration conf) {
341    return HFile.getWriterFactory(conf, CacheConfig.DISABLED);
342  }
343
344  /**
345   * Returns the factory to be used to create {@link HFile} writers
346   */
347  public static final WriterFactory getWriterFactory(Configuration conf, CacheConfig cacheConf) {
348    int version = getFormatVersion(conf);
349    switch (version) {
350      case 2:
351        throw new IllegalArgumentException("This should never happen. "
352          + "Did you change hfile.format.version to read v2? This version of the software writes v3"
353          + " hfiles only (but it can read v2 files without having to update hfile.format.version "
354          + "in hbase-site.xml)");
355      case 3:
356        return new HFile.WriterFactory(conf, cacheConf);
357      default:
358        throw new IllegalArgumentException(
359          "Cannot create writer for HFile " + "format version " + version);
360    }
361  }
362
363  /**
364   * An abstraction used by the block index. Implementations will check cache for any asked-for
365   * block and return cached block if found. Otherwise, after reading from fs, will try and put
366   * block into cache before returning.
367   */
368  public interface CachingBlockReader {
369    /**
370     * Read in a file block.
371     * @param offset                    offset to read.
372     * @param onDiskBlockSize           size of the block
373     * @param isCompaction              is this block being read as part of a compaction
374     * @param expectedBlockType         the block type we are expecting to read with this read
375     *                                  operation, or null to read whatever block type is available
376     *                                  and avoid checking (that might reduce caching efficiency of
377     *                                  encoded data blocks)
378     * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
379     *                                  to be in, or null to not perform this check and return the
380     *                                  block irrespective of the encoding. This check only applies
381     *                                  to data blocks and can be set to null when the caller is
382     *                                  expecting to read a non-data block and has set
383     *                                  expectedBlockType accordingly.
384     * @return Block wrapped in a ByteBuffer.
385     */
386    HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread,
387      final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType,
388      DataBlockEncoding expectedDataBlockEncoding) throws IOException;
389
390    HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, final boolean pread,
391      final boolean isCompaction, final boolean updateCacheMetrics, BlockType expectedBlockType,
392      DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly) throws IOException;
393  }
394
395  /** An interface used by clients to open and iterate an {@link HFile}. */
396  public interface Reader extends Closeable, CachingBlockReader {
397    /**
398     * Returns this reader's "name". Usually the last component of the path. Needs to be constant as
399     * the file is being moved to support caching on write.
400     */
401    String getName();
402
403    CellComparator getComparator();
404
405    HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread,
406      boolean isCompaction);
407
408    HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;
409
410    Optional<ExtendedCell> getLastKey();
411
412    Optional<ExtendedCell> midKey() throws IOException;
413
414    long length();
415
416    long getEntries();
417
418    Optional<ExtendedCell> getFirstKey();
419
420    long indexSize();
421
422    Optional<byte[]> getFirstRowKey();
423
424    Optional<byte[]> getLastRowKey();
425
426    FixedFileTrailer getTrailer();
427
428    void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader);
429
430    HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader();
431
432    void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader);
433
434    HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader();
435
436    HFileScanner getScanner(Configuration conf, boolean cacheBlocks, boolean pread);
437
438    /**
439     * Retrieves general Bloom filter metadata as appropriate for each {@link HFile} version. Knows
440     * nothing about how that metadata is structured.
441     */
442    DataInput getGeneralBloomFilterMetadata() throws IOException;
443
444    /**
445     * Retrieves delete family Bloom filter metadata as appropriate for each {@link HFile} version.
446     * Knows nothing about how that metadata is structured.
447     */
448    DataInput getDeleteBloomFilterMetadata() throws IOException;
449
450    Path getPath();
451
452    /** Close method with optional evictOnClose */
453    void close(boolean evictOnClose) throws IOException;
454
455    DataBlockEncoding getDataBlockEncoding();
456
457    boolean hasMVCCInfo();
458
459    /**
460     * Return the file context of the HFile this reader belongs to
461     */
462    HFileContext getFileContext();
463
464    boolean isPrimaryReplicaReader();
465
466    DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);
467
468    HFileBlock.FSReader getUncachedBlockReader();
469
470    boolean prefetchComplete();
471
472    boolean prefetchStarted();
473
474    /**
475     * To close the stream's socket. Note: This can be concurrently called from multiple threads and
476     * implementation should take care of thread safety.
477     */
478    void unbufferStream();
479
480    ReaderContext getContext();
481
482    HFileInfo getHFileInfo();
483
484    void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder);
485  }
486
487  /**
488   * Method returns the reader given the specified arguments. TODO This is a bad abstraction. See
489   * HBASE-6635.
490   * @param context   Reader context info
491   * @param fileInfo  HFile info
492   * @param cacheConf Cache configuation values, cannot be null.
493   * @param conf      Configuration
494   * @return an appropriate instance of HFileReader
495   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
496   */
497  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SF_SWITCH_FALLTHROUGH",
498      justification = "Intentional")
499  public static Reader createReader(ReaderContext context, HFileInfo fileInfo,
500    CacheConfig cacheConf, Configuration conf) throws IOException {
501    try {
502      if (context.getReaderType() == ReaderType.STREAM) {
503        // stream reader will share trailer with pread reader, see HFileStreamReader#copyFields
504        return new HFileStreamReader(context, fileInfo, cacheConf, conf);
505      }
506      FixedFileTrailer trailer = fileInfo.getTrailer();
507      switch (trailer.getMajorVersion()) {
508        case 2:
509          LOG.debug("Opening HFile v2 with v3 reader");
510          // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH
511        case 3:
512          return new HFilePreadReader(context, fileInfo, cacheConf, conf);
513        default:
514          throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
515      }
516    } catch (Throwable t) {
517      IOUtils.closeQuietly(context.getInputStreamWrapper(),
518        e -> LOG.warn("failed to close input stream wrapper", e));
519      throw new CorruptHFileException(
520        "Problem reading HFile Trailer from file " + context.getFilePath(), t);
521    } finally {
522      context.getInputStreamWrapper().unbuffer();
523    }
524  }
525
526  /**
527   * Creates reader with cache configuration disabled
528   * @param fs   filesystem
529   * @param path Path to file to read
530   * @param conf Configuration
531   * @return an active Reader instance
532   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile
533   *                     is corrupt/invalid.
534   */
535  public static Reader createReader(FileSystem fs, Path path, Configuration conf)
536    throws IOException {
537    // The primaryReplicaReader is mainly used for constructing block cache key, so if we do not use
538    // block cache then it is OK to set it as any value. We use true here.
539    return createReader(fs, path, CacheConfig.DISABLED, true, conf);
540  }
541
542  /**
543   * @param fs                   filesystem
544   * @param path                 Path to file to read
545   * @param cacheConf            This must not be null.
546   * @param primaryReplicaReader true if this is a reader for primary replica
547   * @param conf                 Configuration
548   * @return an active Reader instance
549   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile
550   *                     is corrupt/invalid.
551   * @see CacheConfig#CacheConfig(Configuration)
552   */
553  public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf,
554    boolean primaryReplicaReader, Configuration conf) throws IOException {
555    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
556    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
557    ReaderContext context =
558      new ReaderContextBuilder().withFilePath(path).withInputStreamWrapper(stream)
559        .withFileSize(fs.getFileStatus(path).getLen()).withFileSystem(stream.getHfs())
560        .withPrimaryReplicaReader(primaryReplicaReader).withReaderType(ReaderType.PREAD).build();
561    HFileInfo fileInfo = new HFileInfo(context, conf);
562    Reader reader = createReader(context, fileInfo, cacheConf, conf);
563    fileInfo.initMetaAndIndex(reader);
564    return reader;
565  }
566
567  /**
568   * Returns true if the specified file has a valid HFile Trailer.
569   * @param fs   filesystem
570   * @param path Path to file to verify
571   * @return true if the file has a valid HFile Trailer, otherwise false
572   * @throws IOException if failed to read from the underlying stream
573   */
574  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
575    return isHFileFormat(fs, fs.getFileStatus(path));
576  }
577
578  /**
579   * Returns true if the specified file has a valid HFile Trailer.
580   * @param fs         filesystem
581   * @param fileStatus the file to verify
582   * @return true if the file has a valid HFile Trailer, otherwise false
583   * @throws IOException if failed to read from the underlying stream
584   */
585  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
586    throws IOException {
587    final Path path = fileStatus.getPath();
588    final long size = fileStatus.getLen();
589    try (FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path)) {
590      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
591      assert !isHBaseChecksum; // Initially we must read with FS checksum.
592      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
593      return true;
594    } catch (IllegalArgumentException e) {
595      return false;
596    }
597  }
598
599  /**
600   * Get names of supported compression algorithms. The names are acceptable by HFile.Writer.
601   * @return Array of strings, each represents a supported compression algorithm. Currently, the
602   *         following compression algorithms are supported.
603   *         <ul>
604   *         <li>"none" - No compression.
605   *         <li>"gz" - GZIP compression.
606   *         </ul>
607   */
608  public static String[] getSupportedCompressionAlgorithms() {
609    return Compression.getSupportedAlgorithms();
610  }
611
612  // Utility methods.
613  /*
614   * @param l Long to convert to an int.
615   * @return <code>l</code> cast as an int.
616   */
617  static int longToInt(final long l) {
618    // Expecting the size() of a block not exceeding 4GB. Assuming the
619    // size() will wrap to negative integer if it exceeds 2GB (From tfile).
620    return (int) (l & 0x00000000ffffffffL);
621  }
622
623  /**
624   * Returns all HFiles belonging to the given region directory. Could return an empty list.
625   * @param fs        The file system reference.
626   * @param regionDir The region directory to scan.
627   * @return The list of files found.
628   * @throws IOException When scanning the files fails.
629   */
630  public static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException {
631    List<Path> regionHFiles = new ArrayList<>();
632    PathFilter dirFilter = new FSUtils.DirFilter(fs);
633    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
634    for (FileStatus dir : familyDirs) {
635      FileStatus[] files = fs.listStatus(dir.getPath());
636      for (FileStatus file : files) {
637        if (
638          !file.isDirectory()
639            && (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME))
640            && (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))
641        ) {
642          regionHFiles.add(file.getPath());
643        }
644      }
645    }
646    return regionHFiles;
647  }
648
649  /**
650   * Checks the given {@link HFile} format version, and throws an exception if invalid. Note that if
651   * the version number comes from an input file and has not been verified, the caller needs to
652   * re-throw an {@link IOException} to indicate that this is not a software error, but corrupted
653   * input.
654   * @param version an HFile version
655   * @throws IllegalArgumentException if the version is invalid
656   */
657  public static void checkFormatVersion(int version) throws IllegalArgumentException {
658    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
659      throw new IllegalArgumentException("Invalid HFile version: " + version + " (expected to be "
660        + "between " + MIN_FORMAT_VERSION + " and " + MAX_FORMAT_VERSION + ")");
661    }
662  }
663
664  public static void checkHFileVersion(final Configuration c) {
665    int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
666    if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
667      throw new IllegalArgumentException(
668        "The setting for " + FORMAT_VERSION_KEY + " (in your hbase-*.xml files) is " + version
669          + " which does not match " + MAX_FORMAT_VERSION
670          + "; are you running with a configuration from an older or newer hbase install (an "
671          + "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
672    }
673  }
674
675  public static void main(String[] args) throws Exception {
676    // delegate to preserve old behavior
677    HFilePrettyPrinter.main(args);
678  }
679}