View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.Closeable;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.io.SequenceInputStream;
28  import java.net.InetSocketAddress;
29  import java.util.ArrayList;
30  import java.util.Collection;
31  import java.util.Comparator;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Set;
35  import java.util.SortedMap;
36  import java.util.TreeMap;
37  
38  import org.apache.hadoop.hbase.util.ByteStringer;
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  import org.apache.hadoop.hbase.classification.InterfaceAudience;
42  import org.apache.hadoop.conf.Configuration;
43  import org.apache.hadoop.fs.FSDataInputStream;
44  import org.apache.hadoop.fs.FSDataOutputStream;
45  import org.apache.hadoop.fs.FileStatus;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.fs.PathFilter;
49  import org.apache.hadoop.hbase.Cell;
50  import org.apache.hadoop.hbase.CellComparator;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.fs.HFileSystem;
53  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
54  import org.apache.hadoop.hbase.io.compress.Compression;
55  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
56  import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
57  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
59  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
60  import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
61  import org.apache.hadoop.hbase.util.BloomFilterWriter;
62  import org.apache.hadoop.hbase.util.Bytes;
63  import org.apache.hadoop.hbase.util.Counter;
64  import org.apache.hadoop.hbase.util.FSUtils;
65  import org.apache.hadoop.io.Writable;
66  
67  import com.google.common.annotations.VisibleForTesting;
68  import com.google.common.base.Preconditions;
69  
70  /**
71   * File format for hbase.
72   * A file of sorted key/value pairs. Both keys and values are byte arrays.
73   * <p>
74   * The memory footprint of a HFile includes the following (below is taken from the
75   * <a
76   * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
77   * but applies also to HFile):
78   * <ul>
79   * <li>Some constant overhead of reading or writing a compressed block.
80   * <ul>
81   * <li>Each compressed block requires one compression/decompression codec for
82   * I/O.
83   * <li>Temporary space to buffer the key.
84   * <li>Temporary space to buffer the value.
85   * </ul>
86   * <li>HFile index, which is proportional to the total number of Data Blocks.
87   * The total amount of memory needed to hold the index can be estimated as
88   * (56+AvgKeySize)*NumBlocks.
89   * </ul>
90   * Suggestions on performance optimization.
91   * <ul>
92   * <li>Minimum block size. We recommend a setting of minimum block size between
93   * 8KB to 1MB for general usage. Larger block size is preferred if files are
94   * primarily for sequential access. However, it would lead to inefficient random
95   * access (because there are more data to decompress). Smaller blocks are good
96   * for random access, but require more memory to hold the block index, and may
97   * be slower to create (because we must flush the compressor stream at the
98   * conclusion of each data block, which leads to an FS I/O flush). Further, due
99   * to the internal caching in Compression codec, the smallest possible block
100  * size would be around 20KB-30KB.
101  * <li>The current implementation does not offer true multi-threading for
102  * reading. The implementation uses FSDataInputStream seek()+read(), which is
103  * shown to be much faster than positioned-read call in single thread mode.
104  * However, it also means that if multiple threads attempt to access the same
105  * HFile (using multiple scanners) simultaneously, the actual I/O is carried out
106  * sequentially even if they access different DFS blocks (Reexamine! pread seems
107  * to be 10% faster than seek+read in my testing -- stack).
108  * <li>Compression codec. Use "none" if the data is not very compressible (by
109  * compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo"
110  * as the starting point for experimenting. "gz" offers a slightly better
111  * compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to
112  * decompress, compared to "lzo".
113  * </ul>
114  *
115  * For more on the background behind HFile, see <a
116  * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
117  * <p>
118  * File is made of data blocks followed by meta data blocks (if any), a fileinfo
119  * block, data block index, meta data block index, and a fixed size trailer
120  * which records the offsets at which file changes content type.
121  * <pre>&lt;data blocks&gt;&lt;meta blocks&gt;&lt;fileinfo&gt;&lt;
122  * data index&gt;&lt;meta index&gt;&lt;trailer&gt;</pre>
123  * Each block has a bit of magic at its start.  Blocks are comprised of
124  * key/values.  In data blocks, they are both byte arrays.  Metadata blocks are
125  * a String key and a byte array value.  An empty file looks like this:
126  * <pre>&lt;fileinfo&gt;&lt;trailer&gt;</pre>.  That is, there are neither data nor meta
127  * blocks present.
128  * <p>
129  * TODO: Do scanners need to be able to take a start and end row?
130  * TODO: Should BlockIndex know the name of its file?  Should it have a Path
131  * that points at its file say for the case where an index lives apart from
132  * an HFile instance?
133  */
134 @InterfaceAudience.Private
135 public class HFile {
136   // LOG is being used in HFileBlock and CheckSumUtil
137   static final Log LOG = LogFactory.getLog(HFile.class);
138 
139   /**
140    * Maximum length of key in HFile.
141    */
142   public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;
143 
144   /**
145    * Default compression: none.
146    */
147   public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
148     Compression.Algorithm.NONE;
149 
150   /** Minimum supported HFile format version */
151   public static final int MIN_FORMAT_VERSION = 2;
152 
153   /** Maximum supported HFile format version
154    */
155   public static final int MAX_FORMAT_VERSION = 3;
156 
157   /**
158    * Minimum HFile format version with support for persisting cell tags
159    */
160   public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;
161 
162   /** Default compression name: none. */
163   public final static String DEFAULT_COMPRESSION =
164     DEFAULT_COMPRESSION_ALGORITHM.getName();
165 
166   /** Meta data block name for bloom filter bits. */
167   public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
168 
169   /**
170    * We assume that HFile path ends with
171    * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
172    * many levels of nesting. This is needed for identifying table and CF name
173    * from an HFile path.
174    */
175   public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;
176 
177   /**
178    * The number of bytes per checksum.
179    */
180   public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
181 
182   // For measuring number of checksum failures
183   static final Counter CHECKSUM_FAILURES = new Counter();
184 
185   // For tests. Gets incremented when we read a block whether from HDFS or from Cache.
186   public static final Counter DATABLOCK_READ_COUNT = new Counter();
187 
188   /**
189    * Number of checksum verification failures. It also
190    * clears the counter.
191    */
192   public static final long getChecksumFailuresCount() {
193     long count = CHECKSUM_FAILURES.get();
194     CHECKSUM_FAILURES.set(0);
195     return count;
196   }
197 
198   /** API required to write an {@link HFile} */
199   public interface Writer extends Closeable {
200     /** Max memstore (mvcc) timestamp in FileInfo */
201     public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");
202 
203     /** Add an element to the file info map. */
204     void appendFileInfo(byte[] key, byte[] value) throws IOException;
205 
206     void append(Cell cell) throws IOException;
207 
208     /** @return the path to this {@link HFile} */
209     Path getPath();
210 
211     /**
212      * Adds an inline block writer such as a multi-level block index writer or
213      * a compound Bloom filter writer.
214      */
215     void addInlineBlockWriter(InlineBlockWriter bloomWriter);
216 
217     // The below three methods take Writables.  We'd like to undo Writables but undoing the below would be pretty
218     // painful.  Could take a byte [] or a Message but we want to be backward compatible around hfiles so would need
219     // to map between Message and Writable or byte [] and current Writable serialization.  This would be a bit of work
220     // to little gain.  Thats my thinking at moment.  St.Ack 20121129
221 
222     void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);
223 
224     /**
225      * Store general Bloom filter in the file. This does not deal with Bloom filter
226      * internals but is necessary, since Bloom filters are stored differently
227      * in HFile version 1 and version 2.
228      */
229     void addGeneralBloomFilter(BloomFilterWriter bfw);
230 
231     /**
232      * Store delete family Bloom filter in the file, which is only supported in
233      * HFile V2.
234      */
235     void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;
236 
237     /**
238      * Return the file context for the HFile this writer belongs to
239      */
240     HFileContext getFileContext();
241   }
242 
243   /**
244    * This variety of ways to construct writers is used throughout the code, and
245    * we want to be able to swap writer implementations.
246    */
247   public static class WriterFactory {
248     protected final Configuration conf;
249     protected final CacheConfig cacheConf;
250     protected FileSystem fs;
251     protected Path path;
252     protected FSDataOutputStream ostream;
253     protected CellComparator comparator = 
254         CellComparator.COMPARATOR;
255     protected InetSocketAddress[] favoredNodes;
256     private HFileContext fileContext;
257     protected boolean shouldDropBehind = false;
258 
259     WriterFactory(Configuration conf, CacheConfig cacheConf) {
260       this.conf = conf;
261       this.cacheConf = cacheConf;
262     }
263 
264     public WriterFactory withPath(FileSystem fs, Path path) {
265       Preconditions.checkNotNull(fs);
266       Preconditions.checkNotNull(path);
267       this.fs = fs;
268       this.path = path;
269       return this;
270     }
271 
272     public WriterFactory withOutputStream(FSDataOutputStream ostream) {
273       Preconditions.checkNotNull(ostream);
274       this.ostream = ostream;
275       return this;
276     }
277 
278     public WriterFactory withComparator(CellComparator comparator) {
279       Preconditions.checkNotNull(comparator);
280       this.comparator = comparator;
281       return this;
282     }
283 
284     public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
285       // Deliberately not checking for null here.
286       this.favoredNodes = favoredNodes;
287       return this;
288     }
289 
290     public WriterFactory withFileContext(HFileContext fileContext) {
291       this.fileContext = fileContext;
292       return this;
293     }
294 
295     public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
296       this.shouldDropBehind = shouldDropBehind;
297       return this;
298     }
299 
300 
301     public Writer create() throws IOException {
302       if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
303         throw new AssertionError("Please specify exactly one of " +
304             "filesystem/path or path");
305       }
306       if (path != null) {
307         ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
308         try {
309           ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
310         } catch (UnsupportedOperationException uoe) {
311           if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe);
312           else if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path);
313         }
314       }
315       return new HFileWriterImpl(conf, cacheConf, path, ostream, comparator, fileContext);
316     }
317   }
318 
319   /** The configuration key for HFile version to use for new files */
320   public static final String FORMAT_VERSION_KEY = "hfile.format.version";
321 
322   public static int getFormatVersion(Configuration conf) {
323     int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
324     checkFormatVersion(version);
325     return version;
326   }
327 
328   /**
329    * Returns the factory to be used to create {@link HFile} writers.
330    * Disables block cache access for all writers created through the
331    * returned factory.
332    */
333   public static final WriterFactory getWriterFactoryNoCache(Configuration
334        conf) {
335     Configuration tempConf = new Configuration(conf);
336     tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
337     return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
338   }
339 
340   /**
341    * Returns the factory to be used to create {@link HFile} writers
342    */
343   public static final WriterFactory getWriterFactory(Configuration conf,
344       CacheConfig cacheConf) {
345     int version = getFormatVersion(conf);
346     switch (version) {
347     case 2:
348       throw new IllegalArgumentException("This should never happen. " +
349         "Did you change hfile.format.version to read v2? This version of the software writes v3" +
350         " hfiles only (but it can read v2 files without having to update hfile.format.version " +
351         "in hbase-site.xml)");
352     case 3:
353       return new HFile.WriterFactory(conf, cacheConf);
354     default:
355       throw new IllegalArgumentException("Cannot create writer for HFile " +
356           "format version " + version);
357     }
358   }
359 
360   /**
361    * An abstraction used by the block index.
362    * Implementations will check cache for any asked-for block and return cached block if found.
363    * Otherwise, after reading from fs, will try and put block into cache before returning.
364    */
365   public interface CachingBlockReader {
366     /**
367      * Read in a file block.
368      * @param offset offset to read.
369      * @param onDiskBlockSize size of the block
370      * @param cacheBlock
371      * @param pread
372      * @param isCompaction is this block being read as part of a compaction
373      * @param expectedBlockType the block type we are expecting to read with this read operation,
374      *  or null to read whatever block type is available and avoid checking (that might reduce
375      *  caching efficiency of encoded data blocks)
376      * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
377      *  to be in, or null to not perform this check and return the block irrespective of the
378      *  encoding. This check only applies to data blocks and can be set to null when the caller is
379      *  expecting to read a non-data block and has set expectedBlockType accordingly.
380      * @return Block wrapped in a ByteBuffer.
381      * @throws IOException
382      */
383     HFileBlock readBlock(long offset, long onDiskBlockSize,
384         boolean cacheBlock, final boolean pread, final boolean isCompaction,
385         final boolean updateCacheMetrics, BlockType expectedBlockType,
386         DataBlockEncoding expectedDataBlockEncoding)
387         throws IOException;
388 
389     /**
390      * Return the given block back to the cache, if it was obtained from cache.
391      * @param block Block to be returned.
392      */
393     void returnBlock(HFileBlock block);
394   }
395 
396   /** An interface used by clients to open and iterate an {@link HFile}. */
397   public interface Reader extends Closeable, CachingBlockReader {
398     /**
399      * Returns this reader's "name". Usually the last component of the path.
400      * Needs to be constant as the file is being moved to support caching on
401      * write.
402      */
403     String getName();
404 
405     CellComparator getComparator();
406 
407     HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);
408 
409     HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;
410 
411     Map<byte[], byte[]> loadFileInfo() throws IOException;
412 
413     Cell getLastKey();
414 
415     Cell midkey() throws IOException;
416 
417     long length();
418 
419     long getEntries();
420 
421     Cell getFirstKey();
422 
423     long indexSize();
424 
425     byte[] getFirstRowKey();
426 
427     byte[] getLastRowKey();
428 
429     FixedFileTrailer getTrailer();
430 
431     HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();
432 
433     HFileScanner getScanner(boolean cacheBlocks, boolean pread);
434 
435     Compression.Algorithm getCompressionAlgorithm();
436 
437     /**
438      * Retrieves general Bloom filter metadata as appropriate for each
439      * {@link HFile} version.
440      * Knows nothing about how that metadata is structured.
441      */
442     DataInput getGeneralBloomFilterMetadata() throws IOException;
443 
444     /**
445      * Retrieves delete family Bloom filter metadata as appropriate for each
446      * {@link HFile}  version.
447      * Knows nothing about how that metadata is structured.
448      */
449     DataInput getDeleteBloomFilterMetadata() throws IOException;
450 
451     Path getPath();
452 
453     /** Close method with optional evictOnClose */
454     void close(boolean evictOnClose) throws IOException;
455 
456     DataBlockEncoding getDataBlockEncoding();
457 
458     boolean hasMVCCInfo();
459 
460     /**
461      * Return the file context of the HFile this reader belongs to
462      */
463     HFileContext getFileContext();
464     
465     boolean isPrimaryReplicaReader();
466     
467     void setPrimaryReplicaReader(boolean isPrimaryReplicaReader);
468 
469     boolean shouldIncludeMemstoreTS();
470 
471     boolean isDecodeMemstoreTS();
472 
473     DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);
474 
475     @VisibleForTesting
476     HFileBlock.FSReader getUncachedBlockReader();
477 
478     @VisibleForTesting
479     boolean prefetchComplete();
480   }
481 
482   /**
483    * Method returns the reader given the specified arguments.
484    * TODO This is a bad abstraction.  See HBASE-6635.
485    *
486    * @param path hfile's path
487    * @param fsdis stream of path's file
488    * @param size max size of the trailer.
489    * @param cacheConf Cache configuation values, cannot be null.
490    * @param hfs
491    * @return an appropriate instance of HFileReader
492    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
493    */
494   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SF_SWITCH_FALLTHROUGH",
495       justification="Intentional")
496   private static Reader pickReaderVersion(Path path, FSDataInputStreamWrapper fsdis,
497       long size, CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
498     FixedFileTrailer trailer = null;
499     try {
500       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
501       assert !isHBaseChecksum; // Initially we must read with FS checksum.
502       trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
503       switch (trailer.getMajorVersion()) {
504       case 2:
505         LOG.debug("Opening HFile v2 with v3 reader");
506         // Fall through. FindBugs: SF_SWITCH_FALLTHROUGH
507       case 3 :
508         return new HFileReaderImpl(path, trailer, fsdis, size, cacheConf, hfs, conf);
509       default:
510         throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
511       }
512     } catch (Throwable t) {
513       try {
514         fsdis.close();
515       } catch (Throwable t2) {
516         LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
517       }
518       throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
519     }
520   }
521 
522   /**
523    * @param fs A file system
524    * @param path Path to HFile
525    * @param fsdis a stream of path's file
526    * @param size max size of the trailer.
527    * @param cacheConf Cache configuration for hfile's contents
528    * @param conf Configuration
529    * @return A version specific Hfile Reader
530    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
531    */
532   @SuppressWarnings("resource")
533   public static Reader createReader(FileSystem fs, Path path,
534       FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
535       throws IOException {
536     HFileSystem hfs = null;
537 
538     // If the fs is not an instance of HFileSystem, then create an
539     // instance of HFileSystem that wraps over the specified fs.
540     // In this case, we will not be able to avoid checksumming inside
541     // the filesystem.
542     if (!(fs instanceof HFileSystem)) {
543       hfs = new HFileSystem(fs);
544     } else {
545       hfs = (HFileSystem)fs;
546     }
547     return pickReaderVersion(path, fsdis, size, cacheConf, hfs, conf);
548   }
549 
550   /**
551    *
552    * @param fs filesystem
553    * @param path Path to file to read
554    * @param cacheConf This must not be null.  @see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
555    * @return an active Reader instance
556    * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
557    */
558   public static Reader createReader(
559       FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
560     Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
561     FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
562     return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(),
563       cacheConf, stream.getHfs(), conf);
564   }
565 
566   /**
567    * This factory method is used only by unit tests
568    */
569   static Reader createReaderFromStream(Path path,
570       FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
571       throws IOException {
572     FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
573     return pickReaderVersion(path, wrapper, size, cacheConf, null, conf);
574   }
575 
576   /**
577    * Returns true if the specified file has a valid HFile Trailer.
578    * @param fs filesystem
579    * @param path Path to file to verify
580    * @return true if the file has a valid HFile Trailer, otherwise false
581    * @throws IOException if failed to read from the underlying stream
582    */
583   public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
584     return isHFileFormat(fs, fs.getFileStatus(path));
585   }
586 
587   /**
588    * Returns true if the specified file has a valid HFile Trailer.
589    * @param fs filesystem
590    * @param fileStatus the file to verify
591    * @return true if the file has a valid HFile Trailer, otherwise false
592    * @throws IOException if failed to read from the underlying stream
593    */
594   public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
595       throws IOException {
596     final Path path = fileStatus.getPath();
597     final long size = fileStatus.getLen();
598     FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
599     try {
600       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
601       assert !isHBaseChecksum; // Initially we must read with FS checksum.
602       FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
603       return true;
604     } catch (IllegalArgumentException e) {
605       return false;
606     } catch (IOException e) {
607       throw e;
608     } finally {
609       try {
610         fsdis.close();
611       } catch (Throwable t) {
612         LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
613       }
614     }
615   }
616 
617   /**
618    * Metadata for this file. Conjured by the writer. Read in by the reader.
619    */
620   public static class FileInfo implements SortedMap<byte[], byte[]> {
621     static final String RESERVED_PREFIX = "hfile.";
622     static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
623     static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
624     static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
625     static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
626     static final byte [] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
627     static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
628     static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
629     public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
630     private final SortedMap<byte [], byte []> map = new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);
631 
632     public FileInfo() {
633       super();
634     }
635 
636     /**
637      * Append the given key/value pair to the file info, optionally checking the
638      * key prefix.
639      *
640      * @param k key to add
641      * @param v value to add
642      * @param checkPrefix whether to check that the provided key does not start
643      *          with the reserved prefix
644      * @return this file info object
645      * @throws IOException if the key or value is invalid
646      */
647     public FileInfo append(final byte[] k, final byte[] v,
648         final boolean checkPrefix) throws IOException {
649       if (k == null || v == null) {
650         throw new NullPointerException("Key nor value may be null");
651       }
652       if (checkPrefix && isReservedFileInfoKey(k)) {
653         throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
654             + " are reserved");
655       }
656       put(k, v);
657       return this;
658     }
659 
660     public void clear() {
661       this.map.clear();
662     }
663 
664     public Comparator<? super byte[]> comparator() {
665       return map.comparator();
666     }
667 
668     public boolean containsKey(Object key) {
669       return map.containsKey(key);
670     }
671 
672     public boolean containsValue(Object value) {
673       return map.containsValue(value);
674     }
675 
676     public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
677       return map.entrySet();
678     }
679 
680     public boolean equals(Object o) {
681       return map.equals(o);
682     }
683 
684     public byte[] firstKey() {
685       return map.firstKey();
686     }
687 
688     public byte[] get(Object key) {
689       return map.get(key);
690     }
691 
692     public int hashCode() {
693       return map.hashCode();
694     }
695 
696     public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
697       return this.map.headMap(toKey);
698     }
699 
700     public boolean isEmpty() {
701       return map.isEmpty();
702     }
703 
704     public Set<byte[]> keySet() {
705       return map.keySet();
706     }
707 
708     public byte[] lastKey() {
709       return map.lastKey();
710     }
711 
712     public byte[] put(byte[] key, byte[] value) {
713       return this.map.put(key, value);
714     }
715 
716     public void putAll(Map<? extends byte[], ? extends byte[]> m) {
717       this.map.putAll(m);
718     }
719 
720     public byte[] remove(Object key) {
721       return this.map.remove(key);
722     }
723 
724     public int size() {
725       return map.size();
726     }
727 
728     public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
729       return this.map.subMap(fromKey, toKey);
730     }
731 
732     public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
733       return this.map.tailMap(fromKey);
734     }
735 
736     public Collection<byte[]> values() {
737       return map.values();
738     }
739 
740     /**
741      * Write out this instance on the passed in <code>out</code> stream.
742      * We write it as a protobuf.
743      * @param out
744      * @throws IOException
745      * @see #read(DataInputStream)
746      */
747     void write(final DataOutputStream out) throws IOException {
748       HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
749       for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
750         HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
751         bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
752         bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
753         builder.addMapEntry(bbpBuilder.build());
754       }
755       out.write(ProtobufMagic.PB_MAGIC);
756       builder.build().writeDelimitedTo(out);
757     }
758 
759     /**
760      * Populate this instance with what we find on the passed in <code>in</code> stream.
761      * Can deserialize protobuf of old Writables format.
762      * @param in
763      * @throws IOException
764      * @see #write(DataOutputStream)
765      */
766     void read(final DataInputStream in) throws IOException {
767       // This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
768       int pblen = ProtobufUtil.lengthOfPBMagic();
769       byte [] pbuf = new byte[pblen];
770       if (in.markSupported()) in.mark(pblen);
771       int read = in.read(pbuf);
772       if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
773       if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
774         parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
775       } else {
776         if (in.markSupported()) {
777           in.reset();
778           parseWritable(in);
779         } else {
780           // We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
781           ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
782           SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
783           // TODO: Am I leaking anything here wrapping the passed in stream?  We are not calling close on the wrapped
784           // streams but they should be let go after we leave this context?  I see that we keep a reference to the
785           // passed in inputstream but since we no longer have a reference to this after we leave, we should be ok.
786           parseWritable(new DataInputStream(sis));
787         }
788       }
789     }
790 
791     /** Now parse the old Writable format.  It was a list of Map entries.  Each map entry was a key and a value of
792      * a byte [].  The old map format had a byte before each entry that held a code which was short for the key or
793      * value type.  We know it was a byte [] so in below we just read and dump it.
794      * @throws IOException
795      */
796     void parseWritable(final DataInputStream in) throws IOException {
797       // First clear the map.  Otherwise we will just accumulate entries every time this method is called.
798       this.map.clear();
799       // Read the number of entries in the map
800       int entries = in.readInt();
801       // Then read each key/value pair
802       for (int i = 0; i < entries; i++) {
803         byte [] key = Bytes.readByteArray(in);
804         // We used to read a byte that encoded the class type.  Read and ignore it because it is always byte [] in hfile
805         in.readByte();
806         byte [] value = Bytes.readByteArray(in);
807         this.map.put(key, value);
808       }
809     }
810 
811     /**
812      * Fill our map with content of the pb we read off disk
813      * @param fip protobuf message to read
814      */
815     void parsePB(final HFileProtos.FileInfoProto fip) {
816       this.map.clear();
817       for (BytesBytesPair pair: fip.getMapEntryList()) {
818         this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
819       }
820     }
821   }
822 
823   /** Return true if the given file info key is reserved for internal use. */
824   public static boolean isReservedFileInfoKey(byte[] key) {
825     return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
826   }
827 
828   /**
829    * Get names of supported compression algorithms. The names are acceptable by
830    * HFile.Writer.
831    *
832    * @return Array of strings, each represents a supported compression
833    *         algorithm. Currently, the following compression algorithms are
834    *         supported.
835    *         <ul>
836    *         <li>"none" - No compression.
837    *         <li>"gz" - GZIP compression.
838    *         </ul>
839    */
840   public static String[] getSupportedCompressionAlgorithms() {
841     return Compression.getSupportedAlgorithms();
842   }
843 
844   // Utility methods.
845   /*
846    * @param l Long to convert to an int.
847    * @return <code>l</code> cast as an int.
848    */
849   static int longToInt(final long l) {
850     // Expecting the size() of a block not exceeding 4GB. Assuming the
851     // size() will wrap to negative integer if it exceeds 2GB (From tfile).
852     return (int)(l & 0x00000000ffffffffL);
853   }
854 
855   /**
856    * Returns all HFiles belonging to the given region directory. Could return an
857    * empty list.
858    *
859    * @param fs  The file system reference.
860    * @param regionDir  The region directory to scan.
861    * @return The list of files found.
862    * @throws IOException When scanning the files fails.
863    */
864   static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
865       throws IOException {
866     List<Path> regionHFiles = new ArrayList<Path>();
867     PathFilter dirFilter = new FSUtils.DirFilter(fs);
868     FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
869     for(FileStatus dir : familyDirs) {
870       FileStatus[] files = fs.listStatus(dir.getPath());
871       for (FileStatus file : files) {
872         if (!file.isDirectory() &&
873             (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
874             (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
875           regionHFiles.add(file.getPath());
876         }
877       }
878     }
879     return regionHFiles;
880   }
881 
882   /**
883    * Checks the given {@link HFile} format version, and throws an exception if
884    * invalid. Note that if the version number comes from an input file and has
885    * not been verified, the caller needs to re-throw an {@link IOException} to
886    * indicate that this is not a software error, but corrupted input.
887    *
888    * @param version an HFile version
889    * @throws IllegalArgumentException if the version is invalid
890    */
891   public static void checkFormatVersion(int version)
892       throws IllegalArgumentException {
893     if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
894       throw new IllegalArgumentException("Invalid HFile version: " + version
895           + " (expected to be " + "between " + MIN_FORMAT_VERSION + " and "
896           + MAX_FORMAT_VERSION + ")");
897     }
898   }
899 
900 
901   public static void checkHFileVersion(final Configuration c) {
902     int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
903     if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
904       throw new IllegalArgumentException("The setting for " + FORMAT_VERSION_KEY +
905         " (in your hbase-*.xml files) is " + version + " which does not match " +
906         MAX_FORMAT_VERSION +
907         "; are you running with a configuration from an older or newer hbase install (an " +
908         "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
909     }
910   }
911 
912   public static void main(String[] args) throws Exception {
913     // delegate to preserve old behavior
914     HFilePrettyPrinter.main(args);
915   }
916 }