View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.Closeable;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.io.SequenceInputStream;
28  import java.net.InetSocketAddress;
29  import java.nio.ByteBuffer;
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.Comparator;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Set;
36  import java.util.SortedMap;
37  import java.util.TreeMap;
38  import java.util.concurrent.atomic.AtomicLong;
39  
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.hbase.classification.InterfaceAudience;
43  import org.apache.hadoop.conf.Configuration;
44  import org.apache.hadoop.fs.FSDataInputStream;
45  import org.apache.hadoop.fs.FSDataOutputStream;
46  import org.apache.hadoop.fs.FileStatus;
47  import org.apache.hadoop.fs.FileSystem;
48  import org.apache.hadoop.fs.Path;
49  import org.apache.hadoop.fs.PathFilter;
50  import org.apache.hadoop.hbase.Cell;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.KeyValue;
53  import org.apache.hadoop.hbase.KeyValue.KVComparator;
54  import org.apache.hadoop.hbase.fs.HFileSystem;
55  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
56  import org.apache.hadoop.hbase.io.compress.Compression;
57  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
58  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
59  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
60  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
61  import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
62  import org.apache.hadoop.hbase.util.BloomFilterWriter;
63  import org.apache.hadoop.hbase.util.ByteStringer;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.ChecksumType;
66  import org.apache.hadoop.hbase.util.FSUtils;
67  import org.apache.hadoop.io.Writable;
68  
69  import com.google.common.base.Preconditions;
70  
71  /**
72   * File format for hbase.
73   * A file of sorted key/value pairs. Both keys and values are byte arrays.
74   * <p>
75   * The memory footprint of a HFile includes the following (below is taken from the
76   * <a
77   * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
78   * but applies also to HFile):
79   * <ul>
80   * <li>Some constant overhead of reading or writing a compressed block.
81   * <ul>
82   * <li>Each compressed block requires one compression/decompression codec for
83   * I/O.
84   * <li>Temporary space to buffer the key.
85   * <li>Temporary space to buffer the value.
86   * </ul>
87   * <li>HFile index, which is proportional to the total number of Data Blocks.
88   * The total amount of memory needed to hold the index can be estimated as
89   * (56+AvgKeySize)*NumBlocks.
90   * </ul>
91   * Suggestions on performance optimization.
92   * <ul>
93   * <li>Minimum block size. We recommend a setting of minimum block size between
94   * 8KB to 1MB for general usage. Larger block size is preferred if files are
95   * primarily for sequential access. However, it would lead to inefficient random
96   * access (because there are more data to decompress). Smaller blocks are good
97   * for random access, but require more memory to hold the block index, and may
98   * be slower to create (because we must flush the compressor stream at the
99   * conclusion of each data block, which leads to an FS I/O flush). Further, due
100  * to the internal caching in Compression codec, the smallest possible block
101  * size would be around 20KB-30KB.
102  * <li>The current implementation does not offer true multi-threading for
103  * reading. The implementation uses FSDataInputStream seek()+read(), which is
104  * shown to be much faster than positioned-read call in single thread mode.
105  * However, it also means that if multiple threads attempt to access the same
106  * HFile (using multiple scanners) simultaneously, the actual I/O is carried out
107  * sequentially even if they access different DFS blocks (Reexamine! pread seems
108  * to be 10% faster than seek+read in my testing -- stack).
109  * <li>Compression codec. Use "none" if the data is not very compressible (by
110  * compressible, I mean a compression ratio at least 2:1). Generally, use "lzo"
111  * as the starting point for experimenting. "gz" offers slightly better
112  * compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
113  * decompress, compared to "lzo".
114  * </ul>
115  *
116  * For more on the background behind HFile, see <a
117  * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
118  * <p>
119  * File is made of data blocks followed by meta data blocks (if any), a fileinfo
120  * block, data block index, meta data block index, and a fixed size trailer
121  * which records the offsets at which file changes content type.
122  * <pre>&lt;data blocks>&lt;meta blocks>&lt;fileinfo>&lt;data index>&lt;meta index>&lt;trailer></pre>
123  * Each block has a bit of magic at its start.  Blocks are comprised of
124  * key/values.  In data blocks, they are both byte arrays.  Metadata blocks are
125  * a String key and a byte array value.  An empty file looks like this:
126  * <pre>&lt;fileinfo>&lt;trailer></pre>.  That is, there are neither data nor meta
127  * blocks present.
128  * <p>
129  * TODO: Do scanners need to be able to take a start and end row?
130  * TODO: Should BlockIndex know the name of its file?  Should it have a Path
131  * that points at its file say for the case where an index lives apart from
132  * an HFile instance?
133  */
134 @InterfaceAudience.Private
135 public class HFile {
136   static final Log LOG = LogFactory.getLog(HFile.class);
137 
138   /**
139    * Maximum length of key in HFile.
140    */
141   public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;
142 
143   /**
144    * Default compression: none.
145    */
146   public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
147     Compression.Algorithm.NONE;
148 
149   /** Minimum supported HFile format version */
150   public static final int MIN_FORMAT_VERSION = 2;
151 
152   /** Maximum supported HFile format version
153    */
154   public static final int MAX_FORMAT_VERSION = 3;
155 
156   /**
157    * Minimum HFile format version with support for persisting cell tags
158    */
159   public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;
160 
161   /** Default compression name: none. */
162   public final static String DEFAULT_COMPRESSION =
163     DEFAULT_COMPRESSION_ALGORITHM.getName();
164 
165   /** Meta data block name for bloom filter bits. */
166   public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
167 
168   /**
169    * We assume that HFile path ends with
170    * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
171    * many levels of nesting. This is needed for identifying table and CF name
172    * from an HFile path.
173    */
174   public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;
175 
176   /**
177    * The number of bytes per checksum.
178    */
179   public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
180   // TODO: This define is done in three places.  Fix.
181   public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;
182 
183   // For measuring number of checksum failures
184   static final AtomicLong checksumFailures = new AtomicLong();
185 
186   // for test purpose
187   public static final AtomicLong dataBlockReadCnt = new AtomicLong(0);
188 
189   /**
190    * Number of checksum verification failures. It also
191    * clears the counter.
192    */
193   public static final long getChecksumFailuresCount() {
194     return checksumFailures.getAndSet(0);
195   }
196 
197   /** API required to write an {@link HFile} */
198   public interface Writer extends Closeable {
199 
200     /** Add an element to the file info map. */
201     void appendFileInfo(byte[] key, byte[] value) throws IOException;
202 
203     void append(Cell cell) throws IOException;
204 
205     /** @return the path to this {@link HFile} */
206     Path getPath();
207 
208     /**
209      * Adds an inline block writer such as a multi-level block index writer or
210      * a compound Bloom filter writer.
211      */
212     void addInlineBlockWriter(InlineBlockWriter bloomWriter);
213 
214     // The below three methods take Writables.  We'd like to undo Writables but undoing the below would be pretty
215     // painful.  Could take a byte [] or a Message but we want to be backward compatible around hfiles so would need
216     // to map between Message and Writable or byte [] and current Writable serialization.  This would be a bit of work
217     // to little gain.  Thats my thinking at moment.  St.Ack 20121129
218 
219     void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);
220 
221     /**
222      * Store general Bloom filter in the file. This does not deal with Bloom filter
223      * internals but is necessary, since Bloom filters are stored differently
224      * in HFile version 1 and version 2.
225      */
226     void addGeneralBloomFilter(BloomFilterWriter bfw);
227 
228     /**
229      * Store delete family Bloom filter in the file, which is only supported in
230      * HFile V2.
231      */
232     void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;
233 
234     /**
235      * Return the file context for the HFile this writer belongs to
236      */
237     HFileContext getFileContext();
238   }
239 
240   /**
241    * This variety of ways to construct writers is used throughout the code, and
242    * we want to be able to swap writer implementations.
243    */
244   public static abstract class WriterFactory {
245     protected final Configuration conf;
246     protected final CacheConfig cacheConf;
247     protected FileSystem fs;
248     protected Path path;
249     protected FSDataOutputStream ostream;
250     protected KVComparator comparator = KeyValue.COMPARATOR;
251     protected InetSocketAddress[] favoredNodes;
252     private HFileContext fileContext;
253 
254     WriterFactory(Configuration conf, CacheConfig cacheConf) {
255       this.conf = conf;
256       this.cacheConf = cacheConf;
257     }
258 
259     public WriterFactory withPath(FileSystem fs, Path path) {
260       Preconditions.checkNotNull(fs);
261       Preconditions.checkNotNull(path);
262       this.fs = fs;
263       this.path = path;
264       return this;
265     }
266 
267     public WriterFactory withOutputStream(FSDataOutputStream ostream) {
268       Preconditions.checkNotNull(ostream);
269       this.ostream = ostream;
270       return this;
271     }
272 
273     public WriterFactory withComparator(KVComparator comparator) {
274       Preconditions.checkNotNull(comparator);
275       this.comparator = comparator;
276       return this;
277     }
278 
279     public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
280       // Deliberately not checking for null here.
281       this.favoredNodes = favoredNodes;
282       return this;
283     }
284 
285     public WriterFactory withFileContext(HFileContext fileContext) {
286       this.fileContext = fileContext;
287       return this;
288     }
289 
290     public Writer create() throws IOException {
291       if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
292         throw new AssertionError("Please specify exactly one of " +
293             "filesystem/path or path");
294       }
295       if (path != null) {
296         ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
297       }
298       return createWriter(fs, path, ostream,
299                    comparator, fileContext);
300     }
301 
302     protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
303         KVComparator comparator, HFileContext fileContext) throws IOException;
304   }
305 
306   /** The configuration key for HFile version to use for new files */
307   public static final String FORMAT_VERSION_KEY = "hfile.format.version";
308 
309   public static int getFormatVersion(Configuration conf) {
310     int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
311     checkFormatVersion(version);
312     return version;
313   }
314 
315   /**
316    * Returns the factory to be used to create {@link HFile} writers.
317    * Disables block cache access for all writers created through the
318    * returned factory.
319    */
320   public static final WriterFactory getWriterFactoryNoCache(Configuration
321        conf) {
322     Configuration tempConf = new Configuration(conf);
323     tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
324     return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
325   }
326 
327   /**
328    * Returns the factory to be used to create {@link HFile} writers
329    */
330   public static final WriterFactory getWriterFactory(Configuration conf,
331       CacheConfig cacheConf) {
332     int version = getFormatVersion(conf);
333     switch (version) {
334     case 2:
335       return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
336     case 3:
337       return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
338     default:
339       throw new IllegalArgumentException("Cannot create writer for HFile " +
340           "format version " + version);
341     }
342   }
343 
344   /**
345    * An abstraction used by the block index.
346    * Implementations will check cache for any asked-for block and return cached block if found.
347    * Otherwise, after reading from fs, will try and put block into cache before returning.
348    */
349   public interface CachingBlockReader {
350     /**
351      * Read in a file block.
352      * @param offset offset to read.
353      * @param onDiskBlockSize size of the block
354      * @param cacheBlock
355      * @param pread
356      * @param isCompaction is this block being read as part of a compaction
357      * @param expectedBlockType the block type we are expecting to read with this read operation,
358      *  or null to read whatever block type is available and avoid checking (that might reduce
359      *  caching efficiency of encoded data blocks)
360      * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
361      *  to be in, or null to not perform this check and return the block irrespective of the
362      *  encoding. This check only applies to data blocks and can be set to null when the caller is
363      *  expecting to read a non-data block and has set expectedBlockType accordingly.
364      * @return Block wrapped in a ByteBuffer.
365      * @throws IOException
366      */
367     HFileBlock readBlock(long offset, long onDiskBlockSize,
368         boolean cacheBlock, final boolean pread, final boolean isCompaction,
369         final boolean updateCacheMetrics, BlockType expectedBlockType,
370         DataBlockEncoding expectedDataBlockEncoding)
371         throws IOException;
372   }
373 
374   /** An interface used by clients to open and iterate an {@link HFile}. */
375   public interface Reader extends Closeable, CachingBlockReader {
376     /**
377      * Returns this reader's "name". Usually the last component of the path.
378      * Needs to be constant as the file is being moved to support caching on
379      * write.
380      */
381     String getName();
382 
383     KVComparator getComparator();
384 
385     HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);
386 
387     ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;
388 
389     Map<byte[], byte[]> loadFileInfo() throws IOException;
390 
391     byte[] getLastKey();
392 
393     byte[] midkey() throws IOException;
394 
395     long length();
396 
397     long getEntries();
398 
399     byte[] getFirstKey();
400 
401     long indexSize();
402 
403     byte[] getFirstRowKey();
404 
405     byte[] getLastRowKey();
406 
407     FixedFileTrailer getTrailer();
408 
409     HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();
410 
411     HFileScanner getScanner(boolean cacheBlocks, boolean pread);
412 
413     Compression.Algorithm getCompressionAlgorithm();
414 
415     /**
416      * Retrieves general Bloom filter metadata as appropriate for each
417      * {@link HFile} version.
418      * Knows nothing about how that metadata is structured.
419      */
420     DataInput getGeneralBloomFilterMetadata() throws IOException;
421 
422     /**
423      * Retrieves delete family Bloom filter metadata as appropriate for each
424      * {@link HFile}  version.
425      * Knows nothing about how that metadata is structured.
426      */
427     DataInput getDeleteBloomFilterMetadata() throws IOException;
428 
429     Path getPath();
430 
431     /** Close method with optional evictOnClose */
432     void close(boolean evictOnClose) throws IOException;
433 
434     DataBlockEncoding getDataBlockEncoding();
435 
436     boolean hasMVCCInfo();
437 
438     /**
439      * Return the file context of the HFile this reader belongs to
440      */
441     HFileContext getFileContext();
442   }
443 
444   /**
445    * Method returns the reader given the specified arguments.
446    * TODO This is a bad abstraction.  See HBASE-6635.
447    *
448    * @param path hfile's path
449    * @param fsdis stream of path's file
450    * @param size max size of the trailer.
451    * @param cacheConf Cache configuation values, cannot be null.
452    * @param hfs
453    * @return an appropriate instance of HFileReader
454    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
455    */
456   private static Reader pickReaderVersion(Path path, FSDataInputStreamWrapper fsdis,
457       long size, CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
458     FixedFileTrailer trailer = null;
459     try {
460       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
461       assert !isHBaseChecksum; // Initially we must read with FS checksum.
462       trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
463       switch (trailer.getMajorVersion()) {
464       case 2:
465         return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs, conf);
466       case 3 :
467         return new HFileReaderV3(path, trailer, fsdis, size, cacheConf, hfs, conf);
468       default:
469         throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
470       }
471     } catch (Throwable t) {
472       try {
473         fsdis.close();
474       } catch (Throwable t2) {
475         LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
476       }
477       throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
478     }
479   }
480 
481   /**
482    * @param fs A file system
483    * @param path Path to HFile
484    * @param fsdis a stream of path's file
485    * @param size max size of the trailer.
486    * @param cacheConf Cache configuration for hfile's contents
487    * @param conf Configuration
488    * @return A version specific Hfile Reader
489    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
490    */
491   public static Reader createReader(FileSystem fs, Path path,
492       FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
493       throws IOException {
494     HFileSystem hfs = null;
495 
496     // If the fs is not an instance of HFileSystem, then create an
497     // instance of HFileSystem that wraps over the specified fs.
498     // In this case, we will not be able to avoid checksumming inside
499     // the filesystem.
500     if (!(fs instanceof HFileSystem)) {
501       hfs = new HFileSystem(fs);
502     } else {
503       hfs = (HFileSystem)fs;
504     }
505     return pickReaderVersion(path, fsdis, size, cacheConf, hfs, conf);
506   }
507 
508   /**
509    *
510    * @param fs filesystem
511    * @param path Path to file to read
512    * @param cacheConf This must not be null.  @see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
513    * @return an active Reader instance
514    * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
515    */
516   public static Reader createReader(
517       FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
518     Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
519     FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
520     return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(),
521       cacheConf, stream.getHfs(), conf);
522   }
523 
524   /**
525    * This factory method is used only by unit tests
526    */
527   static Reader createReaderFromStream(Path path,
528       FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
529       throws IOException {
530     FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
531     return pickReaderVersion(path, wrapper, size, cacheConf, null, conf);
532   }
533 
534   /**
535    * Metadata for this file. Conjured by the writer. Read in by the reader.
536    */
537   public static class FileInfo implements SortedMap<byte[], byte[]> {
538     static final String RESERVED_PREFIX = "hfile.";
539     static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
540     static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
541     static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
542     static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
543     static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
544     static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
545     public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
546     private final SortedMap<byte [], byte []> map = new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);
547 
548     public FileInfo() {
549       super();
550     }
551 
552     /**
553      * Append the given key/value pair to the file info, optionally checking the
554      * key prefix.
555      *
556      * @param k key to add
557      * @param v value to add
558      * @param checkPrefix whether to check that the provided key does not start
559      *          with the reserved prefix
560      * @return this file info object
561      * @throws IOException if the key or value is invalid
562      */
563     public FileInfo append(final byte[] k, final byte[] v,
564         final boolean checkPrefix) throws IOException {
565       if (k == null || v == null) {
566         throw new NullPointerException("Key nor value may be null");
567       }
568       if (checkPrefix && isReservedFileInfoKey(k)) {
569         throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
570             + " are reserved");
571       }
572       put(k, v);
573       return this;
574     }
575 
576     public void clear() {
577       this.map.clear();
578     }
579 
580     public Comparator<? super byte[]> comparator() {
581       return map.comparator();
582     }
583 
584     public boolean containsKey(Object key) {
585       return map.containsKey(key);
586     }
587 
588     public boolean containsValue(Object value) {
589       return map.containsValue(value);
590     }
591 
592     public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
593       return map.entrySet();
594     }
595 
596     public boolean equals(Object o) {
597       return map.equals(o);
598     }
599 
600     public byte[] firstKey() {
601       return map.firstKey();
602     }
603 
604     public byte[] get(Object key) {
605       return map.get(key);
606     }
607 
608     public int hashCode() {
609       return map.hashCode();
610     }
611 
612     public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
613       return this.map.headMap(toKey);
614     }
615 
616     public boolean isEmpty() {
617       return map.isEmpty();
618     }
619 
620     public Set<byte[]> keySet() {
621       return map.keySet();
622     }
623 
624     public byte[] lastKey() {
625       return map.lastKey();
626     }
627 
628     public byte[] put(byte[] key, byte[] value) {
629       return this.map.put(key, value);
630     }
631 
632     public void putAll(Map<? extends byte[], ? extends byte[]> m) {
633       this.map.putAll(m);
634     }
635 
636     public byte[] remove(Object key) {
637       return this.map.remove(key);
638     }
639 
640     public int size() {
641       return map.size();
642     }
643 
644     public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
645       return this.map.subMap(fromKey, toKey);
646     }
647 
648     public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
649       return this.map.tailMap(fromKey);
650     }
651 
652     public Collection<byte[]> values() {
653       return map.values();
654     }
655 
656     /**
657      * Write out this instance on the passed in <code>out</code> stream.
658      * We write it as a protobuf.
659      * @param out
660      * @throws IOException
661      * @see #read(DataInputStream)
662      */
663     void write(final DataOutputStream out) throws IOException {
664       HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
665       for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
666         HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
667         bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
668         bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
669         builder.addMapEntry(bbpBuilder.build());
670       }
671       out.write(ProtobufUtil.PB_MAGIC);
672       builder.build().writeDelimitedTo(out);
673     }
674 
675     /**
676      * Populate this instance with what we find on the passed in <code>in</code> stream.
677      * Can deserialize protobuf of old Writables format.
678      * @param in
679      * @throws IOException
680      * @see #write(DataOutputStream)
681      */
682     void read(final DataInputStream in) throws IOException {
683       // This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
684       int pblen = ProtobufUtil.lengthOfPBMagic();
685       byte [] pbuf = new byte[pblen];
686       if (in.markSupported()) in.mark(pblen);
687       int read = in.read(pbuf);
688       if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
689       if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
690         parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
691       } else {
692         if (in.markSupported()) {
693           in.reset();
694           parseWritable(in);
695         } else {
696           // We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
697           ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
698           SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
699           // TODO: Am I leaking anything here wrapping the passed in stream?  We are not calling close on the wrapped
700           // streams but they should be let go after we leave this context?  I see that we keep a reference to the
701           // passed in inputstream but since we no longer have a reference to this after we leave, we should be ok.
702           parseWritable(new DataInputStream(sis));
703         }
704       }
705     }
706 
707     /** Now parse the old Writable format.  It was a list of Map entries.  Each map entry was a key and a value of
708      * a byte [].  The old map format had a byte before each entry that held a code which was short for the key or
709      * value type.  We know it was a byte [] so in below we just read and dump it.
710      * @throws IOException
711      */
712     void parseWritable(final DataInputStream in) throws IOException {
713       // First clear the map.  Otherwise we will just accumulate entries every time this method is called.
714       this.map.clear();
715       // Read the number of entries in the map
716       int entries = in.readInt();
717       // Then read each key/value pair
718       for (int i = 0; i < entries; i++) {
719         byte [] key = Bytes.readByteArray(in);
720         // We used to read a byte that encoded the class type.  Read and ignore it because it is always byte [] in hfile
721         in.readByte();
722         byte [] value = Bytes.readByteArray(in);
723         this.map.put(key, value);
724       }
725     }
726 
727     /**
728      * Fill our map with content of the pb we read off disk
729      * @param fip protobuf message to read
730      */
731     void parsePB(final HFileProtos.FileInfoProto fip) {
732       this.map.clear();
733       for (BytesBytesPair pair: fip.getMapEntryList()) {
734         this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
735       }
736     }
737   }
738 
739   /** Return true if the given file info key is reserved for internal use. */
740   public static boolean isReservedFileInfoKey(byte[] key) {
741     return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
742   }
743 
744   /**
745    * Get names of supported compression algorithms. The names are acceptable by
746    * HFile.Writer.
747    *
748    * @return Array of strings, each represents a supported compression
749    *         algorithm. Currently, the following compression algorithms are
750    *         supported.
751    *         <ul>
752    *         <li>"none" - No compression.
753    *         <li>"gz" - GZIP compression.
754    *         </ul>
755    */
756   public static String[] getSupportedCompressionAlgorithms() {
757     return Compression.getSupportedAlgorithms();
758   }
759 
760   // Utility methods.
761   /*
762    * @param l Long to convert to an int.
763    * @return <code>l</code> cast as an int.
764    */
765   static int longToInt(final long l) {
766     // Expecting the size() of a block not exceeding 4GB. Assuming the
767     // size() will wrap to negative integer if it exceeds 2GB (From tfile).
768     return (int)(l & 0x00000000ffffffffL);
769   }
770 
771   /**
772    * Returns all files belonging to the given region directory. Could return an
773    * empty list.
774    *
775    * @param fs  The file system reference.
776    * @param regionDir  The region directory to scan.
777    * @return The list of files found.
778    * @throws IOException When scanning the files fails.
779    */
780   static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
781       throws IOException {
782     List<Path> res = new ArrayList<Path>();
783     PathFilter dirFilter = new FSUtils.DirFilter(fs);
784     FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
785     for(FileStatus dir : familyDirs) {
786       FileStatus[] files = fs.listStatus(dir.getPath());
787       for (FileStatus file : files) {
788         if (!file.isDirectory()) {
789           res.add(file.getPath());
790         }
791       }
792     }
793     return res;
794   }
795 
796   /**
797    * Checks the given {@link HFile} format version, and throws an exception if
798    * invalid. Note that if the version number comes from an input file and has
799    * not been verified, the caller needs to re-throw an {@link IOException} to
800    * indicate that this is not a software error, but corrupted input.
801    *
802    * @param version an HFile version
803    * @throws IllegalArgumentException if the version is invalid
804    */
805   public static void checkFormatVersion(int version)
806       throws IllegalArgumentException {
807     if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
808       throw new IllegalArgumentException("Invalid HFile version: " + version
809           + " (expected to be " + "between " + MIN_FORMAT_VERSION + " and "
810           + MAX_FORMAT_VERSION + ")");
811     }
812   }
813 
814   public static void main(String[] args) throws Exception {
815     // delegate to preserve old behavior
816     HFilePrettyPrinter.main(args);
817   }
818 }