View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.Closeable;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.io.SequenceInputStream;
28  import java.net.InetSocketAddress;
29  import java.nio.ByteBuffer;
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.Comparator;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Set;
36  import java.util.SortedMap;
37  import java.util.TreeMap;
38  import java.util.concurrent.atomic.AtomicLong;
39  
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.hbase.classification.InterfaceAudience;
43  import org.apache.hadoop.conf.Configuration;
44  import org.apache.hadoop.fs.FSDataInputStream;
45  import org.apache.hadoop.fs.FSDataOutputStream;
46  import org.apache.hadoop.fs.FileStatus;
47  import org.apache.hadoop.fs.FileSystem;
48  import org.apache.hadoop.fs.Path;
49  import org.apache.hadoop.fs.PathFilter;
50  import org.apache.hadoop.hbase.Cell;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.KeyValue;
53  import org.apache.hadoop.hbase.KeyValue.KVComparator;
54  import org.apache.hadoop.hbase.fs.HFileSystem;
55  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
56  import org.apache.hadoop.hbase.io.compress.Compression;
57  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
58  import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
59  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
60  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
61  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
62  import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
63  import org.apache.hadoop.hbase.util.BloomFilterWriter;
64  import org.apache.hadoop.hbase.util.ByteStringer;
65  import org.apache.hadoop.hbase.util.Bytes;
66  import org.apache.hadoop.hbase.util.ChecksumType;
67  import org.apache.hadoop.hbase.util.FSUtils;
68  import org.apache.hadoop.io.Writable;
69  
70  import com.google.common.annotations.VisibleForTesting;
71  import com.google.common.base.Preconditions;
72  
73  /**
74   * File format for hbase.
75   * A file of sorted key/value pairs. Both keys and values are byte arrays.
76   * <p>
77   * The memory footprint of a HFile includes the following (below is taken from the
78   * <a
79   * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
80   * but applies also to HFile):
81   * <ul>
82   * <li>Some constant overhead of reading or writing a compressed block.
83   * <ul>
84   * <li>Each compressed block requires one compression/decompression codec for
85   * I/O.
86   * <li>Temporary space to buffer the key.
87   * <li>Temporary space to buffer the value.
88   * </ul>
89   * <li>HFile index, which is proportional to the total number of Data Blocks.
90   * The total amount of memory needed to hold the index can be estimated as
91   * (56+AvgKeySize)*NumBlocks.
92   * </ul>
93   * Suggestions on performance optimization.
94   * <ul>
95   * <li>Minimum block size. We recommend a setting of minimum block size between
96   * 8KB to 1MB for general usage. Larger block size is preferred if files are
97   * primarily for sequential access. However, it would lead to inefficient random
98   * access (because there are more data to decompress). Smaller blocks are good
99   * for random access, but require more memory to hold the block index, and may
100  * be slower to create (because we must flush the compressor stream at the
101  * conclusion of each data block, which leads to an FS I/O flush). Further, due
102  * to the internal caching in Compression codec, the smallest possible block
103  * size would be around 20KB-30KB.
104  * <li>The current implementation does not offer true multi-threading for
105  * reading. The implementation uses FSDataInputStream seek()+read(), which is
106  * shown to be much faster than positioned-read call in single thread mode.
107  * However, it also means that if multiple threads attempt to access the same
108  * HFile (using multiple scanners) simultaneously, the actual I/O is carried out
109  * sequentially even if they access different DFS blocks (Reexamine! pread seems
110  * to be 10% faster than seek+read in my testing -- stack).
 * <li>Compression codec. Use "none" if the data is not very compressible (by
 * compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo"
 * as the starting point for experimenting. "gz" offers a slightly better
 * compression ratio than "lzo" but requires 4x the CPU to compress and 2x the CPU
 * to decompress, compared to "lzo".
116  * </ul>
117  *
118  * For more on the background behind HFile, see <a
119  * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
120  * <p>
121  * File is made of data blocks followed by meta data blocks (if any), a fileinfo
122  * block, data block index, meta data block index, and a fixed size trailer
123  * which records the offsets at which file changes content type.
124  * <pre>&lt;data blocks>&lt;meta blocks>&lt;fileinfo>&lt;data index>&lt;meta index>&lt;trailer></pre>
 * Each block has a bit of magic at its start.  Blocks are composed of
 * key/values.  In data blocks, they are both byte arrays.  Metadata blocks are
 * a String key and a byte array value.  An empty file looks like this:
 * <pre>&lt;fileinfo>&lt;trailer></pre>.  That is, there are neither data nor meta
 * blocks present.
130  * <p>
131  * TODO: Do scanners need to be able to take a start and end row?
132  * TODO: Should BlockIndex know the name of its file?  Should it have a Path
133  * that points at its file say for the case where an index lives apart from
134  * an HFile instance?
135  */
136 @InterfaceAudience.Private
137 public class HFile {
  /** Logger used by the static helpers of this class. */
  static final Log LOG = LogFactory.getLog(HFile.class);

  /**
   * Maximum length of key in HFile.
   */
  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /**
   * Default compression: none.
   */
  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;

  /** Minimum supported HFile format version. */
  public static final int MIN_FORMAT_VERSION = 2;

  /**
   * Maximum supported HFile format version. Also the value
   * {@link #getFormatVersion(Configuration)} falls back to when
   * {@link #FORMAT_VERSION_KEY} is not set.
   */
  public static final int MAX_FORMAT_VERSION = 3;

  /**
   * Minimum HFile format version with support for persisting cell tags.
   */
  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;

  /** Default compression name: the name of {@link #DEFAULT_COMPRESSION_ALGORITHM}. */
  public final static String DEFAULT_COMPRESSION =
    DEFAULT_COMPRESSION_ALGORITHM.getName();

  /** Meta data block name for bloom filter bits. */
  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";

  /**
   * We assume that HFile path ends with
   * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
   * many levels of nesting. This is needed for identifying table and CF name
   * from an HFile path.
   */
  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;

  /**
   * The default number of bytes per checksum (16 KB).
   */
  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
  // TODO: This define is done in three places.  Fix.
  /** Default checksum type: CRC32. */
  public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;

  // Counts checksum failures; read and reset to zero by getChecksumFailuresCount().
  static final AtomicLong checksumFailures = new AtomicLong();

  // For test purposes only. Nothing in the visible part of this file updates it;
  // presumably readers bump it once per data block read -- confirm against the reader code.
  public static final AtomicLong dataBlockReadCnt = new AtomicLong(0);
190 
191   /**
192    * Number of checksum verification failures. It also
193    * clears the counter.
194    */
195   public static final long getChecksumFailuresCount() {
196     return checksumFailures.getAndSet(0);
197   }
198 
  /**
   * API required to write an {@link HFile}. Being {@link Closeable}, a writer
   * must be closed by its owner when writing is finished.
   */
  public interface Writer extends Closeable {
    /** Max memstore (mvcc) timestamp in FileInfo */
    public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");

    /**
     * Add an element to the file info map.
     *
     * @param key file info key
     * @param value file info value
     * @throws IOException if the entry cannot be added
     */
    void appendFileInfo(byte[] key, byte[] value) throws IOException;

    /**
     * Appends a cell to the file being written.
     *
     * @param cell the cell to append
     * @throws IOException if the cell cannot be written
     */
    void append(Cell cell) throws IOException;

    /** @return the path to this {@link HFile} */
    Path getPath();

    /**
     * Adds an inline block writer such as a multi-level block index writer or
     * a compound Bloom filter writer.
     *
     * @param bloomWriter the inline block writer to register
     */
    void addInlineBlockWriter(InlineBlockWriter bloomWriter);

    // The below three methods take Writables.  We'd like to undo Writables but undoing the below would be pretty
    // painful.  Could take a byte [] or a Message but we want to be backward compatible around hfiles so would need
    // to map between Message and Writable or byte [] and current Writable serialization.  This would be a bit of work
    // to little gain.  Thats my thinking at moment.  St.Ack 20121129

    /**
     * Adds a named meta block whose content is supplied by a {@link Writable}.
     *
     * @param bloomFilterMetaKey name of the meta block
     * @param metaWriter the Writable that provides the block content
     */
    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);

    /**
     * Store general Bloom filter in the file. This does not deal with Bloom filter
     * internals but is necessary, since Bloom filters are stored differently
     * in HFile version 1 and version 2.
     *
     * @param bfw the Bloom filter writer to store
     */
    void addGeneralBloomFilter(BloomFilterWriter bfw);

    /**
     * Store delete family Bloom filter in the file, which is only supported in
     * HFile V2.
     *
     * @param bfw the delete family Bloom filter writer to store
     * @throws IOException if the Bloom filter cannot be stored
     */
    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;

    /**
     * Return the file context for the HFile this writer belongs to
     */
    HFileContext getFileContext();
  }
243 
244   /**
245    * This variety of ways to construct writers is used throughout the code, and
246    * we want to be able to swap writer implementations.
247    */
248   public static abstract class WriterFactory {
249     protected final Configuration conf;
250     protected final CacheConfig cacheConf;
251     protected FileSystem fs;
252     protected Path path;
253     protected FSDataOutputStream ostream;
254     protected KVComparator comparator = KeyValue.COMPARATOR;
255     protected InetSocketAddress[] favoredNodes;
256     private HFileContext fileContext;
257 
258     WriterFactory(Configuration conf, CacheConfig cacheConf) {
259       this.conf = conf;
260       this.cacheConf = cacheConf;
261     }
262 
263     public WriterFactory withPath(FileSystem fs, Path path) {
264       Preconditions.checkNotNull(fs);
265       Preconditions.checkNotNull(path);
266       this.fs = fs;
267       this.path = path;
268       return this;
269     }
270 
271     public WriterFactory withOutputStream(FSDataOutputStream ostream) {
272       Preconditions.checkNotNull(ostream);
273       this.ostream = ostream;
274       return this;
275     }
276 
277     public WriterFactory withComparator(KVComparator comparator) {
278       Preconditions.checkNotNull(comparator);
279       this.comparator = comparator;
280       return this;
281     }
282 
283     public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
284       // Deliberately not checking for null here.
285       this.favoredNodes = favoredNodes;
286       return this;
287     }
288 
289     public WriterFactory withFileContext(HFileContext fileContext) {
290       this.fileContext = fileContext;
291       return this;
292     }
293 
294     public Writer create() throws IOException {
295       if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
296         throw new AssertionError("Please specify exactly one of " +
297             "filesystem/path or path");
298       }
299       if (path != null) {
300         ostream = HFileWriterImpl.createOutputStream(conf, fs, path, favoredNodes);
301       }
302       return createWriter(fs, path, ostream,
303                    comparator, fileContext);
304     }
305 
306     protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
307         KVComparator comparator, HFileContext fileContext) throws IOException;
308   }
309 
310   /** The configuration key for HFile version to use for new files */
311   public static final String FORMAT_VERSION_KEY = "hfile.format.version";
312 
313   public static int getFormatVersion(Configuration conf) {
314     int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
315     checkFormatVersion(version);
316     return version;
317   }
318 
319   /**
320    * Returns the factory to be used to create {@link HFile} writers.
321    * Disables block cache access for all writers created through the
322    * returned factory.
323    */
324   public static final WriterFactory getWriterFactoryNoCache(Configuration
325        conf) {
326     Configuration tempConf = new Configuration(conf);
327     tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
328     return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
329   }
330 
331   /**
332    * Returns the factory to be used to create {@link HFile} writers
333    */
334   public static final WriterFactory getWriterFactory(Configuration conf,
335       CacheConfig cacheConf) {
336     int version = getFormatVersion(conf);
337     switch (version) {
338     case 2:
339       throw new IllegalArgumentException("This should never happen. " +
340         "Did you change hfile.format.version to read v2? This version of the software writes v3" +
341         " hfiles only (but it can read v2 files without having to update hfile.format.version " +
342         "in hbase-site.xml)");
343     case 3:
344       return new HFileWriterFactory(conf, cacheConf);
345     default:
346       throw new IllegalArgumentException("Cannot create writer for HFile " +
347           "format version " + version);
348     }
349   }
350 
  /**
   * An abstraction used by the block index.
   * Implementations will check cache for any asked-for block and return cached block if found.
   * Otherwise, after reading from fs, will try and put block into cache before returning.
   */
  public interface CachingBlockReader {
    /**
     * Read in a file block.
     * @param offset offset to read.
     * @param onDiskBlockSize size of the block
     * @param cacheBlock presumably whether the block should be put into the block cache
     *  after a read from the filesystem -- confirm against the implementation
     * @param pread presumably whether to use a positional read rather than seek+read --
     *  confirm against the implementation
     * @param isCompaction is this block being read as part of a compaction
     * @param updateCacheMetrics presumably whether this read should be counted in the cache
     *  metrics -- confirm against the implementation
     * @param expectedBlockType the block type we are expecting to read with this read operation,
     *  or null to read whatever block type is available and avoid checking (that might reduce
     *  caching efficiency of encoded data blocks)
     * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
     *  to be in, or null to not perform this check and return the block irrespective of the
     *  encoding. This check only applies to data blocks and can be set to null when the caller is
     *  expecting to read a non-data block and has set expectedBlockType accordingly.
     * @return the block that was read, as an {@link HFileBlock}
     * @throws IOException if the block cannot be read
     */
    HFileBlock readBlock(long offset, long onDiskBlockSize,
        boolean cacheBlock, final boolean pread, final boolean isCompaction,
        final boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException;
  }
380 
  /**
   * An interface used by clients to open and iterate an {@link HFile}.
   * A reader is {@link Closeable} and also serves as the
   * {@link CachingBlockReader} for its own blocks.
   */
  public interface Reader extends Closeable, CachingBlockReader {
    /**
     * Returns this reader's "name". Usually the last component of the path.
     * Needs to be constant as the file is being moved to support caching on
     * write.
     */
    String getName();

    /** @return the comparator associated with this file */
    KVComparator getComparator();

    /**
     * Three-argument scanner factory; see also {@link #getScanner(boolean, boolean)}.
     *
     * @param cacheBlocks presumably whether blocks read by the scanner should be cached -- confirm
     * @param pread presumably whether to use positional reads -- confirm
     * @param isCompaction whether the scanner is being used for a compaction
     * @return a scanner over this file
     */
    HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);

    /**
     * @param metaBlockName name of the meta block to fetch
     * @param cacheBlock presumably whether the block should be cached after reading -- confirm
     * @return the content of the named meta block
     * @throws IOException if the block cannot be read
     */
    ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;

    /**
     * @return the file info map of this file
     * @throws IOException if the file info cannot be loaded
     */
    Map<byte[], byte[]> loadFileInfo() throws IOException;

    byte[] getLastKey();

    byte[] midkey() throws IOException;

    long length();

    long getEntries();

    byte[] getFirstKey();

    long indexSize();

    byte[] getFirstRowKey();

    byte[] getLastRowKey();

    /** @return the fixed file trailer of this file */
    FixedFileTrailer getTrailer();

    HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();

    /**
     * Two-argument scanner factory; see also
     * {@link #getScanner(boolean, boolean, boolean)}.
     */
    HFileScanner getScanner(boolean cacheBlocks, boolean pread);

    Compression.Algorithm getCompressionAlgorithm();

    /**
     * Retrieves general Bloom filter metadata as appropriate for each
     * {@link HFile} version.
     * Knows nothing about how that metadata is structured.
     */
    DataInput getGeneralBloomFilterMetadata() throws IOException;

    /**
     * Retrieves delete family Bloom filter metadata as appropriate for each
     * {@link HFile} version.
     * Knows nothing about how that metadata is structured.
     */
    DataInput getDeleteBloomFilterMetadata() throws IOException;

    /** @return the path of the file this reader reads, as supplied when the reader was created */
    Path getPath();

    /**
     * Close method with optional evictOnClose.
     *
     * @param evictOnClose presumably whether this file's blocks should be evicted from the
     *  block cache on close -- confirm against the implementation
     * @throws IOException if closing fails
     */
    void close(boolean evictOnClose) throws IOException;

    DataBlockEncoding getDataBlockEncoding();

    boolean hasMVCCInfo();

    /**
     * Return the file context of the HFile this reader belongs to
     */
    HFileContext getFileContext();

    boolean shouldIncludeMemstoreTS();

    boolean isDecodeMemstoreTS();

    DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction);

    /** Exposed for tests only. */
    @VisibleForTesting
    HFileBlock.FSReader getUncachedBlockReader();

    /** Exposed for tests only. */
    @VisibleForTesting
    boolean prefetchComplete();
  }
462 
463   /**
464    * Method returns the reader given the specified arguments.
465    * TODO This is a bad abstraction.  See HBASE-6635.
466    *
467    * @param path hfile's path
468    * @param fsdis stream of path's file
469    * @param size max size of the trailer.
470    * @param cacheConf Cache configuation values, cannot be null.
471    * @param hfs
472    * @return an appropriate instance of HFileReader
473    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
474    */
475   private static Reader pickReaderVersion(Path path, FSDataInputStreamWrapper fsdis,
476       long size, CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
477     FixedFileTrailer trailer = null;
478     try {
479       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
480       assert !isHBaseChecksum; // Initially we must read with FS checksum.
481       trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
482       switch (trailer.getMajorVersion()) {
483       case 2:
484         LOG.debug("Opening HFile v2 with v3 reader");
485         // Fall through.
486       case 3 :
487         return new HFileReaderImpl(path, trailer, fsdis, size, cacheConf, hfs, conf);
488       default:
489         throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
490       }
491     } catch (Throwable t) {
492       try {
493         fsdis.close();
494       } catch (Throwable t2) {
495         LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
496       }
497       throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
498     }
499   }
500 
501   /**
502    * @param fs A file system
503    * @param path Path to HFile
504    * @param fsdis a stream of path's file
505    * @param size max size of the trailer.
506    * @param cacheConf Cache configuration for hfile's contents
507    * @param conf Configuration
508    * @return A version specific Hfile Reader
509    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
510    */
511   @SuppressWarnings("resource")
512   public static Reader createReader(FileSystem fs, Path path,
513       FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
514       throws IOException {
515     HFileSystem hfs = null;
516 
517     // If the fs is not an instance of HFileSystem, then create an
518     // instance of HFileSystem that wraps over the specified fs.
519     // In this case, we will not be able to avoid checksumming inside
520     // the filesystem.
521     if (!(fs instanceof HFileSystem)) {
522       hfs = new HFileSystem(fs);
523     } else {
524       hfs = (HFileSystem)fs;
525     }
526     return pickReaderVersion(path, fsdis, size, cacheConf, hfs, conf);
527   }
528 
529   /**
530    *
531    * @param fs filesystem
532    * @param path Path to file to read
533    * @param cacheConf This must not be null.  @see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
534    * @return an active Reader instance
535    * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
536    */
537   public static Reader createReader(
538       FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
539     Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
540     FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
541     return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(),
542       cacheConf, stream.getHfs(), conf);
543   }
544 
545   /**
546    * This factory method is used only by unit tests
547    */
548   static Reader createReaderFromStream(Path path,
549       FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
550       throws IOException {
551     FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
552     return pickReaderVersion(path, wrapper, size, cacheConf, null, conf);
553   }
554 
555   /**
556    * Returns true if the specified file has a valid HFile Trailer.
557    * @param fs filesystem
558    * @param path Path to file to verify
559    * @return true if the file has a valid HFile Trailer, otherwise false
560    * @throws IOException if failed to read from the underlying stream
561    */
562   public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
563     return isHFileFormat(fs, fs.getFileStatus(path));
564   }
565 
566   /**
567    * Returns true if the specified file has a valid HFile Trailer.
568    * @param fs filesystem
569    * @param fileStatus the file to verify
570    * @return true if the file has a valid HFile Trailer, otherwise false
571    * @throws IOException if failed to read from the underlying stream
572    */
573   public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
574       throws IOException {
575     final Path path = fileStatus.getPath();
576     final long size = fileStatus.getLen();
577     FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
578     try {
579       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
580       assert !isHBaseChecksum; // Initially we must read with FS checksum.
581       FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
582       return true;
583     } catch (IllegalArgumentException e) {
584       return false;
585     } catch (IOException e) {
586       throw e;
587     } finally {
588       try {
589         fsdis.close();
590       } catch (Throwable t) {
591         LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
592       }
593     }
594   }
595 
596   /**
597    * Metadata for this file. Conjured by the writer. Read in by the reader.
598    */
599   public static class FileInfo implements SortedMap<byte[], byte[]> {
600     static final String RESERVED_PREFIX = "hfile.";
601     static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
602     static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
603     static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
604     static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
605     static final byte [] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
606     static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
607     static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
608     public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
609     private final SortedMap<byte [], byte []> map = new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);
610 
611     public FileInfo() {
612       super();
613     }
614 
615     /**
616      * Append the given key/value pair to the file info, optionally checking the
617      * key prefix.
618      *
619      * @param k key to add
620      * @param v value to add
621      * @param checkPrefix whether to check that the provided key does not start
622      *          with the reserved prefix
623      * @return this file info object
624      * @throws IOException if the key or value is invalid
625      */
626     public FileInfo append(final byte[] k, final byte[] v,
627         final boolean checkPrefix) throws IOException {
628       if (k == null || v == null) {
629         throw new NullPointerException("Key nor value may be null");
630       }
631       if (checkPrefix && isReservedFileInfoKey(k)) {
632         throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
633             + " are reserved");
634       }
635       put(k, v);
636       return this;
637     }
638 
639     public void clear() {
640       this.map.clear();
641     }
642 
643     public Comparator<? super byte[]> comparator() {
644       return map.comparator();
645     }
646 
647     public boolean containsKey(Object key) {
648       return map.containsKey(key);
649     }
650 
651     public boolean containsValue(Object value) {
652       return map.containsValue(value);
653     }
654 
655     public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
656       return map.entrySet();
657     }
658 
659     public boolean equals(Object o) {
660       return map.equals(o);
661     }
662 
663     public byte[] firstKey() {
664       return map.firstKey();
665     }
666 
667     public byte[] get(Object key) {
668       return map.get(key);
669     }
670 
671     public int hashCode() {
672       return map.hashCode();
673     }
674 
675     public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
676       return this.map.headMap(toKey);
677     }
678 
679     public boolean isEmpty() {
680       return map.isEmpty();
681     }
682 
683     public Set<byte[]> keySet() {
684       return map.keySet();
685     }
686 
687     public byte[] lastKey() {
688       return map.lastKey();
689     }
690 
691     public byte[] put(byte[] key, byte[] value) {
692       return this.map.put(key, value);
693     }
694 
695     public void putAll(Map<? extends byte[], ? extends byte[]> m) {
696       this.map.putAll(m);
697     }
698 
699     public byte[] remove(Object key) {
700       return this.map.remove(key);
701     }
702 
703     public int size() {
704       return map.size();
705     }
706 
707     public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
708       return this.map.subMap(fromKey, toKey);
709     }
710 
711     public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
712       return this.map.tailMap(fromKey);
713     }
714 
715     public Collection<byte[]> values() {
716       return map.values();
717     }
718 
719     /**
720      * Write out this instance on the passed in <code>out</code> stream.
721      * We write it as a protobuf.
722      * @param out
723      * @throws IOException
724      * @see #read(DataInputStream)
725      */
726     void write(final DataOutputStream out) throws IOException {
727       HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
728       for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
729         HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
730         bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
731         bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
732         builder.addMapEntry(bbpBuilder.build());
733       }
734       out.write(ProtobufMagic.PB_MAGIC);
735       builder.build().writeDelimitedTo(out);
736     }
737 
738     /**
739      * Populate this instance with what we find on the passed in <code>in</code> stream.
740      * Can deserialize protobuf of old Writables format.
741      * @param in
742      * @throws IOException
743      * @see #write(DataOutputStream)
744      */
745     void read(final DataInputStream in) throws IOException {
746       // This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
747       int pblen = ProtobufUtil.lengthOfPBMagic();
748       byte [] pbuf = new byte[pblen];
749       if (in.markSupported()) in.mark(pblen);
750       int read = in.read(pbuf);
751       if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
752       if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
753         parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
754       } else {
755         if (in.markSupported()) {
756           in.reset();
757           parseWritable(in);
758         } else {
759           // We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
760           ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
761           SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
762           // TODO: Am I leaking anything here wrapping the passed in stream?  We are not calling close on the wrapped
763           // streams but they should be let go after we leave this context?  I see that we keep a reference to the
764           // passed in inputstream but since we no longer have a reference to this after we leave, we should be ok.
765           parseWritable(new DataInputStream(sis));
766         }
767       }
768     }
769 
770     /** Now parse the old Writable format.  It was a list of Map entries.  Each map entry was a key and a value of
771      * a byte [].  The old map format had a byte before each entry that held a code which was short for the key or
772      * value type.  We know it was a byte [] so in below we just read and dump it.
773      * @throws IOException
774      */
775     void parseWritable(final DataInputStream in) throws IOException {
776       // First clear the map.  Otherwise we will just accumulate entries every time this method is called.
777       this.map.clear();
778       // Read the number of entries in the map
779       int entries = in.readInt();
780       // Then read each key/value pair
781       for (int i = 0; i < entries; i++) {
782         byte [] key = Bytes.readByteArray(in);
783         // We used to read a byte that encoded the class type.  Read and ignore it because it is always byte [] in hfile
784         in.readByte();
785         byte [] value = Bytes.readByteArray(in);
786         this.map.put(key, value);
787       }
788     }
789 
790     /**
791      * Fill our map with content of the pb we read off disk
792      * @param fip protobuf message to read
793      */
794     void parsePB(final HFileProtos.FileInfoProto fip) {
795       this.map.clear();
796       for (BytesBytesPair pair: fip.getMapEntryList()) {
797         this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
798       }
799     }
800   }
801 
802   /** Return true if the given file info key is reserved for internal use. */
803   public static boolean isReservedFileInfoKey(byte[] key) {
804     return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
805   }
806 
807   /**
808    * Get names of supported compression algorithms. The names are acceptable by
809    * HFile.Writer.
810    *
811    * @return Array of strings, each represents a supported compression
812    *         algorithm. Currently, the following compression algorithms are
813    *         supported.
814    *         <ul>
815    *         <li>"none" - No compression.
816    *         <li>"gz" - GZIP compression.
817    *         </ul>
818    */
819   public static String[] getSupportedCompressionAlgorithms() {
820     return Compression.getSupportedAlgorithms();
821   }
822 
823   // Utility methods.
824   /*
825    * @param l Long to convert to an int.
826    * @return <code>l</code> cast as an int.
827    */
828   static int longToInt(final long l) {
829     // Expecting the size() of a block not exceeding 4GB. Assuming the
830     // size() will wrap to negative integer if it exceeds 2GB (From tfile).
831     return (int)(l & 0x00000000ffffffffL);
832   }
833 
834   /**
835    * Returns all HFiles belonging to the given region directory. Could return an
836    * empty list.
837    *
838    * @param fs  The file system reference.
839    * @param regionDir  The region directory to scan.
840    * @return The list of files found.
841    * @throws IOException When scanning the files fails.
842    */
843   static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
844       throws IOException {
845     List<Path> regionHFiles = new ArrayList<Path>();
846     PathFilter dirFilter = new FSUtils.DirFilter(fs);
847     FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
848     for(FileStatus dir : familyDirs) {
849       FileStatus[] files = fs.listStatus(dir.getPath());
850       for (FileStatus file : files) {
851         if (!file.isDirectory() &&
852             (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
853             (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
854           regionHFiles.add(file.getPath());
855         }
856       }
857     }
858     return regionHFiles;
859   }
860 
861   /**
862    * Checks the given {@link HFile} format version, and throws an exception if
863    * invalid. Note that if the version number comes from an input file and has
864    * not been verified, the caller needs to re-throw an {@link IOException} to
865    * indicate that this is not a software error, but corrupted input.
866    *
867    * @param version an HFile version
868    * @throws IllegalArgumentException if the version is invalid
869    */
870   public static void checkFormatVersion(int version)
871       throws IllegalArgumentException {
872     if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
873       throw new IllegalArgumentException("Invalid HFile version: " + version
874           + " (expected to be " + "between " + MIN_FORMAT_VERSION + " and "
875           + MAX_FORMAT_VERSION + ")");
876     }
877   }
878 
879 
880   public static void checkHFileVersion(final Configuration c) {
881     int version = c.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
882     if (version < MAX_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
883       throw new IllegalArgumentException("The setting for " + FORMAT_VERSION_KEY +
884         " (in your hbase-*.xml files) is " + version + " which does not match " +
885         MAX_FORMAT_VERSION +
886         "; are you running with a configuration from an older or newer hbase install (an " +
887         "incompatible hbase-default.xml or hbase-site.xml on your CLASSPATH)?");
888     }
889   }
890 
891   public static void main(String[] args) throws Exception {
892     // delegate to preserve old behavior
893     HFilePrettyPrinter.main(args);
894   }
895 }