/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;

import com.google.common.base.Preconditions;

/**
 * File format for HBase: a file of sorted key/value pairs. Both keys and
 * values are byte arrays.
 * <p>
 * The file is made up of data blocks followed by meta data blocks (if any), a
 * fileinfo block, a data block index, a meta data block index, and a fixed
 * size trailer that records the offsets at which the file changes content
 * type:
 * <pre>&lt;data blocks&gt;&lt;meta blocks&gt;&lt;fileinfo&gt;&lt;data index&gt;&lt;meta index&gt;&lt;trailer&gt;</pre>
 * Each block has a bit of magic at its start. Blocks are comprised of
 * key/values. In data blocks, both key and value are byte arrays; in meta
 * blocks, the key is a String and the value is a byte array. An empty file
 * consists of just a fileinfo block and the trailer.
 * <p>
 * Performance notes: a minimum block size between 8KB and 1MB is recommended
 * for general usage. Larger blocks favor sequential access but make random
 * access more expensive (more data to decompress); smaller blocks are good for
 * random access but require more memory to hold the block index and can be
 * slower to create.
 */
@InterfaceAudience.Private
public class HFile {
  // LOG is package-private because it is also used by other classes in this package.
  static final Log LOG = LogFactory.getLog(HFile.class);

  /**
   * Maximum length of key in HFile.
   */
  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /**
   * Default compression: none.
   */
  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;

  /** Minimum supported HFile format version. */
  public static final int MIN_FORMAT_VERSION = 2;

  /** Maximum supported HFile format version. */
  public static final int MAX_FORMAT_VERSION = 3;

  /**
   * Minimum HFile format version with support for persisting cell tags.
   */
  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;

  /** Default compression name: none. */
  public final static String DEFAULT_COMPRESSION =
    DEFAULT_COMPRESSION_ALGORITHM.getName();

  /** Meta data block name for bloom filter bits. */
  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";

  /**
   * We assume that HFile path ends with
   * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
   * many levels of nesting. This is needed for identifying table and CF name
   * from an HFile path.
   */
  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;

  /**
   * The number of bytes per checksum.
   */
  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;

  // Incremented on each checksum verification failure; drained by getChecksumFailuresCount().
  static final AtomicLong checksumFailures = new AtomicLong();

  // Number of data blocks read, whether from HDFS or from cache. Used by tests and metrics.
  public static final AtomicLong dataBlockReadCnt = new AtomicLong(0);

  /**
   * Number of checksum verification failures. It also
   * clears the counter.
   */
  public static final long getChecksumFailuresCount() {
    return checksumFailures.getAndSet(0);
  }
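
  /*
   * Usage sketch (not from the original source): because the counter resets on
   * read, metrics code typically accumulates the returned delta rather than
   * treating it as a running total ("totalChecksumFailures" is hypothetical):
   *
   *   long delta = HFile.getChecksumFailuresCount(); // counter is now zero again
   *   totalChecksumFailures += delta;
   */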

  /** API required to write an {@link HFile}. */
  public interface Writer extends Closeable {

    /** Add an element to the file info map. */
    void appendFileInfo(byte[] key, byte[] value) throws IOException;

    /** Append a cell. Cells must be appended in key order. */
    void append(Cell cell) throws IOException;

    /** @return the path to this {@link HFile} */
    Path getPath();

    /**
     * Adds an inline block writer such as a multi-level block index writer or
     * a compound Bloom filter writer.
     */
    void addInlineBlockWriter(InlineBlockWriter bloomWriter);

    // This method takes a Writable. We'd like to undo Writables here, but doing so would be
    // painful: to stay backward compatible around hfiles we would need to map between Message
    // and Writable or byte [] and the current Writable serialization. A lot of work for
    // little gain.

    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);

    /**
     * Store general Bloom filter in the file. This does not deal with Bloom
     * filter internals but is necessary, since Bloom filters are stored
     * differently in HFile version 1 and version 2.
     */
    void addGeneralBloomFilter(BloomFilterWriter bfw);

    /**
     * Store delete family Bloom filter in the file, which is only supported in
     * HFile V2.
     */
    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;

    /**
     * Return the file context for the HFile this writer belongs to.
     */
    HFileContext getFileContext();
  }

  /**
   * This variety of ways to construct writers is used throughout the code, and
   * we want to be able to swap writer implementations.
   */
  public static abstract class WriterFactory {
    protected final Configuration conf;
    protected final CacheConfig cacheConf;
    protected FileSystem fs;
    protected Path path;
    protected FSDataOutputStream ostream;
    protected KVComparator comparator = KeyValue.COMPARATOR;
    protected InetSocketAddress[] favoredNodes;
    private HFileContext fileContext;
    protected boolean shouldDropBehind = false;

    WriterFactory(Configuration conf, CacheConfig cacheConf) {
      this.conf = conf;
      this.cacheConf = cacheConf;
    }

    public WriterFactory withPath(FileSystem fs, Path path) {
      Preconditions.checkNotNull(fs);
      Preconditions.checkNotNull(path);
      this.fs = fs;
      this.path = path;
      return this;
    }

    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
      Preconditions.checkNotNull(ostream);
      this.ostream = ostream;
      return this;
    }

    public WriterFactory withComparator(KVComparator comparator) {
      Preconditions.checkNotNull(comparator);
      this.comparator = comparator;
      return this;
    }

    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
      // Deliberately not checking for null here; favored nodes are optional.
      this.favoredNodes = favoredNodes;
      return this;
    }

    public WriterFactory withFileContext(HFileContext fileContext) {
      this.fileContext = fileContext;
      return this;
    }

    public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
      this.shouldDropBehind = shouldDropBehind;
      return this;
    }

    public Writer create() throws IOException {
      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
        throw new AssertionError("Please specify exactly one of " +
            "filesystem/path or output stream");
      }
      if (path != null) {
        ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
        try {
          ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
        } catch (UnsupportedOperationException uoe) {
          // Not all stream implementations support the drop-behind hint; log and move on.
          if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe);
          else if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path);
        }
      }
      return createWriter(fs, path, ostream, comparator, fileContext);
    }

    protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
        KVComparator comparator, HFileContext fileContext) throws IOException;
  }

  /** The configuration key for HFile version to use for new files. */
  public static final String FORMAT_VERSION_KEY = "hfile.format.version";

  public static int getFormatVersion(Configuration conf) {
    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    checkFormatVersion(version);
    return version;
  }
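
  /*
   * Configuration sketch (not from the original source): pin new files to
   * format version 2 (assumes a Configuration "conf" is in scope):
   *
   *   conf.setInt(HFile.FORMAT_VERSION_KEY, 2);
   *   int v = HFile.getFormatVersion(conf); // validated; returns 2
   */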

  /**
   * Returns the factory to be used to create {@link HFile} writers.
   * Disables block cache while creating/writing the new file: the cache config
   * passed to the factory is built from a copy of the configuration with
   * {@link HConstants#HFILE_BLOCK_CACHE_SIZE_KEY} set to 0.
   */
  public static final WriterFactory getWriterFactoryNoCache(Configuration
       conf) {
    Configuration tempConf = new Configuration(conf);
    tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
    return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
  }

  /**
   * Returns the factory to be used to create {@link HFile} writers.
   */
  public static final WriterFactory getWriterFactory(Configuration conf,
      CacheConfig cacheConf) {
    int version = getFormatVersion(conf);
    switch (version) {
    case 2:
      return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
    case 3:
      return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
    default:
      throw new IllegalArgumentException("Cannot create writer for HFile " +
          "format version " + version);
    }
  }
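
  /*
   * Writer usage sketch (not from the original source; assumes a Configuration
   * "conf", FileSystem "fs", target Path "path", HFileContext "context" and a
   * key-ordered stream of Cells are in scope):
   *
   *   HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
   *       .withPath(fs, path)
   *       .withFileContext(context)
   *       .create();
   *   try {
   *     writer.append(cell); // repeat for each cell, in key order
   *   } finally {
   *     writer.close();
   *   }
   */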

  /**
   * An abstraction used by the block index. Implementations will check cache
   * for any asked-for block and return the cached block if found. Otherwise,
   * after reading from fs, will try and put block into cache before returning.
   */
  public interface CachingBlockReader {
    /**
     * Read in a file block.
     *
     * @param offset offset to read
     * @param onDiskBlockSize size of the block on disk
     * @param cacheBlock whether the block should be added to the block cache
     * @param pread use positional read instead of seek+read
     * @param isCompaction is this block being read as part of a compaction
     * @param updateCacheMetrics whether to update cache hit/miss metrics
     * @param expectedBlockType the block type we are expecting to read with this read operation,
     *          or null to read whatever block type is available and avoid checking
     * @param expectedDataBlockEncoding the data block encoding the caller is expecting data
     *          blocks to be in, or null to not perform this check and return the block
     *          irrespective of the encoding; only applies to data blocks
     * @return the block that was read
     * @throws IOException
     */
    HFileBlock readBlock(long offset, long onDiskBlockSize,
        boolean cacheBlock, final boolean pread, final boolean isCompaction,
        final boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException;
  }

  /** An interface used by clients to open and iterate an {@link HFile}. */
  public interface Reader extends Closeable, CachingBlockReader {
    /**
     * Returns this reader's "name". Usually the last component of the path.
     * Needs to be constant as the file is being moved to support caching on
     * write.
     */
    String getName();

    KVComparator getComparator();

    HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);

    ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;

    Map<byte[], byte[]> loadFileInfo() throws IOException;

    byte[] getLastKey();

    byte[] midkey() throws IOException;

    long length();

    long getEntries();

    byte[] getFirstKey();

    long indexSize();

    byte[] getFirstRowKey();

    byte[] getLastRowKey();

    FixedFileTrailer getTrailer();

    HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();

    HFileScanner getScanner(boolean cacheBlocks, boolean pread);

    Compression.Algorithm getCompressionAlgorithm();

    /**
     * Retrieves general Bloom filter metadata as appropriate for each
     * {@link HFile} version. Knows nothing about how that metadata is
     * structured.
     */
    DataInput getGeneralBloomFilterMetadata() throws IOException;

    /**
     * Retrieves delete family Bloom filter metadata as appropriate for each
     * {@link HFile} version. Knows nothing about how that metadata is
     * structured.
     */
    DataInput getDeleteBloomFilterMetadata() throws IOException;

    Path getPath();

    /** Close method with optional evictOnClose. */
    void close(boolean evictOnClose) throws IOException;

    DataBlockEncoding getDataBlockEncoding();

    boolean hasMVCCInfo();

    /**
     * Return the file context of the HFile this reader belongs to.
     */
    HFileContext getFileContext();

    boolean isPrimaryReplicaReader();

    void setPrimaryReplicaReader(boolean isPrimaryReplicaReader);

    /**
     * To close the stream's socket. Note: this can be concurrently called from
     * multiple threads and implementations should take care of thread safety.
     */
    void unbufferStream();
  }

  /**
   * Opens a reader appropriate to the version recorded in the file's trailer.
   *
   * @param path hfile's path
   * @param fsdis stream of path's file
   * @param size max size of the trailer
   * @param cacheConf cache configuration for hfile's contents
   * @param hfs the filesystem wrapped as an HFileSystem, or null
   * @param conf configuration
   * @return an appropriate instance of HFile Reader
   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SF_SWITCH_FALLTHROUGH",
      justification="Intentional")
  private static Reader openReader(Path path, FSDataInputStreamWrapper fsdis, long size,
      CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
    FixedFileTrailer trailer = null;
    try {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      switch (trailer.getMajorVersion()) {
      case 2:
        return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs, conf);
      case 3:
        return new HFileReaderV3(path, trailer, fsdis, size, cacheConf, hfs, conf);
      default:
        throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
      }
    } catch (Throwable t) {
      try {
        fsdis.close();
      } catch (Throwable t2) {
        LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
      }
      throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
    } finally {
      fsdis.unbuffer();
    }
  }

  /**
   * @param fs filesystem
   * @param path Path to file to read
   * @param fsdis a stream of path's file
   * @param size max size of the trailer
   * @param cacheConf cache configuration for hfile's contents
   * @param conf configuration
   * @return A version specific HFile Reader
   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
   */
  public static Reader createReader(FileSystem fs, Path path,
      FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
      throws IOException {
    HFileSystem hfs = null;

    // If the fs is not an instance of HFileSystem, then create an
    // instance of HFileSystem that wraps over the specified fs.
    // In this case, we will not be able to avoid checksumming inside
    // the filesystem.
    if (!(fs instanceof HFileSystem)) {
      hfs = new HFileSystem(fs);
    } else {
      hfs = (HFileSystem)fs;
    }
    return openReader(path, fsdis, size, cacheConf, hfs, conf);
  }

  /**
   * Creates a reader for the given path, opening the underlying stream itself.
   *
   * @param fs filesystem
   * @param path Path to file to read
   * @param cacheConf cache configuration; must not be null
   * @param conf configuration
   * @return an active Reader instance
   * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if
   *           the hfile is corrupt/invalid.
   */
  public static Reader createReader(
      FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
    return openReader(path, stream, fs.getFileStatus(path).getLen(),
      cacheConf, stream.getHfs(), conf);
  }
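
  /*
   * Reader usage sketch (not from the original source; assumes a FileSystem
   * "fs", Path "path" and Configuration "conf" are in scope; HFileScanner is
   * defined elsewhere in this package):
   *
   *   HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), conf);
   *   try {
   *     HFileScanner scanner = reader.getScanner(true, false); // cacheBlocks=true, pread=false
   *     if (scanner.seekTo()) {        // position at the first cell; false if the file is empty
   *       do {
   *         Cell cell = scanner.getKeyValue();
   *         // ... process cell ...
   *       } while (scanner.next());
   *     }
   *   } finally {
   *     reader.close();
   *   }
   */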

  /**
   * This factory method is intended for unit tests.
   *
   * @param path Path to file to read
   * @param fsdis a stream of path's file
   * @param size max size of the trailer
   * @param cacheConf cache configuration for hfile's contents
   * @param conf configuration
   */
  static Reader createReaderFromStream(Path path,
      FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
      throws IOException {
    FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
    return openReader(path, wrapper, size, cacheConf, null, conf);
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   *
   * @param fs filesystem
   * @param path Path to file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
    return isHFileFormat(fs, fs.getFileStatus(path));
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   *
   * @param fs filesystem
   * @param fileStatus the file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
      throws IOException {
    final Path path = fileStatus.getPath();
    final long size = fileStatus.getLen();
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
    try {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      return true;
    } catch (IllegalArgumentException e) {
      // Not an HFile: the trailer failed to parse.
      return false;
    } catch (IOException e) {
      // An actual read error, as opposed to an invalid trailer: propagate it.
      throw e;
    } finally {
      try {
        fsdis.close();
      } catch (Throwable t) {
        LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
      }
    }
  }
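
  /*
   * Example (not from the original source; assumes "fs", "conf" and a
   * candidate Path "file" are in scope): filter a directory listing down to
   * HFiles before attempting to open them:
   *
   *   if (HFile.isHFileFormat(fs, file)) {
   *     HFile.Reader r = HFile.createReader(fs, file, new CacheConfig(conf), conf);
   *     // ... use and close the reader ...
   *   }
   */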

  /**
   * Metadata for this file. Conjured by the writer. Read in by the reader.
   */
  public static class FileInfo implements SortedMap<byte[], byte[]> {
    static final String RESERVED_PREFIX = "hfile.";
    static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
    static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
    static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
    static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
    static final byte [] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
    static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
    static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
    public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
    private final SortedMap<byte [], byte []> map =
        new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);

    public FileInfo() {
      super();
    }

    /**
     * Append the given key/value pair to the file info, optionally checking
     * the key prefix.
     *
     * @param k key to add
     * @param v value to add
     * @param checkPrefix whether to check that the provided key does not start
     *          with the reserved prefix
     * @return this file info object
     * @throws IOException if the key or value is invalid
     */
    public FileInfo append(final byte[] k, final byte[] v,
        final boolean checkPrefix) throws IOException {
      if (k == null || v == null) {
        throw new NullPointerException("Neither key nor value may be null");
      }
      if (checkPrefix && isReservedFileInfoKey(k)) {
        throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
            + " prefix are reserved");
      }
      put(k, v);
      return this;
    }
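
    /*
     * Example (not from the original source): user-supplied entries must avoid
     * the reserved "hfile." prefix when checkPrefix is true; reserved keys are
     * written by the writer itself with checkPrefix=false ("lastKey" is
     * hypothetical):
     *
     *   FileInfo info = new FileInfo();
     *   info.append(Bytes.toBytes("MY_KEY"), Bytes.toBytes("my value"), true); // ok
     *   info.append(FileInfo.LASTKEY, lastKey, false);                         // reserved key
     *   // info.append(Bytes.toBytes("hfile.MY_KEY"), v, true);                // throws IOException
     */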

    public void clear() {
      this.map.clear();
    }

    public Comparator<? super byte[]> comparator() {
      return map.comparator();
    }

    public boolean containsKey(Object key) {
      return map.containsKey(key);
    }

    public boolean containsValue(Object value) {
      return map.containsValue(value);
    }

    public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
      return map.entrySet();
    }

    public boolean equals(Object o) {
      return map.equals(o);
    }

    public byte[] firstKey() {
      return map.firstKey();
    }

    public byte[] get(Object key) {
      return map.get(key);
    }

    public int hashCode() {
      return map.hashCode();
    }

    public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
      return this.map.headMap(toKey);
    }

    public boolean isEmpty() {
      return map.isEmpty();
    }

    public Set<byte[]> keySet() {
      return map.keySet();
    }

    public byte[] lastKey() {
      return map.lastKey();
    }

    public byte[] put(byte[] key, byte[] value) {
      return this.map.put(key, value);
    }

    public void putAll(Map<? extends byte[], ? extends byte[]> m) {
      this.map.putAll(m);
    }

    public byte[] remove(Object key) {
      return this.map.remove(key);
    }

    public int size() {
      return map.size();
    }

    public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
      return this.map.subMap(fromKey, toKey);
    }

    public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
      return this.map.tailMap(fromKey);
    }

    public Collection<byte[]> values() {
      return map.values();
    }

    /**
     * Write out this instance on the passed in <code>out</code> stream.
     * We write it as a protobuf.
     *
     * @param out the stream to write to
     * @throws IOException
     * @see #read(DataInputStream)
     */
    void write(final DataOutputStream out) throws IOException {
      HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
      for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
        HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
        bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
        bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
        builder.addMapEntry(bbpBuilder.build());
      }
      out.write(ProtobufUtil.PB_MAGIC);
      builder.build().writeDelimitedTo(out);
    }

    /**
     * Populate this instance with what we find on the passed in <code>in</code>
     * stream. Can deserialize protobuf or the old Writables format.
     *
     * @param in the stream to read from
     * @throws IOException
     * @see #write(DataOutputStream)
     */
    void read(final DataInputStream in) throws IOException {
      // Read the pb magic preamble first to decide which format we have.
      int pblen = ProtobufUtil.lengthOfPBMagic();
      byte [] pbuf = new byte[pblen];
      if (in.markSupported()) in.mark(pblen);
      int read = in.read(pbuf);
      if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
      if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
        parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
      } else {
        if (in.markSupported()) {
          in.reset();
          parseWritable(in);
        } else {
          // We cannot use BufferedInputStream; it consumes more than we read from the underlying IS.
          // Instead, prepend the bytes we already consumed and parse the concatenation.
          ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
          SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
          // We do not close the wrapping streams here; no reference to them survives this scope,
          // and closing the passed-in stream remains the caller's responsibility.
          parseWritable(new DataInputStream(sis));
        }
      }
    }
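
    /*
     * Round-trip sketch (not from the original source; uses only in-memory
     * streams, with java.io.ByteArrayOutputStream imported where this runs):
     *
     *   FileInfo info = new FileInfo();
     *   info.append(Bytes.toBytes("MY_KEY"), Bytes.toBytes("my value"), true);
     *   ByteArrayOutputStream baos = new ByteArrayOutputStream();
     *   info.write(new DataOutputStream(baos));
     *   FileInfo copy = new FileInfo();
     *   copy.read(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
     */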

    /**
     * Parse the old Writable format: a count followed by that many map
     * entries, each a key and a value of byte []. The old map format had a
     * byte before each entry that held a code that was short for the key or
     * value type. We know it was a byte [], so below we just read and dump it.
     *
     * @throws IOException
     */
    void parseWritable(final DataInputStream in) throws IOException {
      // First clear the map. Otherwise we will just accumulate entries every
      // time this method is called.
      this.map.clear();
      // Read the number of entries in the map.
      int entries = in.readInt();
      // Then read each key/value pair.
      for (int i = 0; i < entries; i++) {
        byte [] key = Bytes.readByteArray(in);
        // We used to read a byte that encoded the class type. Read and ignore
        // it because it is always byte [] in hfile.
        in.readByte();
        byte [] value = Bytes.readByteArray(in);
        this.map.put(key, value);
      }
    }

    /**
     * Fill our map with the content of the pb we read off disk.
     *
     * @param fip protobuf message to read
     */
    void parsePB(final HFileProtos.FileInfoProto fip) {
      this.map.clear();
      for (BytesBytesPair pair: fip.getMapEntryList()) {
        this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
      }
    }
  }

  /** Return true if the given file info key is reserved for internal use. */
  public static boolean isReservedFileInfoKey(byte[] key) {
    return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
  }

  /**
   * Get names of supported compression algorithms. The names are acceptable by
   * HFile.Writer.
   *
   * @return Array of strings, each represents a supported compression
   *         algorithm. Currently, the following compression algorithms are
   *         supported.
   *         <ul>
   *         <li>"none" - No compression.
   *         <li>"gz" - GZIP compression.
   *         </ul>
   */
  public static String[] getSupportedCompressionAlgorithms() {
    return Compression.getSupportedAlgorithms();
  }

  // Utility methods.
  /*
   * @param l Long to convert to an int.
   * @return <code>l</code> cast as an int.
   */
  static int longToInt(final long l) {
    // Expecting the size() of a block not exceeding 4GB. Assuming the
    // size() will wrap to negative integer if it exceeds 2GB.
    return (int)(l & 0x00000000ffffffffL);
  }

  /**
   * Returns all HFiles belonging to the given region directory. Could return
   * an empty list.
   *
   * @param fs The file system reference.
   * @param regionDir The region directory to scan.
   * @return The list of files found.
   * @throws IOException When scanning the files fails.
   */
  static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
      throws IOException {
    List<Path> regionHFiles = new ArrayList<Path>();
    PathFilter dirFilter = new FSUtils.DirFilter(fs);
    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
    for(FileStatus dir : familyDirs) {
      FileStatus[] files = fs.listStatus(dir.getPath());
      for (FileStatus file : files) {
        if (!file.isDirectory() &&
            (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
            (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
          regionHFiles.add(file.getPath());
        }
      }
    }
    return regionHFiles;
  }
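
  /*
   * Example (not from the original source; assumes "fs" and a region directory
   * Path "regionDir" are in scope). The scan walks each family directory and
   * filters out old-log and recovered-edits entries, so the result is only
   * candidate store files:
   *
   *   for (Path storeFile : HFile.getStoreFiles(fs, regionDir)) {
   *     LOG.info("Found store file " + storeFile);
   *   }
   */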

  /**
   * Checks the given {@link HFile} format version, and throws an exception if
   * invalid. Note that if the version number comes from an input file and has
   * not been verified, the caller needs to re-throw an {@link IOException} to
   * indicate that this is not a software error, but corrupted input.
   *
   * @param version an HFile version
   * @throws IllegalArgumentException if the version is invalid
   */
  public static void checkFormatVersion(int version)
      throws IllegalArgumentException {
    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
      throw new IllegalArgumentException("Invalid HFile version: " + version
          + " (expected to be between " + MIN_FORMAT_VERSION + " and "
          + MAX_FORMAT_VERSION + ")");
    }
  }

  public static void main(String[] args) throws Exception {
    // delegate to preserve old behavior
    HFilePrettyPrinter.main(args);
  }
}