/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;

import com.google.common.base.Preconditions;

/**
 * File format for HBase. A file of sorted key/value pairs. Both keys and values
 * are byte arrays.
 * <p>
 * This class is the entry point for reading and writing HFiles. It defines the
 * {@link Writer} and {@link Reader} interfaces, the abstract {@link WriterFactory}
 * that instantiates a version-specific writer, and the {@link FileInfo} metadata
 * map that is serialized into the file's file-info block. Readers are created via
 * {@link #createReader(FileSystem, Path, CacheConfig, Configuration)}, which picks
 * the reader implementation matching the major version recorded in the file's
 * fixed trailer.
 * <p>
 * Only format versions {@link #MIN_FORMAT_VERSION} through
 * {@link #MAX_FORMAT_VERSION} are supported. The version used for new files is
 * taken from the {@value #FORMAT_VERSION_KEY} configuration key and defaults to
 * {@link #MAX_FORMAT_VERSION}.
 */
@InterfaceAudience.Private
public class HFile {

  static final Log LOG = LogFactory.getLog(HFile.class);

  /**
   * Maximum length of key in HFile.
   */
  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /**
   * Default compression: none.
   */
  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;

  /** Minimum supported HFile format version. */
  public static final int MIN_FORMAT_VERSION = 2;

  /** Maximum supported HFile format version. */
  public static final int MAX_FORMAT_VERSION = 3;

  /** Minimum HFile format version with support for persisting cell tags. */
  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;

  /** Default compression name: none. */
  public final static String DEFAULT_COMPRESSION =
    DEFAULT_COMPRESSION_ALGORITHM.getName();

  /** Meta data block name for bloom filter bits. */
  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";

  /**
   * We assume that an HFile path ends with
   * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
   * many levels of nesting. This is needed for identifying the table and
   * column family name from an HFile path.
   */
  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;

  /**
   * The number of bytes per checksum.
   */
  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;

  // For measuring number of checksum failures
  static final AtomicLong checksumFailures = new AtomicLong();

  // For tests. Gets incremented when we read a block whether from HDFS or from cache.
  public static final AtomicLong dataBlockReadCnt = new AtomicLong(0);

  /**
   * Number of checksum verification failures. It also clears the counter.
   */
  public static final long getChecksumFailuresCount() {
    return checksumFailures.getAndSet(0);
  }

  /** API required to write an {@link HFile}. */
  public interface Writer extends Closeable {

    /** Add an element to the file info map. */
    void appendFileInfo(byte[] key, byte[] value) throws IOException;

    void append(Cell cell) throws IOException;

    /** @return the path to this {@link HFile} */
    Path getPath();

    /**
     * Adds an inline block writer such as a multi-level block index writer or
     * a compound Bloom filter writer.
     */
    void addInlineBlockWriter(InlineBlockWriter bloomWriter);

    /**
     * Add a meta block to the end of the file. Call before close(). Metadata
     * blocks are expensive: fill one with a bunch of serialized data rather
     * than doing a metadata block per metadata instance.
     */
    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);

    /**
     * Store general Bloom filter in the file. This does not deal with Bloom
     * filter internals but is necessary, since Bloom filters are stored
     * differently in HFile version 1 and version 2.
     */
    void addGeneralBloomFilter(BloomFilterWriter bfw);

    /**
     * Store delete family Bloom filter in the file, which is only supported
     * in HFile V2.
     */
    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;

    /**
     * Return the file context for the HFile this writer belongs to.
     */
    HFileContext getFileContext();
  }

  /**
   * This variety of ways to construct writers is used throughout the code, and
   * we want to be able to swap writer implementations.
   */
  public static abstract class WriterFactory {
    protected final Configuration conf;
    protected final CacheConfig cacheConf;
    protected FileSystem fs;
    protected Path path;
    protected FSDataOutputStream ostream;
    protected KVComparator comparator = KeyValue.COMPARATOR;
    protected InetSocketAddress[] favoredNodes;
    private HFileContext fileContext;
    protected boolean shouldDropBehind = false;

    WriterFactory(Configuration conf, CacheConfig cacheConf) {
      this.conf = conf;
      this.cacheConf = cacheConf;
    }

    public WriterFactory withPath(FileSystem fs, Path path) {
      Preconditions.checkNotNull(fs);
      Preconditions.checkNotNull(path);
      this.fs = fs;
      this.path = path;
      return this;
    }

    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
      Preconditions.checkNotNull(ostream);
      this.ostream = ostream;
      return this;
    }

    public WriterFactory withComparator(KVComparator comparator) {
      Preconditions.checkNotNull(comparator);
      this.comparator = comparator;
      return this;
    }

    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
      // Deliberately not checking for null here.
      this.favoredNodes = favoredNodes;
      return this;
    }

    public WriterFactory withFileContext(HFileContext fileContext) {
      this.fileContext = fileContext;
      return this;
    }

    public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
      this.shouldDropBehind = shouldDropBehind;
      return this;
    }

    public Writer create() throws IOException {
      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
        throw new AssertionError("Please specify exactly one of " +
          "filesystem/path or path");
      }
      if (path != null) {
        ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
        try {
          ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
        } catch (UnsupportedOperationException uoe) {
          if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe);
          else if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path);
        }
      }
      return createWriter(fs, path, ostream,
        comparator, fileContext);
    }

    protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
        KVComparator comparator, HFileContext fileContext) throws IOException;
  }

  /** The configuration key for the HFile version to use for new files. */
  public static final String FORMAT_VERSION_KEY = "hfile.format.version";

  public static int getFormatVersion(Configuration conf) {
    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    checkFormatVersion(version);
    return version;
  }

  /**
   * Returns the factory to be used to create {@link HFile} writers.
   * Disables block cache access for all writers created through the
   * returned factory.
   */
  public static final WriterFactory getWriterFactoryNoCache(Configuration conf) {
    Configuration tempConf = new Configuration(conf);
    tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
    return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
  }

  /**
   * Returns the factory to be used to create {@link HFile} writers for the
   * format version configured under {@value #FORMAT_VERSION_KEY}.
   */
  public static final WriterFactory getWriterFactory(Configuration conf,
      CacheConfig cacheConf) {
    int version = getFormatVersion(conf);
    switch (version) {
    case 2:
      return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
    case 3:
      return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
    default:
      throw new IllegalArgumentException("Cannot create writer for HFile " +
        "format version " + version);
    }
  }
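
  /*
   * Illustrative sketch (not part of this class): building a Writer through the
   * factory above and appending a cell. The configuration, filesystem and path
   * below are hypothetical placeholders; cells must be appended in key order.
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   FileSystem fs = FileSystem.get(conf);
   *   HFileContext context = new HFileContextBuilder()
   *       .withBlockSize(64 * 1024)
   *       .withCompression(Compression.Algorithm.NONE)
   *       .build();
   *   HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
   *       .withPath(fs, new Path("/tmp/example.hfile"))
   *       .withFileContext(context)
   *       .create();
   *   try {
   *     writer.append(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"),
   *         Bytes.toBytes("q"), Bytes.toBytes("v1")));
   *   } finally {
   *     writer.close();
   *   }
   */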

  /**
   * An abstraction used by the block index. Implementations will check the
   * cache for any asked-for block and return the cached block if found.
   * Otherwise, after reading from the filesystem, they will try and put the
   * block into the cache before returning.
   */
  public interface CachingBlockReader {
    /**
     * Read in a file block.
     *
     * @param offset offset to read.
     * @param onDiskBlockSize size of the block
     * @param cacheBlock whether the block should be added to the block cache
     * @param pread use positional read instead of seek+read
     * @param isCompaction is this block being read as part of a compaction
     * @param updateCacheMetrics whether to update cache hit/miss metrics
     * @param expectedBlockType the block type we are expecting to read with
     *          this read operation, or null to read whatever block type is
     *          available and avoid checking (that might reduce caching
     *          efficiency of encoded data blocks)
     * @param expectedDataBlockEncoding the data block encoding the caller is
     *          expecting data blocks to be in, or null to not perform this
     *          check and return the block irrespective of the encoding
     */
    HFileBlock readBlock(long offset, long onDiskBlockSize,
        boolean cacheBlock, final boolean pread, final boolean isCompaction,
        final boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException;
  }

  /** An interface used by clients to open and iterate an {@link HFile}. */
  public interface Reader extends Closeable, CachingBlockReader {

    /**
     * Returns this reader's "name". Usually the last component of the path.
     * Needs to be constant as the file is being moved to support caching on
     * write.
     */
    String getName();

    KVComparator getComparator();

    HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);

    ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;

    Map<byte[], byte[]> loadFileInfo() throws IOException;

    byte[] getLastKey();

    byte[] midkey() throws IOException;

    long length();

    long getEntries();

    byte[] getFirstKey();

    long indexSize();

    byte[] getFirstRowKey();

    byte[] getLastRowKey();

    FixedFileTrailer getTrailer();

    HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();

    HFileScanner getScanner(boolean cacheBlocks, boolean pread);

    Compression.Algorithm getCompressionAlgorithm();

    /**
     * Retrieves general Bloom filter metadata as appropriate for each
     * {@link HFile} version. Knows nothing about how that metadata is
     * structured.
     */
    DataInput getGeneralBloomFilterMetadata() throws IOException;

    /**
     * Retrieves delete family Bloom filter metadata as appropriate for each
     * {@link HFile} version. Knows nothing about how that metadata is
     * structured.
     */
    DataInput getDeleteBloomFilterMetadata() throws IOException;

    Path getPath();

    /** Close method with optional evictOnClose. */
    void close(boolean evictOnClose) throws IOException;

    DataBlockEncoding getDataBlockEncoding();

    boolean hasMVCCInfo();

    /**
     * Return the file context of the HFile this reader belongs to.
     */
    HFileContext getFileContext();

    boolean isPrimaryReplicaReader();

    void setPrimaryReplicaReader(boolean isPrimaryReplicaReader);

    /**
     * To close the stream's socket. Note: this can be concurrently called
     * from multiple threads and the implementation should take care of
     * thread safety.
     */
    void unbufferStream();
  }

  /**
   * Method returns the reader given the specified arguments.
   * TODO This is a bad abstraction.  See HBASE-6635.
   *
   * @param path hfile's path
   * @param fsdis stream of path's file
   * @param size max size of the trailer.
   * @param cacheConf cache configuration values, cannot be null.
   * @param hfs the filesystem wrapper
   * @return an appropriate instance of HFileReader
   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SF_SWITCH_FALLTHROUGH",
      justification="Intentional")
  private static Reader openReader(Path path, FSDataInputStreamWrapper fsdis, long size,
      CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
    FixedFileTrailer trailer = null;
    try {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      switch (trailer.getMajorVersion()) {
      case 2:
        return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs, conf);
      case 3:
        return new HFileReaderV3(path, trailer, fsdis, size, cacheConf, hfs, conf);
      default:
        throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
      }
    } catch (Throwable t) {
      try {
        fsdis.close();
      } catch (Throwable t2) {
        LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
      }
      throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
    } finally {
      fsdis.unbuffer();
    }
  }

  /**
   * Creates a reader over the given path, using the given stream wrapper.
   *
   * @param fs a file system
   * @param path path to HFile
   * @param fsdis a stream of path's file
   * @param size max size of the trailer.
   * @param cacheConf cache configuration for hfile's contents
   * @param conf configuration
   * @return a version-specific HFile Reader
   * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
   */
  public static Reader createReader(FileSystem fs, Path path,
      FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
      throws IOException {
    HFileSystem hfs = null;

    /*
     * If the fs is not an instance of HFileSystem, then create an
     * instance of HFileSystem that wraps over the specified fs.
     * In this case, we will not be able to avoid checksumming inside
     * the filesystem.
     */
    if (!(fs instanceof HFileSystem)) {
      hfs = new HFileSystem(fs);
    } else {
      hfs = (HFileSystem)fs;
    }
    return openReader(path, fsdis, size, cacheConf, hfs, conf);
  }

  /**
   * Creates a reader over the given path.
   *
   * @param fs filesystem
   * @param path path to file to read
   * @param cacheConf cache configuration, must not be null
   * @param conf configuration
   * @return an active Reader instance
   * @throws IOException will throw a CorruptHFileException (DoNotRetryIOException subtype) if
   *           the hfile is corrupt/invalid
   */
  public static Reader createReader(
      FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
    return openReader(path, stream, fs.getFileStatus(path).getLen(),
      cacheConf, stream.getHfs(), conf);
  }
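
  /*
   * Illustrative sketch (continuing the writer example above): opening a Reader
   * and scanning all cells. The path is a hypothetical placeholder.
   *
   *   HFile.Reader reader = HFile.createReader(fs, new Path("/tmp/example.hfile"),
   *       new CacheConfig(conf), conf);
   *   reader.loadFileInfo();
   *   HFileScanner scanner = reader.getScanner(false, false); // no caching, no pread
   *   if (scanner.seekTo()) {           // position at the first cell, if any
   *     do {
   *       Cell cell = scanner.getKeyValue();
   *       // ... process cell ...
   *     } while (scanner.next());
   *   }
   *   reader.close();
   */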

  /**
   * This factory method is used only by unit tests.
   */
  static Reader createReaderFromStream(Path path,
      FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
      throws IOException {
    FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
    return openReader(path, wrapper, size, cacheConf, null, conf);
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   *
   * @param fs filesystem
   * @param path path to file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
    return isHFileFormat(fs, fs.getFileStatus(path));
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   *
   * @param fs filesystem
   * @param fileStatus the file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
      throws IOException {
    final Path path = fileStatus.getPath();
    final long size = fileStatus.getLen();
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
    try {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      return true;
    } catch (IllegalArgumentException e) {
      return false;
    } catch (IOException e) {
      throw e;
    } finally {
      try {
        fsdis.close();
      } catch (Throwable t) {
        LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
      }
    }
  }
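
  /*
   * Illustrative sketch: isHFileFormat can be used to screen arbitrary files,
   * e.g. when validating a directory of candidate files before a bulk load
   * (candidateDir is a hypothetical placeholder).
   *
   *   for (FileStatus stat : fs.listStatus(candidateDir)) {
   *     if (!stat.isDirectory() && HFile.isHFileFormat(fs, stat)) {
   *       // stat.getPath() has a readable HFile trailer
   *     }
   *   }
   */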

  /**
   * Metadata for this file. Conjured by the writer. Read in by the reader.
   */
  public static class FileInfo implements SortedMap<byte[], byte[]> {
    static final String RESERVED_PREFIX = "hfile.";
    static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
    static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
    static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
    static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
    static final byte [] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
    static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
    static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
    public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
    private final SortedMap<byte [], byte []> map =
      new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);

    public FileInfo() {
      super();
    }

    /**
     * Append the given key/value pair to the file info, optionally checking
     * the key prefix.
     *
     * @param k key to add
     * @param v value to add
     * @param checkPrefix whether to check that the provided key does not start
     *          with the reserved prefix
     * @return this file info object
     * @throws IOException if the key or value is invalid
     */
    public FileInfo append(final byte[] k, final byte[] v,
        final boolean checkPrefix) throws IOException {
      if (k == null || v == null) {
        throw new NullPointerException("Neither key nor value may be null");
      }
      if (checkPrefix && isReservedFileInfoKey(k)) {
        throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
          + " prefix are reserved");
      }
      put(k, v);
      return this;
    }

    public void clear() {
      this.map.clear();
    }

    public Comparator<? super byte[]> comparator() {
      return map.comparator();
    }

    public boolean containsKey(Object key) {
      return map.containsKey(key);
    }

    public boolean containsValue(Object value) {
      return map.containsValue(value);
    }

    public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
      return map.entrySet();
    }

    public boolean equals(Object o) {
      return map.equals(o);
    }

    public byte[] firstKey() {
      return map.firstKey();
    }

    public byte[] get(Object key) {
      return map.get(key);
    }

    public int hashCode() {
      return map.hashCode();
    }

    public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
      return this.map.headMap(toKey);
    }

    public boolean isEmpty() {
      return map.isEmpty();
    }

    public Set<byte[]> keySet() {
      return map.keySet();
    }

    public byte[] lastKey() {
      return map.lastKey();
    }

    public byte[] put(byte[] key, byte[] value) {
      return this.map.put(key, value);
    }

    public void putAll(Map<? extends byte[], ? extends byte[]> m) {
      this.map.putAll(m);
    }

    public byte[] remove(Object key) {
      return this.map.remove(key);
    }

    public int size() {
      return map.size();
    }

    public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
      return this.map.subMap(fromKey, toKey);
    }

    public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
      return this.map.tailMap(fromKey);
    }

    public Collection<byte[]> values() {
      return map.values();
    }

    /**
     * Write out this instance on the passed in <code>out</code> stream.
     * We write it as a protobuf.
     *
     * @param out the stream to write to
     * @throws IOException if writing fails
     * @see #read(DataInputStream)
     */
    void write(final DataOutputStream out) throws IOException {
      HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
      for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
        HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
        bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
        bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
        builder.addMapEntry(bbpBuilder.build());
      }
      out.write(ProtobufUtil.PB_MAGIC);
      builder.build().writeDelimitedTo(out);
    }

    /**
     * Populate this instance with what we find on the passed in <code>in</code>
     * stream. Can deserialize protobuf as well as the old Writables format.
     *
     * @param in the stream to read from
     * @throws IOException if reading fails
     * @see #write(DataOutputStream)
     */
    void read(final DataInputStream in) throws IOException {
      // First check whether the stream starts with the protobuf magic.
      int pblen = ProtobufUtil.lengthOfPBMagic();
      byte [] pbuf = new byte[pblen];
      if (in.markSupported()) in.mark(pblen);
      int read = in.read(pbuf);
      if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
      if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
        parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
      } else {
        if (in.markSupported()) {
          in.reset();
          parseWritable(in);
        } else {
          // We cannot use BufferedInputStream; it consumes more than we read
          // from the underlying stream. Prepend the already-consumed magic
          // bytes back onto the stream instead.
          ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
          SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
          parseWritable(new DataInputStream(sis));
        }
      }
    }

    /**
     * Parse the pre-protobuf, Writable serialization of a file info map: an
     * int count of entries, followed by that many key/value byte arrays.
     */
    void parseWritable(final DataInputStream in) throws IOException {
      // First clear the map. Otherwise we will just accumulate entries every
      // time this method is called.
      this.map.clear();
      // Read the number of entries in the map.
      int entries = in.readInt();
      // Then read each key/value pair.
      for (int i = 0; i < entries; i++) {
        byte [] key = Bytes.readByteArray(in);
        // We used to read a byte that encoded the class type. Read and ignore
        // it because it is always byte [] in hfile.
        in.readByte();
        byte [] value = Bytes.readByteArray(in);
        this.map.put(key, value);
      }
    }

    /**
     * Fill our map with the content of the passed in protobuf.
     */
    void parsePB(final HFileProtos.FileInfoProto fip) {
      this.map.clear();
      for (BytesBytesPair pair: fip.getMapEntryList()) {
        this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
      }
    }
  }
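
  /*
   * Illustrative sketch of the FileInfo round trip: keys outside the reserved
   * "hfile." namespace may be appended and are written as PB_MAGIC followed by
   * a delimited FileInfoProto, while read() also accepts the legacy Writable
   * layout for old files. The streams below are hypothetical placeholders.
   *
   *   HFile.FileInfo info = new HFile.FileInfo();
   *   info.append(Bytes.toBytes("MAJOR_COMPACTION_KEY"), Bytes.toBytes(true), true);
   *   info.write(out);  // DataOutputStream; protobuf serialization
   *
   *   HFile.FileInfo copy = new HFile.FileInfo();
   *   copy.read(in);    // DataInputStream; detects PB magic vs. legacy Writable
   */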

  /** Return true if the given file info key is reserved for internal use. */
  public static boolean isReservedFileInfoKey(byte[] key) {
    return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
  }

  /**
   * Get names of supported compression algorithms. The names are acceptable
   * by HFile.Writer.
   *
   * @return Array of strings, each represents a supported compression
   *         algorithm. Currently, the following compression algorithms are
   *         supported:
   *         <ul>
   *         <li>"none" - No compression.
   *         <li>"gz" - GZIP compression.
   *         </ul>
   */
  public static String[] getSupportedCompressionAlgorithms() {
    return Compression.getSupportedAlgorithms();
  }

  // Utility methods.

  /**
   * @param l Long to convert to an int.
   * @return <code>l</code> cast as an int.
   */
  static int longToInt(final long l) {
    // Expecting the size() of a block not exceeding 4GB. Assuming the
    // size() will wrap to negative integer if it exceeds 2GB (from tfile).
    return (int)(l & 0x00000000ffffffffL);
  }

  /**
   * Returns all HFiles belonging to the given region directory. Could return
   * an empty list.
   *
   * @param fs The file system reference.
   * @param regionDir The region directory to scan.
   * @return The list of files found.
   * @throws IOException When scanning the files fails.
   */
  static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
      throws IOException {
    List<Path> regionHFiles = new ArrayList<Path>();
    PathFilter dirFilter = new FSUtils.DirFilter(fs);
    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
    for(FileStatus dir : familyDirs) {
      FileStatus[] files = fs.listStatus(dir.getPath());
      for (FileStatus file : files) {
        if (!file.isDirectory() &&
            (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
            (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
          regionHFiles.add(file.getPath());
        }
      }
    }
    return regionHFiles;
  }

  /**
   * Checks the given {@link HFile} format version, and throws an exception if
   * invalid. Note that if the version number comes from an input file and has
   * not been verified, the caller needs to re-throw an {@link IOException} to
   * indicate that this is not a software error, but corrupted input.
   *
   * @param version an HFile version
   * @throws IllegalArgumentException if the version is invalid
   */
  public static void checkFormatVersion(int version)
      throws IllegalArgumentException {
    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
      throw new IllegalArgumentException("Invalid HFile version: " + version
        + " (expected to be between " + MIN_FORMAT_VERSION + " and "
        + MAX_FORMAT_VERSION + ")");
    }
  }

  public static void main(String[] args) throws Exception {
    // Delegate to the HFile pretty printer command-line tool.
    HFilePrettyPrinter.main(args);
  }
}