/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;

import com.google.common.base.Preconditions;

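/**
 * File format for HBase: a file of sorted key/value pairs where both keys and values are byte
 * arrays. This class provides the factory methods for obtaining {@link Reader} and
 * {@link Writer} instances for the supported format versions, the {@link FileInfo} metadata map
 * written into every file, and small utilities shared by the reader and writer implementations.
 */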
@InterfaceAudience.Private
public class HFile {
  static final Log LOG = LogFactory.getLog(HFile.class);

  /** Maximum length of key in HFile. */
  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /** Default compression: none. */
  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;

  /** Minimum supported HFile format version. */
  public static final int MIN_FORMAT_VERSION = 2;

  /** Maximum supported HFile format version. */
  public static final int MAX_FORMAT_VERSION = 3;

  /** Minimum HFile format version with support for persisting cell tags. */
  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;

  /** Default compression name: none. */
  public final static String DEFAULT_COMPRESSION =
    DEFAULT_COMPRESSION_ALGORITHM.getName();

  /** Meta data block name for bloom filter bits. */
  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";

  /**
   * We assume that HFile path ends with ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has
   * at least this many levels of nesting. This is needed for identifying table and CF name from
   * an HFile path.
   */
  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;

  /** Default number of bytes covered by a single checksum value. */
  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;

  public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;

  /** Number of checksum verification failures; reset by {@link #getChecksumFailuresCount()}. */
  static final AtomicLong checksumFailures = new AtomicLong();

  /** Counter of data blocks read. */
  public static final AtomicLong dataBlockReadCnt = new AtomicLong(0);

  /**
   * Number of checksum verification failures. It also clears the counter.
   */
  public static final long getChecksumFailuresCount() {
    return checksumFailures.getAndSet(0);
  }

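  /** API required to write an {@link HFile} */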
  public interface Writer extends Closeable {

    /** Add an element to the file info map. */
    void appendFileInfo(byte[] key, byte[] value) throws IOException;

    /** Add a cell to the file. Cells must be added in sorted key order. */
    void append(Cell cell) throws IOException;

    /** @return the path to this {@link HFile} */
    Path getPath();

    /**
     * Adds an inline block writer such as a multi-level block index writer or
     * a compound Bloom filter writer.
     */
    void addInlineBlockWriter(InlineBlockWriter bloomWriter);

    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);

    /**
     * Store general Bloom filter in the file. This does not deal with Bloom
     * filter internals but is necessary, since Bloom filters are stored
     * differently in HFile version 1 and version 2.
     */
    void addGeneralBloomFilter(BloomFilterWriter bfw);

    /** Store delete family Bloom filter in the file. */
    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;

    /** Return the file context for the HFile this writer belongs to. */
    HFileContext getFileContext();
  }

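  /**
   * This variety of ways to construct writers is used throughout the code, and
   * we want to be able to swap writer implementations.
   */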
  public static abstract class WriterFactory {
    protected final Configuration conf;
    protected final CacheConfig cacheConf;
    protected FileSystem fs;
    protected Path path;
    protected FSDataOutputStream ostream;
    protected KVComparator comparator = KeyValue.COMPARATOR;
    protected InetSocketAddress[] favoredNodes;
    private HFileContext fileContext;

    WriterFactory(Configuration conf, CacheConfig cacheConf) {
      this.conf = conf;
      this.cacheConf = cacheConf;
    }

    public WriterFactory withPath(FileSystem fs, Path path) {
      Preconditions.checkNotNull(fs);
      Preconditions.checkNotNull(path);
      this.fs = fs;
      this.path = path;
      return this;
    }

    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
      Preconditions.checkNotNull(ostream);
      this.ostream = ostream;
      return this;
    }

    public WriterFactory withComparator(KVComparator comparator) {
      Preconditions.checkNotNull(comparator);
      this.comparator = comparator;
      return this;
    }

    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
      // Deliberately not checking for null: a null array simply means no favored nodes.
      this.favoredNodes = favoredNodes;
      return this;
    }

    public WriterFactory withFileContext(HFileContext fileContext) {
      this.fileContext = fileContext;
      return this;
    }

    public Writer create() throws IOException {
      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
        throw new AssertionError("Please specify exactly one of " +
            "filesystem/path or output stream");
      }
      if (path != null) {
        ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
      }
      return createWriter(fs, path, ostream,
          comparator, fileContext);
    }

    protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
        KVComparator comparator, HFileContext fileContext) throws IOException;
  }

  /** The configuration key for HFile version to use for new files. */
  public static final String FORMAT_VERSION_KEY = "hfile.format.version";

  public static int getFormatVersion(Configuration conf) {
    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    checkFormatVersion(version);
    return version;
  }

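  /**
   * Returns the factory to be used to create {@link HFile} writers.
   * Disables block cache access for all writers created through the
   * returned factory.
   */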
  public static final WriterFactory getWriterFactoryNoCache(Configuration conf) {
    Configuration tempConf = new Configuration(conf);
    tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
    return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
  }

  /**
   * Returns the factory to be used to create {@link HFile} writers.
   */
  public static final WriterFactory getWriterFactory(Configuration conf,
      CacheConfig cacheConf) {
    int version = getFormatVersion(conf);
    switch (version) {
    case 2:
      return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
    case 3:
      return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
    default:
      throw new IllegalArgumentException("Cannot create writer for HFile " +
          "format version " + version);
    }
  }

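  /**
   * An abstraction used by the block index. Implementations will check cache for any
   * asked-for block and return the cached block if found. Otherwise, after reading from
   * the filesystem, will try and put the block into cache before returning.
   */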
  public interface CachingBlockReader {
    /**
     * Read in a file block.
     * @param offset offset to read at
     * @param onDiskBlockSize size of the block on disk
     * @param cacheBlock whether to cache the block read
     * @param pread use positional read
     * @param isCompaction is this block being read as part of a compaction
     * @param updateCacheMetrics whether to update cache metrics for this read
     * @param expectedBlockType the block type we are expecting to read with this read operation,
     *   or null to read whatever block type is available
     * @param expectedDataBlockEncoding the data block encoding the caller is expecting data
     *   blocks to be in, or null to skip this check
     * @return the block read
     * @throws IOException if the block could not be read
     */
    HFileBlock readBlock(long offset, long onDiskBlockSize,
        boolean cacheBlock, final boolean pread, final boolean isCompaction,
        final boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException;
  }

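  /** An interface used by clients to open and iterate an {@link HFile}. */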
  public interface Reader extends Closeable, CachingBlockReader {
    /**
     * Returns this reader's "name". Usually it is the last component of the path.
     * Good for logging and printing out messages.
     */
    String getName();

    KVComparator getComparator();

    HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);

    ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;

    Map<byte[], byte[]> loadFileInfo() throws IOException;

    byte[] getLastKey();

    byte[] midkey() throws IOException;

    long length();

    long getEntries();

    byte[] getFirstKey();

    long indexSize();

    byte[] getFirstRowKey();

    byte[] getLastRowKey();

    FixedFileTrailer getTrailer();

    HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();

    HFileScanner getScanner(boolean cacheBlocks, boolean pread);

    Compression.Algorithm getCompressionAlgorithm();

    /**
     * Retrieves general Bloom filter metadata as appropriate for each
     * {@link HFile} version. Knows nothing about how that metadata is structured.
     */
    DataInput getGeneralBloomFilterMetadata() throws IOException;

    /**
     * Retrieves delete family Bloom filter metadata as appropriate for each
     * {@link HFile} version. Knows nothing about how that metadata is structured.
     */
    DataInput getDeleteBloomFilterMetadata() throws IOException;

    Path getPath();

    /** Close method with optional evictOnClose. */
    void close(boolean evictOnClose) throws IOException;

    DataBlockEncoding getDataBlockEncoding();

    boolean hasMVCCInfo();

    /** Return the file context of the HFile this reader belongs to. */
    HFileContext getFileContext();
  }

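  /**
   * Returns a reader for the given file, choosing the reader implementation that matches the
   * major version recorded in the file's trailer.
   * @param path hfile's path
   * @param fsdis stream of path's file
   * @param size max size of the trailer
   * @param cacheConf cache configuration values, cannot be null
   * @param hfs filesystem the file lives on
   * @return an appropriate instance of HFile.Reader
   * @throws IOException if the file is invalid; a CorruptHFileException is thrown when the
   *   trailer cannot be read
   */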
  private static Reader pickReaderVersion(Path path, FSDataInputStreamWrapper fsdis,
      long size, CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
    FixedFileTrailer trailer = null;
    try {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      switch (trailer.getMajorVersion()) {
      case 2:
        return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs, conf);
      case 3:
        return new HFileReaderV3(path, trailer, fsdis, size, cacheConf, hfs, conf);
      default:
        throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
      }
    } catch (Throwable t) {
      try {
        fsdis.close();
      } catch (Throwable t2) {
        LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
      }
      throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
    }
  }

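  /**
   * @param fs filesystem
   * @param path Path to file to read
   * @param fsdis a stream of path's file
   * @param size max size of the trailer
   * @param cacheConf cache configuration for hfile's contents
   * @param conf configuration
   * @return an active Reader instance
   * @throws IOException if the file is corrupt or could not be read
   */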
  public static Reader createReader(FileSystem fs, Path path,
      FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
      throws IOException {
    HFileSystem hfs = null;

    /*
     * If the fs is not an instance of HFileSystem, then create an
     * instance of HFileSystem that wraps over the specified fs.
     * In this case, we will not be able to avoid checksumming inside
     * the filesystem.
     */
    if (!(fs instanceof HFileSystem)) {
      hfs = new HFileSystem(fs);
    } else {
      hfs = (HFileSystem)fs;
    }
    return pickReaderVersion(path, fsdis, size, cacheConf, hfs, conf);
  }

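  /**
   * Opens a reader for the given file, probing the trailer to pick the right reader version.
   * @param fs filesystem
   * @param path Path to file to read
   * @param cacheConf cache configuration, must not be null
   * @param conf configuration
   * @return an active Reader instance
   * @throws IOException will throw a CorruptHFileException (DoNotRetryIOException subtype) if
   *   the hfile is corrupt or invalid
   */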
  public static Reader createReader(
      FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
    return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(),
        cacheConf, stream.getHfs(), conf);
  }

  /**
   * This factory method is intended for use only by unit tests.
   */
  static Reader createReaderFromStream(Path path,
      FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
      throws IOException {
    FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
    return pickReaderVersion(path, wrapper, size, cacheConf, null, conf);
  }

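  /**
   * Returns true if the specified file has a valid HFile Trailer.
   * @param fs filesystem
   * @param path Path to file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */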
  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
    return isHFileFormat(fs, fs.getFileStatus(path));
  }

  /**
   * Returns true if the specified file has a valid HFile Trailer.
   * @param fs filesystem
   * @param fileStatus the file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */
  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
      throws IOException {
    final Path path = fileStatus.getPath();
    final long size = fileStatus.getLen();
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
    try {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      return true;
    } catch (IllegalArgumentException e) {
      return false;
    } catch (IOException e) {
      throw e;
    } finally {
      try {
        fsdis.close();
      } catch (Throwable t) {
        LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
      }
    }
  }

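  /**
   * Metadata for this file. Conjured by the writer. Read in by the reader.
   */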
  public static class FileInfo implements SortedMap<byte[], byte[]> {
    static final String RESERVED_PREFIX = "hfile.";
    static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
    static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
    static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
    static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
    static final byte [] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
    static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
    static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
    public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
    private final SortedMap<byte [], byte []> map =
        new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);

    public FileInfo() {
      super();
    }

    /**
     * Append the given key/value pair to the file info, optionally checking the
     * key prefix.
     *
     * @param k key to add
     * @param v value to add
     * @param checkPrefix whether to check that the provided key does not start
     *          with the reserved prefix
     * @return this file info object
     * @throws IOException if the key starts with the reserved prefix and
     *          <code>checkPrefix</code> is true
     */
    public FileInfo append(final byte[] k, final byte[] v,
        final boolean checkPrefix) throws IOException {
      if (k == null || v == null) {
        throw new NullPointerException("Neither key nor value may be null");
      }
      if (checkPrefix && isReservedFileInfoKey(k)) {
        throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
            + " are reserved");
      }
      put(k, v);
      return this;
    }

    public void clear() {
      this.map.clear();
    }

    public Comparator<? super byte[]> comparator() {
      return map.comparator();
    }

    public boolean containsKey(Object key) {
      return map.containsKey(key);
    }

    public boolean containsValue(Object value) {
      return map.containsValue(value);
    }

    public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
      return map.entrySet();
    }

    public boolean equals(Object o) {
      return map.equals(o);
    }

    public byte[] firstKey() {
      return map.firstKey();
    }

    public byte[] get(Object key) {
      return map.get(key);
    }

    public int hashCode() {
      return map.hashCode();
    }

    public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
      return this.map.headMap(toKey);
    }

    public boolean isEmpty() {
      return map.isEmpty();
    }

    public Set<byte[]> keySet() {
      return map.keySet();
    }

    public byte[] lastKey() {
      return map.lastKey();
    }

    public byte[] put(byte[] key, byte[] value) {
      return this.map.put(key, value);
    }

    public void putAll(Map<? extends byte[], ? extends byte[]> m) {
      this.map.putAll(m);
    }

    public byte[] remove(Object key) {
      return this.map.remove(key);
    }

    public int size() {
      return map.size();
    }

    public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
      return this.map.subMap(fromKey, toKey);
    }

    public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
      return this.map.tailMap(fromKey);
    }

    public Collection<byte[]> values() {
      return map.values();
    }

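    /**
     * Write out this instance on the passed in <code>out</code> stream.
     * We write it as a protobuf.
     * @param out the stream to write to
     * @throws IOException if the write fails
     * @see #read(DataInputStream)
     */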
    void write(final DataOutputStream out) throws IOException {
      HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
      for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
        HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
        bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
        bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
        builder.addMapEntry(bbpBuilder.build());
      }
      out.write(ProtobufUtil.PB_MAGIC);
      builder.build().writeDelimitedTo(out);
    }

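    /**
     * Populate this instance with what we find on the passed in <code>in</code> stream.
     * Can deserialize protobuf as well as the old Writables format.
     * @param in the stream to read from
     * @throws IOException if the read fails
     * @see #write(DataOutputStream)
     */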
    void read(final DataInputStream in) throws IOException {
      // First probe for the protobuf magic prefix to decide which serialization we are reading.
      int pblen = ProtobufUtil.lengthOfPBMagic();
      byte [] pbuf = new byte[pblen];
      if (in.markSupported()) in.mark(pblen);
      int read = in.read(pbuf);
      if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
      if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
        parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
      } else {
        if (in.markSupported()) {
          in.reset();
          parseWritable(in);
        } else {
          // We cannot use BufferedInputStream; it consumes more than we read from the underlying IS.
          ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
          SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
          // Prepend the bytes already consumed while probing for the pb magic, then fall back to
          // the old Writable parse.
          parseWritable(new DataInputStream(sis));
        }
      }
    }

    /**
     * Parse the old Writable format: an entry count followed by that many key/value byte arrays.
     * Each entry carried a code byte naming the serialized type; we know it was a byte [] so
     * below we just read and ignore it.
     */
    void parseWritable(final DataInputStream in) throws IOException {
      // First clear the map. Otherwise we will just accumulate entries every time this method is called.
      this.map.clear();
      // Read the number of entries in the map.
      int entries = in.readInt();
      // Then read each key/value pair.
      for (int i = 0; i < entries; i++) {
        byte [] key = Bytes.readByteArray(in);
        // We used to read a byte that encoded the class type. Read and ignore.
        in.readByte();
        byte [] value = Bytes.readByteArray(in);
        this.map.put(key, value);
      }
    }

    /**
     * Fill our map with the content of the protobuf message read off disk.
     * @param fip the protobuf message to read
     */
    void parsePB(final HFileProtos.FileInfoProto fip) {
      this.map.clear();
      for (BytesBytesPair pair: fip.getMapEntryList()) {
        this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
      }
    }
  }

  /** Return true if the given file info key is reserved for internal use. */
  public static boolean isReservedFileInfoKey(byte[] key) {
    return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
  }

  /**
   * Get names of supported compression algorithms. The names are acceptable by HFile.Writer.
   * @return Array of strings, each represents a supported compression algorithm.
   */
  public static String[] getSupportedCompressionAlgorithms() {
    return Compression.getSupportedAlgorithms();
  }

  // Utility methods.
  /*
   * @param l Long to convert to an int.
   * @return <code>l</code> cast as an int.
   */
  static int longToInt(final long l) {
    // Expecting the size() of a block not exceeding 4GB. Assuming the
    // size() will wrap to negative integer if it exceeds 2GB.
    return (int)(l & 0x00000000ffffffffL);
  }

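  /**
   * Returns all HFiles belonging to the given region directory. Could return an
   * empty list.
   *
   * @param fs  The file system reference.
   * @param regionDir  The region directory to scan.
   * @return The list of files found.
   * @throws IOException When scanning the files fails.
   */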
  static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
      throws IOException {
    List<Path> regionHFiles = new ArrayList<Path>();
    PathFilter dirFilter = new FSUtils.DirFilter(fs);
    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
    for (FileStatus dir : familyDirs) {
      FileStatus[] files = fs.listStatus(dir.getPath());
      for (FileStatus file : files) {
        if (!file.isDirectory() &&
            (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
            (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
          regionHFiles.add(file.getPath());
        }
      }
    }
    return regionHFiles;
  }

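  /**
   * Checks the given {@link HFile} format version, and throws an exception if
   * invalid. Note that if the version number comes from an input file and has
   * not been verified, the caller needs to re-throw an {@link IOException} to
   * indicate that this is not a software error, but corrupted input.
   *
   * @param version an HFile version
   * @throws IllegalArgumentException if the version is invalid
   */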
  public static void checkFormatVersion(int version)
      throws IllegalArgumentException {
    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
      throw new IllegalArgumentException("Invalid HFile version: " + version
          + " (expected to be between " + MIN_FORMAT_VERSION + " and "
          + MAX_FORMAT_VERSION + ")");
    }
  }

  public static void main(String[] args) throws Exception {
    // Delegate to the pretty printer to preserve old command-line behavior.
    HFilePrettyPrinter.main(args);
  }
}