1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.io.hfile;
20
21 import java.io.ByteArrayOutputStream;
22 import java.io.DataInput;
23 import java.io.DataInputStream;
24 import java.io.DataOutput;
25 import java.io.DataOutputStream;
26 import java.io.IOException;
27 import java.nio.ByteBuffer;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.List;
31 import java.util.concurrent.atomic.AtomicReference;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FSDataOutputStream;
37 import org.apache.hadoop.hbase.Cell;
38 import org.apache.hadoop.hbase.KeyValue;
39 import org.apache.hadoop.hbase.KeyValue.KVComparator;
40 import org.apache.hadoop.hbase.KeyValueUtil;
41 import org.apache.hadoop.hbase.classification.InterfaceAudience;
42 import org.apache.hadoop.hbase.io.HeapSize;
43 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
44 import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;
45 import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
46 import org.apache.hadoop.hbase.util.ByteBufferUtils;
47 import org.apache.hadoop.hbase.util.Bytes;
48 import org.apache.hadoop.hbase.util.ClassSize;
49 import org.apache.hadoop.io.WritableUtils;
50 import org.apache.hadoop.util.StringUtils;
51
52
53
54
55
56
57
58
59
60
61
62
63 @InterfaceAudience.Private
64 public class HFileBlockIndex {
65
66 private static final Log LOG = LogFactory.getLog(HFileBlockIndex.class);
67
68 static final int DEFAULT_MAX_CHUNK_SIZE = 128 * 1024;
69
70
71
72
73
74 public static final String MAX_CHUNK_SIZE_KEY = "hfile.index.block.max.size";
75
76
77
78
79
80
81
82 public static final String MIN_INDEX_NUM_ENTRIES_KEY = "hfile.index.block.min.entries";
83
84 static final int DEFAULT_MIN_INDEX_NUM_ENTRIES = 16;
85
86
87
88
89
90
91
92 static final int SECONDARY_INDEX_ENTRY_OVERHEAD = Bytes.SIZEOF_INT
93 + Bytes.SIZEOF_LONG;
94
95
96
97
98 private static final String INLINE_BLOCKS_NOT_ALLOWED =
99 "Inline blocks are not allowed in the single-level-only mode";
100
101
102
103
104
105
106
107 private static final int MID_KEY_METADATA_SIZE = Bytes.SIZEOF_LONG +
108 2 * Bytes.SIZEOF_INT;
109
110
111
112
113
114
115
116
117
118
119
120 public static class BlockIndexReader implements HeapSize {
121
122 private final KVComparator comparator;
123
124
125 private byte[][] blockKeys;
126 private long[] blockOffsets;
127 private int[] blockDataSizes;
128 private int rootCount = 0;
129
130
131 private long midLeafBlockOffset = -1;
132 private int midLeafBlockOnDiskSize = -1;
133 private int midKeyEntry = -1;
134
135
136 private AtomicReference<byte[]> midKey = new AtomicReference<byte[]>();
137
138
139
140
141
142 private int searchTreeLevel;
143
144
145 private CachingBlockReader cachingBlockReader;
146
    /**
     * Constructor for a reader that can also fetch non-root index blocks on
     * demand through the given caching block reader.
     *
     * @param c comparator used to order keys in the index
     * @param treeLevel number of levels in the block index tree
     * @param cachingBlockReader reader used to load intermediate/leaf index blocks
     */
    public BlockIndexReader(final KVComparator c, final int treeLevel,
        final CachingBlockReader cachingBlockReader) {
      this(c, treeLevel);
      this.cachingBlockReader = cachingBlockReader;
    }

    /**
     * Constructor without a block reader. Operations that must read a non-root
     * index block (e.g. {@link #midkey()} on a multi-level index) will fail.
     *
     * @param c comparator used to order keys in the index
     * @param treeLevel number of levels in the block index tree
     */
    public BlockIndexReader(final KVComparator c, final int treeLevel)
    {
      comparator = c;
      searchTreeLevel = treeLevel;
    }
158
159
160
161
    /**
     * @return true if the root-level index contains no entries. Assumes the
     *         root index has been loaded (blockKeys is non-null).
     */
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    /**
     * Verifies that the block index is non-empty and has been successfully
     * loaded.
     *
     * @throws IllegalStateException if the index is empty or not loaded
     */
    public void ensureNonEmpty() {
      if (blockKeys.length == 0) {
        throw new IllegalStateException("Block index is empty or not loaded");
      }
    }
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191 public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks,
192 boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
193 throws IOException {
194 BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
195 cacheBlocks,
196 pread, isCompaction, expectedDataBlockEncoding);
197 if (blockWithScanInfo == null) {
198 return null;
199 } else {
200 return blockWithScanInfo.getHFileBlock();
201 }
202 }
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
    /**
     * Walks the block index tree from the root down to the data block that may
     * contain the given key, returning the block together with the next
     * indexed key (used to decide when a scanner must re-seek).
     *
     * @param key the key to search for
     * @param currentBlock the block the caller is currently positioned on, or
     *          null; reused when its offset matches the target to avoid a read
     * @param cacheBlocks whether data blocks should be cached on read
     * @param pread whether to use positional read
     * @param isCompaction whether this read is on behalf of a compaction
     * @param expectedDataBlockEncoding the encoding the data block is expected
     *          to have, or null
     * @return the data block plus scan info, or null if the key is not
     *         contained in any root-level block
     * @throws IOException on read failure or index corruption
     */
    public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
        boolean cacheBlocks,
        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
        throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        // The key is before the first root entry (or index is empty).
        return null;
      }

      // The next indexed key after the block we descend into, if known.
      Cell nextIndexedKey = null;

      // Starting point: the root-level entry covering the key.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = new KeyValue.KeyOnlyKeyValue(blockKeys[rootLevelIndex + 1]);
      } else {
        nextIndexedKey = KeyValueScanner.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block;
      while (true) {
        if (currentBlock != null && currentBlock.getOffset() == currentOffset)
        {
          // Avoid re-reading the block the caller already holds. This can
          // only legitimately happen at the data-block level; the level
          // check after the loop catches inconsistencies.
          block = currentBlock;
        } else {
          // Index blocks below the root are always cached; data blocks only
          // when the caller asked for caching.
          boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
          BlockType expectedBlockType;
          if (lookupLevel < searchTreeLevel - 1) {
            expectedBlockType = BlockType.INTERMEDIATE_INDEX;
          } else if (lookupLevel == searchTreeLevel - 1) {
            expectedBlockType = BlockType.LEAF_INDEX;
          } else {
            // this also accounts for ENCODED_DATA
            expectedBlockType = BlockType.DATA;
          }
          block = cachingBlockReader.readBlock(currentOffset,
              currentOnDiskSize, shouldCache, pread, isCompaction, true,
              expectedBlockType, expectedDataBlockEncoding);
        }

        if (block == null) {
          throw new IOException("Failed to read block at offset " +
              currentOffset + ", onDiskSize=" + currentOnDiskSize);
        }

        // Found a data block, break the loop and check our level in the tree.
        if (block.getBlockType().isData()) {
          break;
        }

        // Not a data block: descend one more level. Guard against reading
        // deeper than the declared tree height.
        if (++lookupLevel > searchTreeLevel) {
          throw new IOException("Search Tree Level overflow: lookupLevel="+
              lookupLevel + ", searchTreeLevel=" + searchTreeLevel);
        }

        // Locate the entry corresponding to the key in this non-root index
        // block; the buffer is positioned at the entry on success.
        ByteBuffer buffer = block.getBufferWithoutHeader();
        index = locateNonRootIndexEntry(buffer, key, comparator);
        if (index == -1) {
          // This has to be changed
          // For now change this to key value
          KeyValue kv = KeyValueUtil.ensureKeyValue(key);
          throw new IOException("The key "
              + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength())
              + " is before the" + " first key of the non-root index block "
              + block);
        }

        currentOffset = buffer.getLong();
        currentOnDiskSize = buffer.getInt();

        // Track the next indexed key within this block, if any, to refine
        // the value carried down from the root level.
        byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
        if (tmpNextIndexedKey != null) {
          nextIndexedKey = new KeyValue.KeyOnlyKeyValue(tmpNextIndexedKey);
        }
      }

      if (lookupLevel != searchTreeLevel) {
        throw new IOException("Reached a data block at level " + lookupLevel +
            " but the number of levels is " + searchTreeLevel);
      }

      // set the next indexed key for the data block.
      BlockWithScanInfo blockWithScanInfo = new BlockWithScanInfo(block, nextIndexedKey);
      return blockWithScanInfo;
    }
326
327
328
329
330
331
332
333
    /**
     * Returns an approximate mid-key of the HFile. For a multi-level index the
     * mid-key is read from the middle leaf-level index block (located via the
     * mid-key metadata); for a single-level index the middle root entry's key
     * is used. The result is cached in {@link #midKey}.
     *
     * @return the approximate mid-key as a serialized key byte array
     * @throws IOException if the file is empty or the leaf block cannot be read
     */
    public byte[] midkey() throws IOException {
      if (rootCount == 0)
        throw new IOException("HFile empty");

      // Return the cached value if a previous call already computed it.
      byte[] targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException("Have to read the middle leaf block but " +
              "no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(
            midLeafBlockOffset, midLeafBlockOnDiskSize, true, true, false, true,
            BlockType.LEAF_INDEX, null);

        // Decode the midKeyEntry-th key from the non-root block layout:
        // entry count, secondary index of relative offsets, then entries.
        ByteBuffer b = midLeafBlock.getBufferWithoutHeader();
        int numDataBlocks = b.getInt();
        int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 1));
        int keyLen = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 2)) -
            keyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;
        int keyOffset = Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset
            + SECONDARY_INDEX_ENTRY_OVERHEAD;
        targetMidKey = ByteBufferUtils.toBytes(b, keyOffset, keyLen);
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }
370
371
372
373
    /**
     * @param i zero-based index of the root-level block
     * @return the first key of the i-th block on the root level
     */
    public byte[] getRootBlockKey(int i) {
      return blockKeys[i];
    }

    /**
     * @param i zero-based index of the root-level block
     * @return the file offset of the i-th block on the root level
     */
    public long getRootBlockOffset(int i) {
      return blockOffsets[i];
    }

    /**
     * @param i zero-based index of the root-level block
     * @return the on-disk size of the i-th block on the root level
     */
    public int getRootBlockDataSize(int i) {
      return blockDataSizes[i];
    }

    /**
     * @return the number of root-level blocks in this block index
     */
    public int getRootBlockCount() {
      return rootCount;
    }
400
401
402
403
404
405
406
407
408
409
    /**
     * Finds the root-level index block that may contain the given key.
     *
     * @param key the key as a byte array
     * @param offset the offset of the key in the byte array
     * @param length the length of the key
     * @return the index of the root-level block that may contain the key, or
     *         -1 if the key is before the first block's key
     */
    public int rootBlockContainingKey(final byte[] key, int offset, int length) {
      int pos = Bytes.binarySearch(blockKeys, key, offset, length, comparator);
      // Exact match: the key equals the first key of block 'pos'.
      if (pos >= 0) {
        // This means pos is in range, since binarySearch only matches keys
        // present in blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Miss: binarySearch returned -(insertionPoint + 1). The block that may
      // contain the key is the one just before the insertion point.
      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }
430
431
432
433
434
435
436
    /**
     * Finds the root-level index block that may contain the given key.
     *
     * @param key the key to find
     * @return the index of the root-level block that may contain the key, or
     *         -1 if the key is before the first block's key
     */
    public int rootBlockContainingKey(final Cell key) {
      int pos = Bytes.binarySearch(blockKeys, key, comparator);
      // Exact match: the key equals the first key of block 'pos'.
      if (pos >= 0) {
        // This means pos is in range, since binarySearch only matches keys
        // present in blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Miss: binarySearch returned -(insertionPoint + 1). The block that may
      // contain the key is the one just before the insertion point.
      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }
457
458
459
460
461
462
463
464
    /**
     * Appends a new root-level entry to the in-memory arrays. Assumes the
     * arrays were pre-sized by {@link #readRootIndex(DataInput, int)}.
     *
     * @param key the first key of the block
     * @param offset the file offset of the block
     * @param dataSize the on-disk size of the block
     */
    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      blockKeys[rootCount] = key;
      blockDataSizes[rootCount] = dataSize;
      rootCount++;
    }
471
472
473
474
475
476
477
    /**
     * Returns the indexed key at the i-th position in a non-root index block,
     * or null if the position is out of range.
     *
     * @param nonRootIndex buffer over the non-root index block (header excluded)
     * @param i zero-based entry position
     * @return a copy of the key bytes, or null if i is out of bounds
     */
    private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
      int numEntries = nonRootIndex.getInt(0);
      if (i < 0 || i >= numEntries) {
        return null;
      }

      // Entries start after the entry count and the (numEntries + 1)-item
      // secondary index of relative entry offsets.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
      // Relative offset of the target entry within the entries section.
      int targetKeyRelOffset = nonRootIndex.getInt(
          Bytes.SIZEOF_INT * (i + 1));

      // Skip the entry's fixed offset/size fields to reach the key bytes.
      int targetKeyOffset = entriesOffset
          + targetKeyRelOffset
          + SECONDARY_INDEX_ENTRY_OVERHEAD;

      // The key length is the distance to the next entry's relative offset
      // minus the fixed per-entry overhead.
      int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) -
          targetKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

      return ByteBufferUtils.toBytes(nonRootIndex, targetKeyOffset, targetKeyLength);
    }
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
    /**
     * Performs a binary search over a non-root (leaf or intermediate-level)
     * index block buffer.
     *
     * @param key the key to search for
     * @param nonRootIndex buffer over the block, header excluded
     * @param comparator comparator for keys
     * @return the zero-based index of the last entry whose key is less than or
     *         equal to the given key, or -1 if the given key is before the
     *         first entry's key
     */
    static int binarySearchNonRootIndex(Cell key, ByteBuffer nonRootIndex,
        KVComparator comparator) {
      // The first integer of the block is the number of entries.
      int numEntries = nonRootIndex.getInt(0);
      int low = 0;
      int high = numEntries - 1;
      int mid = 0;

      // Entries start after the entry count and the (numEntries + 1)-item
      // secondary index of relative entry offsets.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

      // Reused wrapper so each probe compares against the buffer's key bytes
      // without copying them out.
      KeyValue.KeyOnlyKeyValue nonRootIndexKV = new KeyValue.KeyOnlyKeyValue();
      while (low <= high) {
        mid = (low + high) >>> 1;

        // Relative offset of the mid entry within the entries section.
        int midKeyRelOffset = nonRootIndex.getInt(
            Bytes.SIZEOF_INT * (mid + 1));

        // Skip the entry's fixed offset/size fields to reach the key bytes.
        int midKeyOffset = entriesOffset
            + midKeyRelOffset
            + SECONDARY_INDEX_ENTRY_OVERHEAD;

        // Key length is the distance to the next entry's relative offset
        // minus the fixed per-entry overhead.
        int midLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (mid + 2)) -
            midKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

        // NOTE(review): relies on the buffer being array-backed — confirm
        // callers never pass a direct ByteBuffer here.
        nonRootIndexKV.setKey(nonRootIndex.array(),
            nonRootIndex.arrayOffset() + midKeyOffset, midLength);
        int cmp = comparator.compareOnlyKeyPortion(key, nonRootIndexKV);

        // key lives above the midpoint
        if (cmp > 0)
          low = mid + 1;
        // key lives below the midpoint
        else if (cmp < 0)
          high = mid - 1;
        else
          return mid; // exact match
      }

      // As per the loop invariant, low must be exactly high + 1 on exit.
      if (low != high + 1) {
        throw new IllegalStateException("Binary search broken: low=" + low
            + " " + "instead of " + (high + 1));
      }

      // Miss: the containing entry is the one just before the insertion point.
      int i = low - 1;

      // Sanity-check the result before returning it.
      if (i < -1 || i >= numEntries) {
        throw new IllegalStateException("Binary search broken: result is " +
            i + " but expected to be between -1 and (numEntries - 1) = " +
            (numEntries - 1));
      }

      return i;
    }
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
    /**
     * Searches for the given key in a non-root index block and, on success,
     * positions the buffer at the start of the matching entry so the caller
     * can read its (offset, on-disk-size) fields directly.
     *
     * @param nonRootBlock buffer over the non-root index block, header excluded
     * @param key the key to locate
     * @param comparator comparator for keys
     * @return the zero-based entry index, or -1 if the key is before the
     *         first key of the block
     */
    static int locateNonRootIndexEntry(ByteBuffer nonRootBlock, Cell key,
        KVComparator comparator) {
      int entryIndex = binarySearchNonRootIndex(key, nonRootBlock, comparator);

      if (entryIndex != -1) {
        int numEntries = nonRootBlock.getInt(0);

        // Offset where the entries section begins (after the secondary index).
        int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

        // Relative offset of the located entry within the entries section.
        int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT * (1 + entryIndex));

        nonRootBlock.position(entriesOffset + entryRelOffset);
      }

      return entryIndex;
    }
629
630
631
632
633
634
635
636
637
638
639
640 public void readRootIndex(DataInput in, final int numEntries)
641 throws IOException {
642 blockOffsets = new long[numEntries];
643 blockKeys = new byte[numEntries][];
644 blockDataSizes = new int[numEntries];
645
646
647 if (numEntries > 0) {
648 for (int i = 0; i < numEntries; ++i) {
649 long offset = in.readLong();
650 int dataSize = in.readInt();
651 byte[] key = Bytes.readByteArray(in);
652 add(key, offset, dataSize);
653 }
654 }
655 }
656
657
658
659
660
661
662
663
664
665
666
667
    /**
     * Reads the root-level index from the given block and returns the stream,
     * advanced past the root entries (e.g. positioned at mid-key metadata).
     *
     * @param blk the HFile block holding the root index
     * @param numEntries the number of root-level entries
     * @return the data input stream, advanced past the root index entries
     */
    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }
673
674
675
676
677
678
679
680
681
682
    /**
     * Reads the root index block of a multi-level index, plus the optional
     * mid-key metadata (middle leaf block offset/size and the mid-key entry
     * index within it), if present.
     *
     * @param blk the HFile block holding the root index
     * @param numEntries the number of root-level entries
     */
    public void readMultiLevelIndexRoot(HFileBlock blk,
        final int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // Checksum bytes trail the payload; subtract them when checking whether
      // mid-key metadata is present.
      int checkSumBytes = blk.totalChecksumBytes();
      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata was written; the mid-key fields keep their
        // "unavailable" defaults (-1).
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }
697
698 @Override
699 public String toString() {
700 StringBuilder sb = new StringBuilder();
701 sb.append("size=" + rootCount).append("\n");
702 for (int i = 0; i < rootCount; i++) {
703 sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
704 .append("\n offset=").append(blockOffsets[i])
705 .append(", dataSize=" + blockDataSizes[i]).append("\n");
706 }
707 return sb.toString();
708 }
709
    /** Estimates the heap footprint of this reader, including the key arrays. */
    @Override
    public long heapSize() {
      // Fixed part: object header plus references and primitive int fields.
      long heapSize = ClassSize.align(6 * ClassSize.REFERENCE +
          2 * Bytes.SIZEOF_INT + ClassSize.OBJECT);

      // Mid-key metadata (one long and two ints).
      heapSize += MID_KEY_METADATA_SIZE;

      if (blockKeys != null) {
        // The array of references to the per-block key byte arrays.
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length
            * ClassSize.REFERENCE);

        // Each key byte array itself.
        for (byte[] key : blockKeys) {
          heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
        }
      }

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length
            * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length
            * Bytes.SIZEOF_INT);
      }

      return ClassSize.align(heapSize);
    }
742
743 }
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760 public static class BlockIndexWriter implements InlineBlockWriter {
761
762
763
764
765
766
767
768
769
770 private BlockIndexChunk rootChunk = new BlockIndexChunk();
771
772
773
774
775
776 private BlockIndexChunk curInlineChunk = new BlockIndexChunk();
777
778
779
780
781
782
783
784
785
786
787 private int numLevels = 1;
788
789 private HFileBlock.Writer blockWriter;
790 private byte[] firstKey = null;
791
792
793
794
795
796
797 private long totalNumEntries;
798
799
800 private long totalBlockOnDiskSize;
801
802
803 private long totalBlockUncompressedSize;
804
805
806 private int maxChunkSize;
807
808
809 private int minIndexNumEntries;
810
811
812 private boolean singleLevelOnly;
813
814
815 private CacheConfig cacheConf;
816
817
818 private String nameForCaching;
819
820
    /**
     * Creates a single-level block index writer: inline (multi-level) index
     * blocks are not allowed, and no cache-on-write is performed.
     */
    public BlockIndexWriter() {
      this(null, null, null);
      singleLevelOnly = true;
    }

    /**
     * Creates a multi-level block index writer.
     *
     * @param blockWriter the block writer used to write index blocks
     * @param cacheConf cache configuration for cache-on-write, or null
     * @param nameForCaching file name used in block cache keys; must be
     *          non-null exactly when cacheConf is non-null
     */
    public BlockIndexWriter(HFileBlock.Writer blockWriter,
        CacheConfig cacheConf, String nameForCaching) {
      if ((cacheConf == null) != (nameForCaching == null)) {
        throw new IllegalArgumentException("Block cache and file name for " +
            "caching must be both specified or both null");
      }

      this.blockWriter = blockWriter;
      this.cacheConf = cacheConf;
      this.nameForCaching = nameForCaching;
      this.maxChunkSize = HFileBlockIndex.DEFAULT_MAX_CHUNK_SIZE;
      this.minIndexNumEntries = HFileBlockIndex.DEFAULT_MIN_INDEX_NUM_ENTRIES;
    }
845
846 public void setMaxChunkSize(int maxChunkSize) {
847 if (maxChunkSize <= 0) {
848 throw new IllegalArgumentException("Invalid maximum index block size");
849 }
850 this.maxChunkSize = maxChunkSize;
851 }
852
853 public void setMinIndexNumEntries(int minIndexNumEntries) {
854 if (minIndexNumEntries <= 1) {
855 throw new IllegalArgumentException("Invalid maximum index level, should be >= 2");
856 }
857 this.minIndexNumEntries = minIndexNumEntries;
858 }
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
    /**
     * Writes the root and intermediate levels of the block index to the output
     * stream, generating the tree from the bottom up. Assumes the leaf level
     * has already been inline-written to the data stream.
     *
     * @param out the output stream to write the index to
     * @return the offset of the root-level index block in the stream
     * @throws IOException if the last inline chunk still contains entries or a
     *           write fails
     */
    public long writeIndexBlocks(FSDataOutputStream out) throws IOException {
      if (curInlineChunk != null && curInlineChunk.getNumEntries() != 0) {
        throw new IOException("Trying to write a multi-level block index, " +
            "but are " + curInlineChunk.getNumEntries() + " entries in the " +
            "last inline chunk.");
      }

      // Mid-key metadata is only meaningful for multi-level indexes; compute
      // it before intermediate levels replace the current root chunk.
      byte[] midKeyMetadata = numLevels > 1 ? rootChunk.getMidKeyMetadata()
          : null;

      if (curInlineChunk != null) {
        // Keep splitting the root into intermediate levels while it is too
        // large, still has enough entries to split, and the 16-level cap has
        // not been reached.
        while (rootChunk.getRootSize() > maxChunkSize
            // HBASE-16288: only split when the root has more than the
            // minimum number of entries.
            && rootChunk.getNumEntries() > minIndexNumEntries
            && numLevels < 16) {
          rootChunk = writeIntermediateLevel(out, rootChunk);
          numLevels += 1;
        }
      }

      // Write the root level.
      long rootLevelIndexPos = out.getPos();

      {
        DataOutput blockStream =
            blockWriter.startWriting(BlockType.ROOT_INDEX);
        rootChunk.writeRoot(blockStream);
        if (midKeyMetadata != null)
          blockStream.write(midKeyMetadata);
        blockWriter.writeHeaderAndData(out);
      }

      // Account for the root block's sizes.
      totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
      totalBlockUncompressedSize +=
          blockWriter.getUncompressedSizeWithoutHeader();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
          + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
          + " root-level entries, " + totalNumEntries + " total entries, "
          + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
          " on-disk size, "
          + StringUtils.humanReadableInt(totalBlockUncompressedSize) +
          " total uncompressed size.");
      }
      return rootLevelIndexPos;
    }
929
930
931
932
933
934
935
936
937
938
939
    /**
     * Writes the block index data as a single-level structure.
     *
     * @param out the data output stream to write the block index to
     * @param description a short description of the index, used in a
     *          trace-level log message
     * @throws IOException if single-level mode is off or root entries exist
     */
    public void writeSingleLevelIndex(DataOutput out, String description)
        throws IOException {
      expectNumLevels(1);

      if (!singleLevelOnly)
        throw new IOException("Single-level mode is turned off");

      if (rootChunk.getNumEntries() > 0)
        throw new IOException("Root-level entries already added in " +
            "single-level mode");

      // Promote the inline chunk to the root and start a fresh inline chunk.
      rootChunk = curInlineChunk;
      curInlineChunk = new BlockIndexChunk();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a single-level " + description + " index with "
          + rootChunk.getNumEntries() + " entries, " + rootChunk.getRootSize()
          + " bytes");
      }
      rootChunk.writeRoot(out);
    }
961
962
963
964
965
966
967
968
969
970
971
972
973
    /**
     * Splits the given level of the block index into intermediate index blocks
     * of permitted size and writes those blocks to disk.
     *
     * @param out the file to write the intermediate-level blocks to
     * @param currentLevel the level to split
     * @return the parent-level chunk with one entry per written block
     */
    private BlockIndexChunk writeIntermediateLevel(FSDataOutputStream out,
        BlockIndexChunk currentLevel) throws IOException {
      // One entry will be added to the parent per intermediate block written.
      BlockIndexChunk parent = new BlockIndexChunk();

      // Accumulates entries for the intermediate block currently being built.
      BlockIndexChunk curChunk = new BlockIndexChunk();

      for (int i = 0; i < currentLevel.getNumEntries(); ++i) {
        curChunk.add(currentLevel.getBlockKey(i),
            currentLevel.getBlockOffset(i), currentLevel.getOnDiskDataSize(i));

        // Flush once the chunk is large enough — but only after at least
        // minIndexNumEntries entries of this level have been consumed (note:
        // the check uses the level-wide position i, not the chunk's own
        // entry count).
        if (i >= minIndexNumEntries && curChunk.getRootSize() >= maxChunkSize) {
          writeIntermediateBlock(out, parent, curChunk);
        }
      }

      // Flush any remaining entries.
      if (curChunk.getNumEntries() > 0) {
        writeIntermediateBlock(out, parent, curChunk);
      }

      return parent;
    }
1000
1001 private void writeIntermediateBlock(FSDataOutputStream out,
1002 BlockIndexChunk parent, BlockIndexChunk curChunk) throws IOException {
1003 long beginOffset = out.getPos();
1004 DataOutputStream dos = blockWriter.startWriting(
1005 BlockType.INTERMEDIATE_INDEX);
1006 curChunk.writeNonRoot(dos);
1007 byte[] curFirstKey = curChunk.getBlockKey(0);
1008 blockWriter.writeHeaderAndData(out);
1009
1010 if (cacheConf != null) {
1011 HFileBlock blockForCaching = blockWriter.getBlockForCaching(cacheConf);
1012 cacheConf.getBlockCache().cacheBlock(new BlockCacheKey(nameForCaching,
1013 beginOffset), blockForCaching);
1014 }
1015
1016
1017 totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
1018 totalBlockUncompressedSize +=
1019 blockWriter.getUncompressedSizeWithoutHeader();
1020
1021
1022
1023
1024
1025
1026 parent.add(curFirstKey, beginOffset,
1027 blockWriter.getOnDiskSizeWithHeader());
1028
1029
1030 curChunk.clear();
1031 curFirstKey = null;
1032 }
1033
1034
1035
1036
1037 public final int getNumRootEntries() {
1038 return rootChunk.getNumEntries();
1039 }
1040
1041
1042
1043
1044 public int getNumLevels() {
1045 return numLevels;
1046 }
1047
1048 private void expectNumLevels(int expectedNumLevels) {
1049 if (numLevels != expectedNumLevels) {
1050 throw new IllegalStateException("Number of block index levels is "
1051 + numLevels + "but is expected to be " + expectedNumLevels);
1052 }
1053 }
1054
1055
1056
1057
1058
1059
    /**
     * Decides whether an inline index block should be written, either because
     * the current inline chunk has grown large enough or because the file is
     * being closed.
     *
     * @param closing whether the file is being finalized
     * @return true if {@link #writeInlineBlock(DataOutput)} should be called
     */
    @Override
    public boolean shouldWriteBlock(boolean closing) {
      if (singleLevelOnly) {
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);
      }

      if (curInlineChunk == null) {
        throw new IllegalStateException("curInlineChunk is null; has shouldWriteBlock been " +
            "called with closing=true and then called again?");
      }

      if (curInlineChunk.getNumEntries() == 0) {
        return false;
      }

      // We do have some entries in the current inline chunk.
      if (closing) {
        if (rootChunk.getNumEntries() == 0) {
          // If the root chunk is still empty at close time, the inline chunk
          // becomes the root and no leaf block needs to be written.
          expectNumLevels(1);
          rootChunk = curInlineChunk;
          curInlineChunk = null;  // Disallow further updates; see the null
                                  // check above.
          return false;
        }

        return true;
      } else {
        // Mid-stream: flush only once the non-root encoding is big enough.
        return curInlineChunk.getNonRootSize() >= maxChunkSize;
      }
    }
1092
1093
1094
1095
1096
1097
1098
    /**
     * Writes the current inline index block in the non-root format and
     * remembers its first key so {@link #blockWritten(long, int, int)} can
     * link the block into the root chunk. Clears the inline chunk afterwards.
     */
    @Override
    public void writeInlineBlock(DataOutput out) throws IOException {
      if (singleLevelOnly)
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);

      // Write the inline block index to the output stream in the non-root
      // index block format.
      curInlineChunk.writeNonRoot(out);

      // Save the first key of the inline block so that we can add it to the
      // parent-level index.
      firstKey = curInlineChunk.getBlockKey(0);

      // Start a new inline index block.
      curInlineChunk.clear();
    }
1115
1116
1117
1118
1119
1120 @Override
1121 public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {
1122
1123 totalBlockOnDiskSize += onDiskSize;
1124 totalBlockUncompressedSize += uncompressedSize;
1125
1126 if (singleLevelOnly)
1127 throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);
1128
1129 if (firstKey == null) {
1130 throw new IllegalStateException("Trying to add second-level index " +
1131 "entry with offset=" + offset + " and onDiskSize=" + onDiskSize +
1132 "but the first key was not set in writeInlineBlock");
1133 }
1134
1135 if (rootChunk.getNumEntries() == 0) {
1136
1137 expectNumLevels(1);
1138 numLevels = 2;
1139 }
1140
1141
1142
1143 rootChunk.add(firstKey, offset, onDiskSize, totalNumEntries);
1144 firstKey = null;
1145 }
1146
    @Override
    public BlockType getInlineBlockType() {
      // Inline index blocks produced by this writer are leaf-level blocks.
      return BlockType.LEAF_INDEX;
    }

    /**
     * Adds a new entry to the current inline (leaf-level) chunk.
     *
     * @param firstKey the first key of the data block being indexed
     * @param blockOffset the file offset of the data block
     * @param blockDataSize the on-disk size of the data block
     */
    public void addEntry(byte[] firstKey, long blockOffset, int blockDataSize) {
      curInlineChunk.add(firstKey, blockOffset, blockDataSize);
      ++totalNumEntries;
    }
1166
1167
1168
1169
    /**
     * Throws an exception if the index ended up with more than one level when
     * a single-level index was expected.
     */
    public void ensureSingleLevel() throws IOException {
      if (numLevels > 1) {
        throw new IOException ("Wrote a " + numLevels + "-level index with " +
            rootChunk.getNumEntries() + " root-level entries, but " +
            "this is expected to be a single-level block index.");
      }
    }

    /**
     * @return true when a cache config was supplied and it enables
     *         cache-on-write for index blocks
     */
    @Override
    public boolean getCacheOnWrite() {
      return cacheConf != null && cacheConf.shouldCacheIndexesOnWrite();
    }

    /**
     * @return the accumulated uncompressed size of all index blocks written so
     *         far (root, intermediate, and leaf levels)
     */
    public long getTotalUncompressedSize() {
      return totalBlockUncompressedSize;
    }
1197
1198 }
1199
1200
1201
1202
1203
1204
1205 static class BlockIndexChunk {
1206
1207
1208 private final List<byte[]> blockKeys = new ArrayList<byte[]>();
1209
1210
1211 private final List<Long> blockOffsets = new ArrayList<Long>();
1212
1213
1214 private final List<Integer> onDiskDataSizes = new ArrayList<Integer>();
1215
1216
1217
1218
1219
1220
1221 private final List<Long> numSubEntriesAt = new ArrayList<Long>();
1222
1223
1224
1225
1226
1227
1228 private int curTotalNonRootEntrySize = 0;
1229
1230
1231
1232
1233 private int curTotalRootSize = 0;
1234
1235
1236
1237
1238
1239
1240 private final List<Integer> secondaryIndexOffsetMarks =
1241 new ArrayList<Integer>();
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
    /**
     * Adds a new entry to this chunk, incrementally tracking both the
     * non-root and root encoded sizes.
     *
     * @param firstKey the first key of the block pointed to by this entry
     * @param blockOffset the file offset of the block pointed to
     * @param onDiskDataSize the on-disk size of the block pointed to
     * @param curTotalNumSubEntries cumulative number of sub-entries up to and
     *          including this entry's block, or -1 when sub-entry stats are
     *          not tracked at this level
     */
    void add(byte[] firstKey, long blockOffset, int onDiskDataSize,
        long curTotalNumSubEntries) {
      // Record the offset this entry will occupy in the non-root format's
      // secondary index of entry offsets.
      secondaryIndexOffsetMarks.add(curTotalNonRootEntrySize);
      curTotalNonRootEntrySize += SECONDARY_INDEX_ENTRY_OVERHEAD
          + firstKey.length;

      // Root format cost: long offset + int size + vint-prefixed key.
      curTotalRootSize += Bytes.SIZEOF_LONG + Bytes.SIZEOF_INT
          + WritableUtils.getVIntSize(firstKey.length) + firstKey.length;

      blockKeys.add(firstKey);
      blockOffsets.add(blockOffset);
      onDiskDataSizes.add(onDiskDataSize);

      if (curTotalNumSubEntries != -1) {
        numSubEntriesAt.add(curTotalNumSubEntries);

        // Make sure the parallel stats list stays in sync with the entries.
        if (numSubEntriesAt.size() != blockKeys.size()) {
          throw new IllegalStateException("Only have key/value count " +
              "stats for " + numSubEntriesAt.size() + " block index " +
              "entries out of " + blockKeys.size());
        }
      }
    }

    /**
     * Same as {@link #add(byte[], long, int, long)} without sub-entry stats.
     */
    public void add(byte[] firstKey, long blockOffset, int onDiskDataSize) {
      add(firstKey, blockOffset, onDiskDataSize, -1);
    }
1291
    /** Resets this chunk to an empty state so it can be reused. */
    public void clear() {
      blockKeys.clear();
      blockOffsets.clear();
      onDiskDataSizes.clear();
      secondaryIndexOffsetMarks.clear();
      numSubEntriesAt.clear();
      curTotalNonRootEntrySize = 0;
      curTotalRootSize = 0;
    }
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319 public int getEntryBySubEntry(long k) {
1320
1321
1322
1323 int i = Collections.binarySearch(numSubEntriesAt, k);
1324
1325
1326
1327
1328 if (i >= 0)
1329 return i + 1;
1330
1331
1332 return -i - 1;
1333 }
1334
1335
1336
1337
1338
1339
1340
1341
1342
    /**
     * Serializes the mid-key metadata: the offset and on-disk size of the
     * leaf-level block containing the middle sub-entry, and the index of the
     * mid-key entry within that block.
     *
     * @return a byte array of exactly {@code MID_KEY_METADATA_SIZE} bytes
     * @throws IOException if there are no leaf-level entries
     */
    public byte[] getMidKeyMetadata() throws IOException {
      ByteArrayOutputStream baos = new ByteArrayOutputStream(
          MID_KEY_METADATA_SIZE);
      DataOutputStream baosDos = new DataOutputStream(baos);
      // The last cumulative count is the total number of sub-entries.
      long totalNumSubEntries = numSubEntriesAt.get(blockKeys.size() - 1);
      if (totalNumSubEntries == 0) {
        throw new IOException("No leaf-level entries, mid-key unavailable");
      }
      long midKeySubEntry = (totalNumSubEntries - 1) / 2;
      int midKeyEntry = getEntryBySubEntry(midKeySubEntry);

      baosDos.writeLong(blockOffsets.get(midKeyEntry));
      baosDos.writeInt(onDiskDataSizes.get(midKeyEntry));

      // Position of the mid sub-entry within its containing entry's block.
      long numSubEntriesBefore = midKeyEntry > 0
          ? numSubEntriesAt.get(midKeyEntry - 1) : 0;
      long subEntryWithinEntry = midKeySubEntry - numSubEntriesBefore;
      if (subEntryWithinEntry < 0 || subEntryWithinEntry > Integer.MAX_VALUE)
      {
        throw new IOException("Could not identify mid-key index within the "
            + "leaf-level block containing mid-key: out of range ("
            + subEntryWithinEntry + ", numSubEntriesBefore="
            + numSubEntriesBefore + ", midKeySubEntry=" + midKeySubEntry
            + ")");
      }

      baosDos.writeInt((int) subEntryWithinEntry);

      if (baosDos.size() != MID_KEY_METADATA_SIZE) {
        throw new IOException("Could not write mid-key metadata: size=" +
            baosDos.size() + ", correct size: " + MID_KEY_METADATA_SIZE);
      }

      // Close just to be good citizens; closing a ByteArrayOutputStream has
      // no effect.
      baos.close();

      return baos.toByteArray();
    }
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
    /**
     * Writes this chunk in the non-root index block format: entry count, the
     * secondary index of relative entry offsets (with a trailing total-size
     * item), then the entries (offset, on-disk size, raw key bytes).
     *
     * @param out the data output to write to
     * @throws IOException if the secondary index is out of sync
     */
    void writeNonRoot(DataOutput out) throws IOException {
      // Number of entries, stored as a single integer.
      out.writeInt(blockKeys.size());

      if (secondaryIndexOffsetMarks.size() != blockKeys.size()) {
        throw new IOException("Corrupted block index chunk writer: " +
            blockKeys.size() + " entries but " +
            secondaryIndexOffsetMarks.size() + " secondary index items");
      }

      // The secondary index: one relative offset per entry...
      for (int currentSecondaryIndex : secondaryIndexOffsetMarks)
        out.writeInt(currentSecondaryIndex);

      // ...plus a final item holding the total size of all entries, which
      // doubles as the end offset of the last entry.
      out.writeInt(curTotalNonRootEntrySize);

      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        out.write(blockKeys.get(i));
      }
    }
1418
1419
1420
1421
1422
    /**
     * @return the size of this chunk if encoded in the non-root index block
     *         format
     */
    int getNonRootSize() {
      return Bytes.SIZEOF_INT                          // entry count
          + Bytes.SIZEOF_INT * (blockKeys.size() + 1)  // secondary index
          + curTotalNonRootEntrySize;                  // all entries
    }
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
    /**
     * Writes this chunk in the root index block format: for each entry, a
     * long offset, an int on-disk size, and a length-prefixed key.
     *
     * @param out the data output to write to
     */
    void writeRoot(DataOutput out) throws IOException {
      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        Bytes.writeByteArray(out, blockKeys.get(i));
      }
    }

    /**
     * @return the size of this chunk if encoded in the root index block format
     */
    int getRootSize() {
      return curTotalRootSize;
    }

    /**
     * @return the number of entries in this chunk
     */
    public int getNumEntries() {
      return blockKeys.size();
    }
1460
    /** @return the first key of the i-th entry */
    public byte[] getBlockKey(int i) {
      return blockKeys.get(i);
    }

    /** @return the block offset of the i-th entry */
    public long getBlockOffset(int i) {
      return blockOffsets.get(i);
    }

    /** @return the on-disk data size of the i-th entry */
    public int getOnDiskDataSize(int i) {
      return onDiskDataSizes.get(i);
    }

    /**
     * @return the cumulative number of sub-entries up to and including the
     *         i-th entry, or 0 when i is negative
     */
    public long getCumulativeNumKV(int i) {
      if (i < 0)
        return 0;
      return numSubEntriesAt.get(i);
    }
1478
1479 }
1480
  /**
   * @return the configured maximum index block (chunk) size, or the default
   *         when unset
   */
  public static int getMaxChunkSize(Configuration conf) {
    return conf.getInt(MAX_CHUNK_SIZE_KEY, DEFAULT_MAX_CHUNK_SIZE);
  }

  /**
   * @return the configured minimum number of entries per index block, or the
   *         default when unset
   */
  public static int getMinIndexNumEntries(Configuration conf) {
    return conf.getInt(MIN_INDEX_NUM_ENTRIES_KEY, DEFAULT_MIN_INDEX_NUM_ENTRIES);
  }
1488 }