1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.io.hfile;
20
21 import java.io.ByteArrayOutputStream;
22 import java.io.DataInput;
23 import java.io.DataInputStream;
24 import java.io.DataOutput;
25 import java.io.DataOutputStream;
26 import java.io.IOException;
27 import java.nio.ByteBuffer;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.List;
31 import java.util.concurrent.atomic.AtomicReference;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.hbase.classification.InterfaceAudience;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.fs.FSDataOutputStream;
38 import org.apache.hadoop.hbase.Cell;
39 import org.apache.hadoop.hbase.HConstants;
40 import org.apache.hadoop.hbase.KeyValue;
41 import org.apache.hadoop.hbase.KeyValue.KVComparator;
42 import org.apache.hadoop.hbase.KeyValueUtil;
43 import org.apache.hadoop.hbase.io.HeapSize;
44 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
45 import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;
46 import org.apache.hadoop.hbase.util.ByteBufferUtils;
47 import org.apache.hadoop.hbase.util.Bytes;
48 import org.apache.hadoop.hbase.util.ClassSize;
49 import org.apache.hadoop.io.WritableUtils;
50 import org.apache.hadoop.util.StringUtils;
51
52
53
54
55
56
57
58
59
60
61 @InterfaceAudience.Private
62 public class HFileBlockIndex {
63
  private static final Log LOG = LogFactory.getLog(HFileBlockIndex.class);

  /** Default maximum size (in bytes) of an index block chunk: 128 KB. */
  static final int DEFAULT_MAX_CHUNK_SIZE = 128 * 1024;

  /** Configuration key for the maximum size of an index block (chunk). */
  public static final String MAX_CHUNK_SIZE_KEY = "hfile.index.block.max.size";

  /**
   * Configuration key for the minimum number of entries per index block.
   * Bounds the number of index levels when individual keys are very large.
   */
  public static final String MIN_INDEX_NUM_ENTRIES_KEY = "hfile.index.block.min.entries";

  /** Default minimum number of entries per index block. */
  static final int DEFAULT_MIN_INDEX_NUM_ENTRIES = 16;

  /**
   * The number of bytes stored in each "secondary index" entry of a non-root
   * index block in addition to the key itself: the block's offset (long)
   * and its on-disk size (int).
   */
  static final int SECONDARY_INDEX_ENTRY_OVERHEAD = Bytes.SIZEOF_INT
      + Bytes.SIZEOF_LONG;

  /** Error message used when inline-block operations are attempted in
   * single-level-only mode. */
  private static final String INLINE_BLOCKS_NOT_ALLOWED =
      "Inline blocks are not allowed in the single-level-only mode";

  /**
   * The size of the mid-key metadata appended to a multi-level root index
   * block: the offset (long) and on-disk size (int) of the middle leaf-level
   * index block, plus the ordinal (int) of the mid-key entry within it.
   * See BlockIndexChunk#getMidKeyMetadata and BlockIndexReader#midkey.
   */
  private static final int MID_KEY_METADATA_SIZE = Bytes.SIZEOF_LONG +
      2 * Bytes.SIZEOF_INT;
107
108
109
110
111
112
113
114
115
116
117
118 public static class BlockIndexReader implements HeapSize {
119
    /** Comparator used for binary search over root-level block keys. */
    private final KVComparator comparator;

    // Root-level index data, filled in by readRootIndex(): three parallel
    // arrays with one entry per root-level block.
    private byte[][] blockKeys;    // first key of each root-level block
    private long[] blockOffsets;   // file offset of each block
    private int[] blockDataSizes;  // on-disk size of each block
    private int rootCount = 0;     // number of entries actually filled in

    // Mid-key metadata, only present for multi-level indexes (see
    // readMultiLevelIndexRoot). -1 means "not available".
    private long midLeafBlockOffset = -1;
    private int midLeafBlockOnDiskSize = -1;
    private int midKeyEntry = -1;  // ordinal of the mid-key within that leaf block

    // Lazily computed and cached mid-key of the file; see midkey().
    private AtomicReference<byte[]> midKey = new AtomicReference<byte[]>();

    // Number of levels in the block index tree (1 = root-only index).
    // Drives the traversal loop in loadDataBlockWithScanInfo.
    private int searchTreeLevel;

    /** Used to read non-root index blocks on demand; may be null (see midkey()). */
    private CachingBlockReader cachingBlockReader;
144
145 public BlockIndexReader(final KVComparator c, final int treeLevel,
146 final CachingBlockReader cachingBlockReader) {
147 this(c, treeLevel);
148 this.cachingBlockReader = cachingBlockReader;
149 }
150
151 public BlockIndexReader(final KVComparator c, final int treeLevel)
152 {
153 comparator = c;
154 searchTreeLevel = treeLevel;
155 }
156
157
158
159
160 public boolean isEmpty() {
161 return blockKeys.length == 0;
162 }
163
164
165
166
167
168 public void ensureNonEmpty() {
169 if (blockKeys.length == 0) {
170 throw new IllegalStateException("Block index is empty or not loaded");
171 }
172 }
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189 public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks,
190 boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
191 throws IOException {
192 BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
193 cacheBlocks,
194 pread, isCompaction, expectedDataBlockEncoding);
195 if (blockWithScanInfo == null) {
196 return null;
197 } else {
198 return blockWithScanInfo.getHFileBlock();
199 }
200 }
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
    /**
     * Returns the block containing the given key (exact match not required),
     * together with scan info: the "next indexed key", which scanners use to
     * decide whether a re-seek can stay within the current block.
     *
     * Walks the index tree from the root level down to the data-block level,
     * reusing {@code currentBlock} when it already is the right block.
     *
     * @param key the key we are looking for
     * @param currentBlock the current block, to avoid re-reading the same block
     * @param cacheBlocks whether blocks read on this lookup should be cached
     * @param pread whether to use positional read
     * @param isCompaction whether this read is on behalf of a compaction
     * @param expectedDataBlockEncoding the data block encoding the caller expects
     * @return the BlockWithScanInfo, or null if the key is before the first
     *         key of the file
     * @throws IOException if a block cannot be read or the index is inconsistent
     */
    public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
        boolean cacheBlocks,
        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
        throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        // The key is before the first key of the file.
        return null;
      }

      // The next indexed key to return to the caller.
      Cell nextIndexedKey = null;

      // Offset and on-disk size of the block at the current level of the walk.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = new KeyValue.KeyOnlyKeyValue(blockKeys[rootLevelIndex + 1]);
      } else {
        // Last root-level entry: there is no next indexed key at this level.
        nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block;
      while (true) {

        // Reuse the caller's block if it is exactly the one we need next.
        if (currentBlock != null && currentBlock.getOffset() == currentOffset)
        {
          block = currentBlock;
        } else {
          // Index blocks are always cached (even if the caller asked not to
          // cache), because re-reading them on every lookup would be costly.
          boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
          BlockType expectedBlockType;
          if (lookupLevel < searchTreeLevel - 1) {
            expectedBlockType = BlockType.INTERMEDIATE_INDEX;
          } else if (lookupLevel == searchTreeLevel - 1) {
            expectedBlockType = BlockType.LEAF_INDEX;
          } else {
            // At the bottom of the tree we expect a data block.
            expectedBlockType = BlockType.DATA;
          }
          block = cachingBlockReader.readBlock(currentOffset,
              currentOnDiskSize, shouldCache, pread, isCompaction, true,
              expectedBlockType, expectedDataBlockEncoding);
        }

        if (block == null) {
          throw new IOException("Failed to read block at offset " +
              currentOffset + ", onDiskSize=" + currentOnDiskSize);
        }

        // Found the data block — done walking.
        if (block.getBlockType().isData()) {
          break;
        }

        // Otherwise this must be a leaf- or intermediate-level index block;
        // we never descend more than searchTreeLevel levels.
        if (++lookupLevel > searchTreeLevel) {
          throw new IOException("Search Tree Level overflow: lookupLevel="+
              lookupLevel + ", searchTreeLevel=" + searchTreeLevel);
        }

        // Locate the entry for the key within this non-root index block.
        ByteBuffer buffer = block.getBufferWithoutHeader();
        index = locateNonRootIndexEntry(buffer, key, comparator);
        if (index == -1) {
          KeyValue kv = KeyValueUtil.ensureKeyValue(key);
          throw new IOException("The key "
              + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength())
              + " is before the" + " first key of the non-root index block "
              + block);
        }

        // locateNonRootIndexEntry positioned the buffer at the entry:
        // read the child block's offset and on-disk size from it.
        currentOffset = buffer.getLong();
        currentOnDiskSize = buffer.getInt();

        // Refine the next indexed key if this level has a following entry.
        byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
        if (tmpNextIndexedKey != null) {
          nextIndexedKey = new KeyValue.KeyOnlyKeyValue(tmpNextIndexedKey);
        }
      }

      if (lookupLevel != searchTreeLevel) {
        throw new IOException("Reached a data block at level " + lookupLevel +
            " but the number of levels is " + searchTreeLevel);
      }

      // Package the data block together with the next indexed key.
      BlockWithScanInfo blockWithScanInfo = new BlockWithScanInfo(block, nextIndexedKey);
      return blockWithScanInfo;
    }
324
325
326
327
328
329
330
331
332 public byte[] midkey() throws IOException {
333 if (rootCount == 0)
334 throw new IOException("HFile empty");
335
336 byte[] targetMidKey = this.midKey.get();
337 if (targetMidKey != null) {
338 return targetMidKey;
339 }
340
341 if (midLeafBlockOffset >= 0) {
342 if (cachingBlockReader == null) {
343 throw new IOException("Have to read the middle leaf block but " +
344 "no block reader available");
345 }
346
347
348 HFileBlock midLeafBlock = cachingBlockReader.readBlock(
349 midLeafBlockOffset, midLeafBlockOnDiskSize, true, true, false, true,
350 BlockType.LEAF_INDEX, null);
351
352 ByteBuffer b = midLeafBlock.getBufferWithoutHeader();
353 int numDataBlocks = b.getInt();
354 int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 1));
355 int keyLen = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 2)) -
356 keyRelOffset;
357 int keyOffset = Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset
358 + SECONDARY_INDEX_ENTRY_OVERHEAD;
359 targetMidKey = ByteBufferUtils.toBytes(b, keyOffset, keyLen);
360 } else {
361
362 targetMidKey = blockKeys[rootCount / 2];
363 }
364
365 this.midKey.set(targetMidKey);
366 return targetMidKey;
367 }
368
369
370
371
372 public byte[] getRootBlockKey(int i) {
373 return blockKeys[i];
374 }
375
376
377
378
379 public long getRootBlockOffset(int i) {
380 return blockOffsets[i];
381 }
382
383
384
385
386
387
388 public int getRootBlockDataSize(int i) {
389 return blockDataSizes[i];
390 }
391
392
393
394
395 public int getRootBlockCount() {
396 return rootCount;
397 }
398
399
400
401
402
403
404
405
406
407
408 public int rootBlockContainingKey(final byte[] key, int offset, int length) {
409 int pos = Bytes.binarySearch(blockKeys, key, offset, length, comparator);
410
411
412
413 if (pos >= 0) {
414
415 assert pos < blockKeys.length;
416 return pos;
417 }
418
419
420
421
422
423
424 int i = -pos - 1;
425 assert 0 <= i && i <= blockKeys.length;
426 return i - 1;
427 }
428
429
430
431
432
433
434
435 public int rootBlockContainingKey(final Cell key) {
436 int pos = Bytes.binarySearch(blockKeys, key, comparator);
437
438
439
440 if (pos >= 0) {
441
442 assert pos < blockKeys.length;
443 return pos;
444 }
445
446
447
448
449
450
451 int i = -pos - 1;
452 assert 0 <= i && i <= blockKeys.length;
453 return i - 1;
454 }
455
456
457
458
459
460
461
462
463 private void add(final byte[] key, final long offset, final int dataSize) {
464 blockOffsets[rootCount] = offset;
465 blockKeys[rootCount] = key;
466 blockDataSizes[rootCount] = dataSize;
467 rootCount++;
468 }
469
470
471
472
473
474
475
    /**
     * Returns the key at the i-th position (0-based) of the given non-root
     * index block, or null if the position is out of range.
     *
     * @param nonRootIndex buffer over a non-root index block, header excluded
     * @param i the entry ordinal
     * @return a copy of the key bytes, or null if i is out of range
     */
    private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
      int numEntries = nonRootIndex.getInt(0);
      if (i < 0 || i >= numEntries) {
        return null;
      }

      // Entries start after the entry count (1 int) and the secondary index
      // (numEntries + 1 ints, the last one being the total entries size).
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

      // The target key's offset relative to the end of the secondary index.
      int targetKeyRelOffset = nonRootIndex.getInt(
          Bytes.SIZEOF_INT * (i + 1));

      // Absolute offset of the key: skip the secondary index, the preceding
      // entries, and the entry's own (offset, onDiskSize) prefix.
      int targetKeyOffset = entriesOffset
          + targetKeyRelOffset
          + SECONDARY_INDEX_ENTRY_OVERHEAD;

      // The difference of two consecutive secondary-index elements is the
      // size of the whole (offset, onDiskSize, key) tuple; subtracting the
      // fixed overhead leaves the key length.
      int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) -
          targetKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

      return ByteBufferUtils.toBytes(nonRootIndex, targetKeyOffset, targetKeyLength);
    }
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
    /**
     * Performs a binary search over a non-root index block using its
     * "secondary index" of entry offsets.
     *
     * @param key the key we are searching for
     * @param nonRootIndex buffer over a non-root index block, header
     *          excluded; the buffer's position is not used or modified
     * @param comparator comparator for the keys stored in the block
     * @return the index i in [0, numEntries - 1] such that
     *         keys[i] <= key < keys[i + 1], or -1 if key < keys[0]
     */
    static int binarySearchNonRootIndex(Cell key, ByteBuffer nonRootIndex,
        KVComparator comparator) {

      int numEntries = nonRootIndex.getInt(0);
      int low = 0;
      int high = numEntries - 1;
      int mid = 0;

      // Entries start after the entry count (1 int) and the secondary index
      // (numEntries + 1 ints).
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

      // Reused across iterations to avoid allocating a key wrapper per probe.
      KeyValue.KeyOnlyKeyValue nonRootIndexKV = new KeyValue.KeyOnlyKeyValue();
      while (low <= high) {
        mid = (low + high) >>> 1;

        // The mid entry's offset relative to the end of the secondary index.
        int midKeyRelOffset = nonRootIndex.getInt(
            Bytes.SIZEOF_INT * (mid + 1));

        // Absolute offset of the mid key: skip the secondary index, the
        // preceding entries, and the entry's (offset, onDiskSize) prefix.
        int midKeyOffset = entriesOffset
            + midKeyRelOffset
            + SECONDARY_INDEX_ENTRY_OVERHEAD;

        // The difference of two consecutive secondary-index elements is the
        // size of the whole (offset, onDiskSize, key) tuple; subtracting the
        // fixed overhead leaves the key length.
        int midLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (mid + 2)) -
            midKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

        // Wrap the serialized key so it can be compared without copying.
        nonRootIndexKV.setKey(nonRootIndex.array(),
            nonRootIndex.arrayOffset() + midKeyOffset, midLength);
        int cmp = comparator.compareOnlyKeyPortion(key, nonRootIndexKV);

        // key lives above the midpoint
        if (cmp > 0)
          low = mid + 1; // invariant: keys[low - 1] < key
        // key lives below the midpoint
        else if (cmp < 0)
          high = mid - 1; // invariant: key < keys[high + 1]
        else
          return mid; // exact match
      }

      // The loop exits with low == high + 1; anything else means the
      // invariants above were violated.
      if (low != high + 1) {
        throw new IllegalStateException("Binary search broken: low=" + low
            + " " + "instead of " + (high + 1));
      }

      // By the invariants, keys[low - 1] <= key < keys[low], so low - 1 is
      // the block that could contain the key (-1 if key precedes keys[0]).
      int i = low - 1;

      // Extra validation on the result.
      if (i < -1 || i >= numEntries) {
        throw new IllegalStateException("Binary search broken: result is " +
            i + " but expected to be between -1 and (numEntries - 1) = " +
            (numEntries - 1));
      }

      return i;
    }
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
    /**
     * Searches a non-root index block for the entry that could contain the
     * given key and, on success, positions the buffer at the start of that
     * entry (its offset/on-disk-size fields), ready for reading.
     *
     * @param nonRootBlock buffer over a non-root index block, header
     *          excluded; its position is modified on success
     * @param key the key to search for
     * @param comparator comparator for the keys in the block
     * @return the entry index, or -1 if the key is before the first entry
     */
    static int locateNonRootIndexEntry(ByteBuffer nonRootBlock, Cell key,
        KVComparator comparator) {
      int entryIndex = binarySearchNonRootIndex(key, nonRootBlock, comparator);

      if (entryIndex != -1) {
        int numEntries = nonRootBlock.getInt(0);

        // The end of the secondary index, i.e. the start of the entries.
        int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

        // The found entry's offset relative to the end of the secondary index.
        int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT * (1 + entryIndex));

        // Leave the buffer positioned at the entry for the caller to read.
        nonRootBlock.position(entriesOffset + entryRelOffset);
      }

      return entryIndex;
    }
627
628
629
630
631
632
633
634
635
636
637
638 public void readRootIndex(DataInput in, final int numEntries)
639 throws IOException {
640 blockOffsets = new long[numEntries];
641 blockKeys = new byte[numEntries][];
642 blockDataSizes = new int[numEntries];
643
644
645 if (numEntries > 0) {
646 for (int i = 0; i < numEntries; ++i) {
647 long offset = in.readLong();
648 int dataSize = in.readInt();
649 byte[] key = Bytes.readByteArray(in);
650 add(key, offset, dataSize);
651 }
652 }
653 }
654
655
656
657
658
659
660
661
662
663
664
665
666 public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
667 DataInputStream in = blk.getByteStream();
668 readRootIndex(in, numEntries);
669 return in;
670 }
671
672
673
674
675
676
677
678
679
680
    /**
     * Reads the root level of a multi-level block index. Based on
     * {@link #readRootIndex(HFileBlock, int)}, but additionally reads the
     * mid-key metadata (middle leaf block offset/size and mid-key ordinal)
     * when present.
     *
     * @param blk the HFile block holding the root index
     * @param numEntries the number of root-level index entries
     * @throws IOException if the index cannot be read
     */
    public void readMultiLevelIndexRoot(HFileBlock blk,
        final int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // The remaining bytes include the block's checksum bytes; subtract
      // them to decide whether mid-key metadata is actually present.
      int checkSumBytes = blk.totalChecksumBytes();
      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata; the mid-key fields keep their -1 defaults.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }
695
696 @Override
697 public String toString() {
698 StringBuilder sb = new StringBuilder();
699 sb.append("size=" + rootCount).append("\n");
700 for (int i = 0; i < rootCount; i++) {
701 sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
702 .append("\n offset=").append(blockOffsets[i])
703 .append(", dataSize=" + blockDataSizes[i]).append("\n");
704 }
705 return sb.toString();
706 }
707
    /**
     * Approximate heap footprint of this reader, including the parallel
     * root-index arrays and the key byte arrays they reference.
     */
    @Override
    public long heapSize() {
      // Object header, 6 references (comparator, blockKeys, blockOffsets,
      // blockDataSizes, midKey, cachingBlockReader) and 2 ints (rootCount,
      // searchTreeLevel).
      long heapSize = ClassSize.align(6 * ClassSize.REFERENCE +
          2 * Bytes.SIZEOF_INT + ClassSize.OBJECT);

      // The remaining primitive fields (midLeafBlockOffset,
      // midLeafBlockOnDiskSize, midKeyEntry) have the same layout as the
      // mid-key metadata: one long plus two ints.
      heapSize += MID_KEY_METADATA_SIZE;

      // Calculating the size of blockKeys
      if (blockKeys != null) {
        // Array object plus one reference per key.
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length
            * ClassSize.REFERENCE);

        // Each key's own byte array.
        for (byte[] key : blockKeys) {
          heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
        }
      }

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length
            * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length
            * Bytes.SIZEOF_INT);
      }

      return ClassSize.align(heapSize);
    }
740
741 }
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758 public static class BlockIndexWriter implements InlineBlockWriter {
759
760
761
762
763
764
765
766
767
    /**
     * While the index is being written, this represents the current block
     * index referencing all leaf-level blocks written so far. When the file
     * is closed while still single-level, the inline chunk is promoted to
     * this root chunk instead (see shouldWriteBlock).
     */
    private BlockIndexChunk rootChunk = new BlockIndexChunk();

    /**
     * Current leaf-level chunk. New entries referencing data blocks get added
     * to this chunk until it grows large enough to be written to disk.
     */
    private BlockIndexChunk curInlineChunk = new BlockIndexChunk();

    /**
     * The number of block index levels. One if there is only the root level
     * (even if empty), two if there are leaf and root levels, higher if there
     * are intermediate levels.
     */
    private int numLevels = 1;

    // Used to frame and write index blocks; null in single-level-only mode.
    private HFileBlock.Writer blockWriter;

    // First key of the current inline block, captured in writeInlineBlock and
    // consumed in blockWritten; null in between.
    private byte[] firstKey = null;

    /**
     * The total number of leaf-level entries, i.e. entries referencing data
     * blocks.
     */
    private long totalNumEntries;

    // Total on-disk (compressed) size of all index blocks written so far.
    private long totalBlockOnDiskSize;

    // Total uncompressed size of all index blocks written so far.
    private long totalBlockUncompressedSize;

    // Maximum size guideline for index block chunks; see setMaxChunkSize.
    private int maxChunkSize;

    // Minimum number of entries an index block should have; bounds the
    // number of index levels for very large keys. See setMinIndexNumEntries.
    private int minIndexNumEntries;

    // Whether this writer only produces a single-level index.
    private boolean singleLevelOnly;

    // CacheConfig, or null if cache-on-write is disabled.
    private CacheConfig cacheConf;

    // File name used as part of block-cache keys; null iff cacheConf is null.
    private String nameForCaching;
817
818
819 public BlockIndexWriter() {
820 this(null, null, null);
821 singleLevelOnly = true;
822 }
823
824
825
826
827
828
829
830 public BlockIndexWriter(HFileBlock.Writer blockWriter,
831 CacheConfig cacheConf, String nameForCaching) {
832 if ((cacheConf == null) != (nameForCaching == null)) {
833 throw new IllegalArgumentException("Block cache and file name for " +
834 "caching must be both specified or both null");
835 }
836
837 this.blockWriter = blockWriter;
838 this.cacheConf = cacheConf;
839 this.nameForCaching = nameForCaching;
840 this.maxChunkSize = HFileBlockIndex.DEFAULT_MAX_CHUNK_SIZE;
841 this.minIndexNumEntries = HFileBlockIndex.DEFAULT_MIN_INDEX_NUM_ENTRIES;
842 }
843
844 public void setMaxChunkSize(int maxChunkSize) {
845 if (maxChunkSize <= 0) {
846 throw new IllegalArgumentException("Invalid maximum index block size");
847 }
848 this.maxChunkSize = maxChunkSize;
849 }
850
851 public void setMinIndexNumEntries(int minIndexNumEntries) {
852 if (minIndexNumEntries <= 1) {
853 throw new IllegalArgumentException("Invalid maximum index level, should be >= 2");
854 }
855 this.minIndexNumEntries = minIndexNumEntries;
856 }
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
    /**
     * Writes the root level and intermediate levels of the block index into
     * the output stream, generating the tree from bottom up. Assumes that the
     * leaf level has already been inline-written to the data stream in
     * chunks (see {@link #writeInlineBlock}).
     *
     * @param out the output stream to write the index levels to
     * @return the position at which the root-level index starts
     * @throws IOException if the last inline chunk still has entries, or a
     *           block write fails
     */
    public long writeIndexBlocks(FSDataOutputStream out) throws IOException {
      if (curInlineChunk != null && curInlineChunk.getNumEntries() != 0) {
        throw new IOException("Trying to write a multi-level block index, " +
            "but are " + curInlineChunk.getNumEntries() + " entries in the " +
            "last inline chunk.");
      }

      // We need to get mid-key metadata before we create intermediate
      // index blocks and overwrite the root chunk.
      byte[] midKeyMetadata = numLevels > 1 ? rootChunk.getMidKeyMetadata()
          : null;

      if (curInlineChunk != null) {
        // Collapse the root chunk into intermediate levels until it fits
        // into maxChunkSize. Stop early once the root has few enough
        // entries (bounds the level count for very large keys), and cap the
        // depth at 16 levels as a sanity limit.
        while (rootChunk.getRootSize() > maxChunkSize
            && rootChunk.getNumEntries() > minIndexNumEntries
            && numLevels < 16) {
          rootChunk = writeIntermediateLevel(out, rootChunk);
          numLevels += 1;
        }
      }

      // Write the root level.
      long rootLevelIndexPos = out.getPos();

      {
        DataOutput blockStream =
            blockWriter.startWriting(BlockType.ROOT_INDEX);
        rootChunk.writeRoot(blockStream);
        if (midKeyMetadata != null)
          blockStream.write(midKeyMetadata);
        blockWriter.writeHeaderAndData(out);
      }

      // Account for the root index block's size.
      totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
      totalBlockUncompressedSize +=
          blockWriter.getUncompressedSizeWithoutHeader();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
          + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
          + " root-level entries, " + totalNumEntries + " total entries, "
          + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
          " on-disk size, "
          + StringUtils.humanReadableInt(totalBlockUncompressedSize) +
          " total uncompressed size.");
      }
      return rootLevelIndexPos;
    }
927
928
929
930
931
932
933
934
935
936
937
    /**
     * Writes the block index data as a single-level (root-format) index.
     * Does not do any block framing. Only valid in single-level-only mode.
     *
     * @param out the output to write the index contents to
     * @param description a short description of the index, used in the log message
     * @throws IOException if not in single-level mode, or root-level entries
     *           were already added
     */
    public void writeSingleLevelIndex(DataOutput out, String description)
        throws IOException {
      expectNumLevels(1);

      if (!singleLevelOnly)
        throw new IOException("Single-level mode is turned off");

      if (rootChunk.getNumEntries() > 0)
        throw new IOException("Root-level entries already added in " +
            "single-level mode");

      // Promote the inline chunk to the root chunk and start a fresh one.
      rootChunk = curInlineChunk;
      curInlineChunk = new BlockIndexChunk();

      if (LOG.isTraceEnabled()) {
        // NOTE(review): logged before writeRoot below actually writes; the
        // "Wrote" message is accurate only if writeRoot succeeds.
        LOG.trace("Wrote a single-level " + description + " index with "
          + rootChunk.getNumEntries() + " entries, " + rootChunk.getRootSize()
          + " bytes");
      }
      rootChunk.writeRoot(out);
    }
959
960
961
962
963
964
965
966
967
968
969
970
971
    /**
     * Splits the given index level into intermediate index blocks of
     * permitted size, writes those blocks to disk, and returns the next
     * (parent) level referencing them.
     *
     * @param out the file to write the intermediate-level blocks to
     * @param currentLevel the current level of the block index, e.g. the
     *          chunk referencing all leaf-level index blocks
     * @return the parent-level chunk, which becomes the root index after a
     *         few (usually zero) iterations
     * @throws IOException if an intermediate block cannot be written
     */
    private BlockIndexChunk writeIntermediateLevel(FSDataOutputStream out,
        BlockIndexChunk currentLevel) throws IOException {
      // Entries referencing the intermediate-level blocks we are creating.
      BlockIndexChunk parent = new BlockIndexChunk();

      // The intermediate-level block currently being accumulated.
      BlockIndexChunk curChunk = new BlockIndexChunk();

      for (int i = 0; i < currentLevel.getNumEntries(); ++i) {
        curChunk.add(currentLevel.getBlockKey(i),
            currentLevel.getBlockOffset(i), currentLevel.getOnDiskDataSize(i));

        // Flush the chunk once it exceeds maxChunkSize, but only after
        // enough entries have been consumed; this prevents degenerate
        // one-entry chunks (and unbounded level growth) when single keys
        // are larger than maxChunkSize.
        // NOTE(review): the guard uses i (entries consumed from
        // currentLevel) rather than curChunk.getNumEntries(), so chunks
        // flushed after the first one may hold fewer than minIndexNumEntries
        // entries — confirm this is the intended behavior.
        if (i >= minIndexNumEntries && curChunk.getRootSize() >= maxChunkSize) {
          writeIntermediateBlock(out, parent, curChunk);
        }
      }

      // Flush whatever remains in the last partial chunk.
      if (curChunk.getNumEntries() > 0) {
        writeIntermediateBlock(out, parent, curChunk);
      }

      return parent;
    }
998
999 private void writeIntermediateBlock(FSDataOutputStream out,
1000 BlockIndexChunk parent, BlockIndexChunk curChunk) throws IOException {
1001 long beginOffset = out.getPos();
1002 DataOutputStream dos = blockWriter.startWriting(
1003 BlockType.INTERMEDIATE_INDEX);
1004 curChunk.writeNonRoot(dos);
1005 byte[] curFirstKey = curChunk.getBlockKey(0);
1006 blockWriter.writeHeaderAndData(out);
1007
1008 if (cacheConf != null) {
1009 HFileBlock blockForCaching = blockWriter.getBlockForCaching(cacheConf);
1010 cacheConf.getBlockCache().cacheBlock(new BlockCacheKey(nameForCaching,
1011 beginOffset), blockForCaching);
1012 }
1013
1014
1015 totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
1016 totalBlockUncompressedSize +=
1017 blockWriter.getUncompressedSizeWithoutHeader();
1018
1019
1020
1021
1022
1023
1024 parent.add(curFirstKey, beginOffset,
1025 blockWriter.getOnDiskSizeWithHeader());
1026
1027
1028 curChunk.clear();
1029 curFirstKey = null;
1030 }
1031
1032
1033
1034
1035 public final int getNumRootEntries() {
1036 return rootChunk.getNumEntries();
1037 }
1038
1039
1040
1041
1042 public int getNumLevels() {
1043 return numLevels;
1044 }
1045
1046 private void expectNumLevels(int expectedNumLevels) {
1047 if (numLevels != expectedNumLevels) {
1048 throw new IllegalStateException("Number of block index levels is "
1049 + numLevels + "but is expected to be " + expectedNumLevels);
1050 }
1051 }
1052
1053
1054
1055
1056
1057
    /**
     * Whether there is an inline index block ready to be written. Returns
     * false for an empty chunk; otherwise, when not closing, true once the
     * chunk has reached maxChunkSize. When closing a file whose index is
     * still single-chunk, promotes the inline chunk to the root chunk as a
     * side effect (keeping the index single-level) and returns false.
     *
     * @param closing whether the file is being closed
     * @throws UnsupportedOperationException always in single-level-only mode
     * @throws IllegalStateException if called again after a closing call
     *           already consumed the inline chunk
     */
    @Override
    public boolean shouldWriteBlock(boolean closing) {
      if (singleLevelOnly) {
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);
      }

      if (curInlineChunk == null) {
        throw new IllegalStateException("curInlineChunk is null; has shouldWriteBlock been " +
            "called with closing=true and then called again?");
      }

      if (curInlineChunk.getNumEntries() == 0) {
        return false;
      }

      // We do have some entries in the current inline chunk.
      if (closing) {
        if (rootChunk.getNumEntries() == 0) {
          // No leaf-level blocks were written yet. Instead of creating a
          // leaf level with a single block, move the entries to the root.
          expectNumLevels(1);
          rootChunk = curInlineChunk;
          curInlineChunk = null;  // disallow adding any more index entries
          return false;
        }

        return true;
      } else {
        return curInlineChunk.getNonRootSize() >= maxChunkSize;
      }
    }
1090
1091
1092
1093
1094
1095
1096
    /**
     * Writes out the current inline index block in the non-root index block
     * format, remembers its first key for {@link #blockWritten}, and starts
     * a new inline chunk.
     *
     * @param out the stream to write the leaf-level index block to
     * @throws IOException if the chunk cannot be serialized
     */
    @Override
    public void writeInlineBlock(DataOutput out) throws IOException {
      if (singleLevelOnly)
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);

      // Inline blocks are non-root blocks, so use the non-root format.
      curInlineChunk.writeNonRoot(out);

      // Save the first key of the inline block so we can add it to the
      // parent-level index once the block's offset is known (blockWritten).
      firstKey = curInlineChunk.getBlockKey(0);

      // Start a new inline index block.
      curInlineChunk.clear();
    }
1113
1114
1115
1116
1117
1118 @Override
1119 public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {
1120
1121 totalBlockOnDiskSize += onDiskSize;
1122 totalBlockUncompressedSize += uncompressedSize;
1123
1124 if (singleLevelOnly)
1125 throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);
1126
1127 if (firstKey == null) {
1128 throw new IllegalStateException("Trying to add second-level index " +
1129 "entry with offset=" + offset + " and onDiskSize=" + onDiskSize +
1130 "but the first key was not set in writeInlineBlock");
1131 }
1132
1133 if (rootChunk.getNumEntries() == 0) {
1134
1135 expectNumLevels(1);
1136 numLevels = 2;
1137 }
1138
1139
1140
1141 rootChunk.add(firstKey, offset, onDiskSize, totalNumEntries);
1142 firstKey = null;
1143 }
1144
1145 @Override
1146 public BlockType getInlineBlockType() {
1147 return BlockType.LEAF_INDEX;
1148 }
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160 public void addEntry(byte[] firstKey, long blockOffset, int blockDataSize) {
1161 curInlineChunk.add(firstKey, blockOffset, blockDataSize);
1162 ++totalNumEntries;
1163 }
1164
1165
1166
1167
1168 public void ensureSingleLevel() throws IOException {
1169 if (numLevels > 1) {
1170 throw new IOException ("Wrote a " + numLevels + "-level index with " +
1171 rootChunk.getNumEntries() + " root-level entries, but " +
1172 "this is expected to be a single-level block index.");
1173 }
1174 }
1175
1176
1177
1178
1179
1180
1181 @Override
1182 public boolean getCacheOnWrite() {
1183 return cacheConf != null && cacheConf.shouldCacheIndexesOnWrite();
1184 }
1185
1186
1187
1188
1189
1190
1191
1192 public long getTotalUncompressedSize() {
1193 return totalBlockUncompressedSize;
1194 }
1195
1196 }
1197
1198
1199
1200
1201
1202
1203 static class BlockIndexChunk {
1204
1205
    /** First keys of the key ranges corresponding to each index entry. */
    private final List<byte[]> blockKeys = new ArrayList<byte[]>();

    /** Block offset in the backing stream, one per entry. */
    private final List<Long> blockOffsets = new ArrayList<Long>();

    /** On-disk data sizes of the referenced lower-level blocks. */
    private final List<Integer> onDiskDataSizes = new ArrayList<Integer>();

    /**
     * The cumulative number of sub-entries (entries on deeper index levels)
     * up to and including each entry. Only filled in when the four-argument
     * {@link #add} is used, i.e. for the parent of the leaf level.
     */
    private final List<Long> numSubEntriesAt = new ArrayList<Long>();

    /**
     * The offset (relative to the end of the "secondary index" in the
     * non-root block format) at which the next entry would be written;
     * equivalently, the total byte size of all entries added so far.
     */
    private int curTotalNonRootEntrySize = 0;

    /**
     * The accumulated size of this chunk if it were stored in the root index
     * block format (offset + size + vint-prefixed key per entry).
     */
    private int curTotalRootSize = 0;

    /**
     * The "secondary index": the relative offset of each entry from the end
     * of the secondary index, enabling binary search over variable-length
     * entries in the non-root block format.
     */
    private final List<Integer> secondaryIndexOffsetMarks =
        new ArrayList<Integer>();
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
    /**
     * Adds a new entry to this block index chunk.
     *
     * @param firstKey the first key of the block pointed to by this entry
     * @param blockOffset the offset of the block pointed to by this entry
     * @param onDiskDataSize the on-disk size of the block pointed to by this
     *          entry
     * @param curTotalNumSubEntries the cumulative number of sub-entries up to
     *          and including this entry, or -1 if not tracked at this level
     */
    void add(byte[] firstKey, long blockOffset, int onDiskDataSize,
        long curTotalNumSubEntries) {
      // Record this entry's start offset in the secondary index, then grow
      // the non-root size by the fixed overhead plus the key bytes.
      secondaryIndexOffsetMarks.add(curTotalNonRootEntrySize);
      curTotalNonRootEntrySize += SECONDARY_INDEX_ENTRY_OVERHEAD
          + firstKey.length;

      // In the root format an entry is: offset (long) + size (int) +
      // vint-prefixed key.
      curTotalRootSize += Bytes.SIZEOF_LONG + Bytes.SIZEOF_INT
          + WritableUtils.getVIntSize(firstKey.length) + firstKey.length;

      blockKeys.add(firstKey);
      blockOffsets.add(blockOffset);
      onDiskDataSizes.add(onDiskDataSize);

      if (curTotalNumSubEntries != -1) {
        numSubEntriesAt.add(curTotalNumSubEntries);

        // Make sure the parallel lists stay in sync.
        if (numSubEntriesAt.size() != blockKeys.size()) {
          throw new IllegalStateException("Only have key/value count " +
              "stats for " + numSubEntriesAt.size() + " block index " +
              "entries out of " + blockKeys.size());
        }
      }
    }
1279
1280
1281
1282
1283
1284
1285
1286 public void add(byte[] firstKey, long blockOffset, int onDiskDataSize) {
1287 add(firstKey, blockOffset, onDiskDataSize, -1);
1288 }
1289
1290 public void clear() {
1291 blockKeys.clear();
1292 blockOffsets.clear();
1293 onDiskDataSizes.clear();
1294 secondaryIndexOffsetMarks.clear();
1295 numSubEntriesAt.clear();
1296 curTotalNonRootEntrySize = 0;
1297 curTotalRootSize = 0;
1298 }
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317 public int getEntryBySubEntry(long k) {
1318
1319
1320
1321 int i = Collections.binarySearch(numSubEntriesAt, k);
1322
1323
1324
1325
1326 if (i >= 0)
1327 return i + 1;
1328
1329
1330 return -i - 1;
1331 }
1332
1333
1334
1335
1336
1337
1338
1339
1340
    /**
     * Used when writing the root block index of a multi-level block index.
     * Serializes the information needed to locate the mid-key efficiently:
     * the offset and on-disk size of the middle leaf-level index block, and
     * the ordinal of the mid-key entry within that block.
     *
     * @return the serialized mid-key metadata (MID_KEY_METADATA_SIZE bytes)
     * @throws IOException if there are no leaf-level entries or serialization fails
     */
    public byte[] getMidKeyMetadata() throws IOException {
      ByteArrayOutputStream baos = new ByteArrayOutputStream(
          MID_KEY_METADATA_SIZE);
      DataOutputStream baosDos = new DataOutputStream(baos);
      long totalNumSubEntries = numSubEntriesAt.get(blockKeys.size() - 1);
      if (totalNumSubEntries == 0) {
        throw new IOException("No leaf-level entries, mid-key unavailable");
      }
      // The global ordinal of the middle leaf-level entry, and the root
      // entry (= leaf block) that contains it.
      long midKeySubEntry = (totalNumSubEntries - 1) / 2;
      int midKeyEntry = getEntryBySubEntry(midKeySubEntry);

      baosDos.writeLong(blockOffsets.get(midKeyEntry));
      baosDos.writeInt(onDiskDataSizes.get(midKeyEntry));

      // Convert the global sub-entry ordinal into an ordinal within the
      // containing leaf block.
      long numSubEntriesBefore = midKeyEntry > 0
          ? numSubEntriesAt.get(midKeyEntry - 1) : 0;
      long subEntryWithinEntry = midKeySubEntry - numSubEntriesBefore;
      if (subEntryWithinEntry < 0 || subEntryWithinEntry > Integer.MAX_VALUE)
      {
        throw new IOException("Could not identify mid-key index within the "
            + "leaf-level block containing mid-key: out of range ("
            + subEntryWithinEntry + ", numSubEntriesBefore="
            + numSubEntriesBefore + ", midKeySubEntry=" + midKeySubEntry
            + ")");
      }

      baosDos.writeInt((int) subEntryWithinEntry);

      // Sanity check against MID_KEY_METADATA_SIZE.
      if (baosDos.size() != MID_KEY_METADATA_SIZE) {
        throw new IOException("Could not write mid-key metadata: size=" +
            baosDos.size() + ", correct size: " + MID_KEY_METADATA_SIZE);
      }

      // Close just to be good citizens; a no-op for ByteArrayOutputStream.
      baos.close();

      return baos.toByteArray();
    }
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
    /**
     * Writes the block index chunk in the non-root index block format: the
     * number of entries, a "secondary index" of integer offsets enabling
     * binary search over variable-length entries, and then the entries
     * themselves as (offset, on-disk size, key) tuples.
     *
     * @param out the output to serialize the chunk to
     * @throws IOException if the parallel lists are inconsistent
     */
    void writeNonRoot(DataOutput out) throws IOException {
      // The number of entries in the block.
      out.writeInt(blockKeys.size());

      if (secondaryIndexOffsetMarks.size() != blockKeys.size()) {
        throw new IOException("Corrupted block index chunk writer: " +
            blockKeys.size() + " entries but " +
            secondaryIndexOffsetMarks.size() + " secondary index items");
      }

      // Write the secondary index: each entry's offset relative to the end
      // of the secondary index. Readers know where the secondary index ends
      // because they read the entry count first.
      for (int currentSecondaryIndex : secondaryIndexOffsetMarks)
        out.writeInt(currentSecondaryIndex);

      // One extra element (the total size of all entries) so that readers
      // can compute each entry's size by subtracting consecutive elements.
      out.writeInt(curTotalNonRootEntrySize);

      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        out.write(blockKeys.get(i));
      }
    }
1416
1417
1418
1419
1420
1421 int getNonRootSize() {
1422 return Bytes.SIZEOF_INT
1423 + Bytes.SIZEOF_INT * (blockKeys.size() + 1)
1424 + curTotalNonRootEntrySize;
1425 }
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437 void writeRoot(DataOutput out) throws IOException {
1438 for (int i = 0; i < blockKeys.size(); ++i) {
1439 out.writeLong(blockOffsets.get(i));
1440 out.writeInt(onDiskDataSizes.get(i));
1441 Bytes.writeByteArray(out, blockKeys.get(i));
1442 }
1443 }
1444
1445
1446
1447
1448 int getRootSize() {
1449 return curTotalRootSize;
1450 }
1451
1452
1453
1454
1455 public int getNumEntries() {
1456 return blockKeys.size();
1457 }
1458
1459 public byte[] getBlockKey(int i) {
1460 return blockKeys.get(i);
1461 }
1462
1463 public long getBlockOffset(int i) {
1464 return blockOffsets.get(i);
1465 }
1466
1467 public int getOnDiskDataSize(int i) {
1468 return onDiskDataSizes.get(i);
1469 }
1470
1471 public long getCumulativeNumKV(int i) {
1472 if (i < 0)
1473 return 0;
1474 return numSubEntriesAt.get(i);
1475 }
1476
1477 }
1478
  /** @return the configured maximum index block (chunk) size, or the default */
  public static int getMaxChunkSize(Configuration conf) {
    return conf.getInt(MAX_CHUNK_SIZE_KEY, DEFAULT_MAX_CHUNK_SIZE);
  }

  /** @return the configured minimum number of entries per index block, or the default */
  public static int getMinIndexNumEntries(Configuration conf) {
    return conf.getInt(MIN_INDEX_NUM_ENTRIES_KEY, DEFAULT_MIN_INDEX_NUM_ENTRIES);
  }
1486 }