/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MID_KEY_METADATA_SIZE;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Does not perform any kind of encoding/decoding.
 */
@InterfaceAudience.Private
public class NoOpIndexBlockEncoder implements HFileIndexBlockEncoder {

  public static final NoOpIndexBlockEncoder INSTANCE = new NoOpIndexBlockEncoder();

  /** Cannot be instantiated. Use {@link #INSTANCE} instead. */
  private NoOpIndexBlockEncoder() {
  }

  @Override
  public void saveMetadata(HFile.Writer writer) {
  }

  @Override
  public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out)
    throws IOException {
    if (rootIndexBlock) {
      writeRoot(blockIndexChunk, out);
    } else {
      writeNonRoot(blockIndexChunk, out);
    }
  }

  /**
   * Writes the block index chunk in the non-root index block format. This format contains the
   * number of entries, an index of integer offsets for quick binary search on variable-length
   * records, and tuples of block offset, on-disk block size, and the first key for each entry.
   */
  private void writeNonRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    // The number of entries in the block.
    out.writeInt(blockIndexChunk.getNumEntries());

    if (
      blockIndexChunk.getSecondaryIndexOffsetMarks().size() != blockIndexChunk.getBlockKeys().size()
    ) {
      throw new IOException("Corrupted block index chunk writer: "
        + blockIndexChunk.getBlockKeys().size() + " entries but "
        + blockIndexChunk.getSecondaryIndexOffsetMarks().size() + " secondary index items");
    }

    // For each entry, write a "secondary index" of relative offsets to the
    // entries from the end of the secondary index. This works because at
    // read time we read the number of entries and know where the secondary
    // index ends.
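    // Illustrative example (entry sizes assumed for this comment only): with
    // three entries occupying 20, 25 and 30 bytes, the secondary index below
    // is written as 0, 20, 45, followed by the trailing total 75, so the size
    // of entry i can be recovered as secondaryIndex[i + 1] - secondaryIndex[i].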
    for (int currentSecondaryIndex : blockIndexChunk.getSecondaryIndexOffsetMarks()) {
      out.writeInt(currentSecondaryIndex);
    }

    // We include one other element in the secondary index to calculate the
    // size of each entry more easily by subtracting secondary index elements.
    out.writeInt(blockIndexChunk.getCurTotalNonRootEntrySize());

    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      out.write(blockIndexChunk.getBlockKey(i));
    }
  }

  /**
   * Writes this chunk into the given output stream in the root block index format. This format is
   * similar to the {@link HFile} version 1 block index format, except that we store the on-disk
   * size of the block instead of its uncompressed size.
   * @param out the data output stream to write the block index to. Typically a stream writing into
   *            an {@link HFile} block.
   */
  private void writeRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      Bytes.writeByteArray(out, blockIndexChunk.getBlockKey(i));
    }
  }

  @Override
  public IndexBlockEncoding getIndexBlockEncoding() {
    return IndexBlockEncoding.NONE;
  }

  @Override
  public EncodedSeeker createSeeker() {
    return new NoOpEncodedSeeker();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

  protected static class NoOpEncodedSeeker implements EncodedSeeker {

    protected long[] blockOffsets;
    protected int[] blockDataSizes;
    protected int rootCount = 0;

    // Mid-key metadata.
    protected long midLeafBlockOffset = -1;
    protected int midLeafBlockOnDiskSize = -1;
    protected int midKeyEntry = -1;

    private ExtendedCell[] blockKeys;
    private CellComparator comparator;
    protected int searchTreeLevel;

    /** Pre-computed mid-key */
    private AtomicReference<ExtendedCell> midKey = new AtomicReference<>();

    @Override
    public long heapSize() {
      long heapSize = ClassSize.align(ClassSize.OBJECT);

      // Mid-key metadata.
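      // MID_KEY_METADATA_SIZE accounts for the three mid-key fields declared
      // above: the long midLeafBlockOffset plus the two ints
      // midLeafBlockOnDiskSize and midKeyEntry.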
      heapSize += MID_KEY_METADATA_SIZE;

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length * Bytes.SIZEOF_INT);
      }

      if (blockKeys != null) {
        heapSize += ClassSize.REFERENCE;
        // Adding array + references overhead
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);

        // Adding blockKeys
        for (Cell key : blockKeys) {
          heapSize += ClassSize.align(key.heapSize());
        }
      }
      // Add the comparator and the midKey AtomicReference
      heapSize += 2 * ClassSize.REFERENCE;
      // Add rootCount and searchTreeLevel
      heapSize += 2 * Bytes.SIZEOF_INT;

      return ClassSize.align(heapSize);
    }

    @Override
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    @Override
    public ExtendedCell getRootBlockKey(int i) {
      return blockKeys[i];
    }

    @Override
    public int getRootBlockCount() {
      return rootCount;
    }

    @Override
    public void initRootIndex(HFileBlock blk, int numEntries, CellComparator comparator,
      int treeLevel) throws IOException {
      this.comparator = comparator;
      this.searchTreeLevel = treeLevel;
      init(blk, numEntries);
    }

    private void init(HFileBlock blk, int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // HFileBlock.getByteStream() returns a byte stream for reading the data (excluding checksum)
      // of the root index block, so after reading the root index there is no need to subtract the
      // checksum bytes.
      if (in.available() < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }

    private DataInputStream readRootIndex(HFileBlock blk, final int numEntries)
      throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

    private void readRootIndex(DataInput in, final int numEntries) throws IOException {
      blockOffsets = new long[numEntries];
      initialize(numEntries);
      blockDataSizes = new int[numEntries];

      // If index size is zero, no index was written.
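      // Each root-level entry is read back in the layout writeRoot produced:
      // a long block offset, an int on-disk size and a length-prefixed key
      // (Bytes.readByteArray mirrors Bytes.writeByteArray).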
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }

    private void initialize(int numEntries) {
      blockKeys = new ExtendedCell[numEntries];
    }

    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      // Create the blockKeys as Cells once when the reader is opened
      blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      blockDataSizes[rootCount] = dataSize;
      rootCount++;
    }

    @Override
    public ExtendedCell midkey(HFile.CachingBlockReader cachingBlockReader) throws IOException {
      if (rootCount == 0) {
        throw new IOException("HFile empty");
      }

      ExtendedCell targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException("Have to read the middle leaf block but no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(midLeafBlockOffset,
          midLeafBlockOnDiskSize, true, true, false, true, BlockType.LEAF_INDEX, null);
        try {
          byte[] bytes = HFileBlockIndex.BlockIndexReader
            .getNonRootIndexedKey(midLeafBlock.getBufferWithoutHeader(), midKeyEntry);
          assert bytes != null;
          targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
        } finally {
          midLeafBlock.release();
        }
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }

    @Override
    public BlockWithScanInfo loadDataBlockWithScanInfo(ExtendedCell key, HFileBlock currentBlock,
      boolean cacheBlocks, boolean pread, boolean isCompaction,
      DataBlockEncoding expectedDataBlockEncoding, HFile.CachingBlockReader cachingBlockReader)
      throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // the next indexed key
      ExtendedCell nextIndexedKey = null;

      // Read the next-level (intermediate or leaf) index block.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = blockKeys[rootLevelIndex + 1];
      } else {
        nextIndexedKey = KeyValueScanner.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;
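      // Walk down the index one level per loop iteration. For illustration, with a
      // two-level index (searchTreeLevel == 2) the first pass reads the leaf index
      // block named by the root entry and the second pass reads the data block it
      // points to, at which point the loop breaks.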
      HFileBlock block = null;
      KeyValue.KeyOnlyKeyValue tmpNextIndexKV = new KeyValue.KeyOnlyKeyValue();
      while (true) {
        try {
          // Must initialize block with null here, because if we don't and an exception happens in
          // readBlock, then we would release the previously assigned block twice in the finally
          // block. (See HBASE-22422)
          block = null;
          if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
            // Avoid reading the same block again, even with caching turned off.
            // This is crucial for compaction-type workloads, which might have
            // caching turned off. This is like a one-block cache inside the
            // scanner.
            block = currentBlock;
          } else {
            // Call HFile's caching block reader API. We always cache index
            // blocks, otherwise we might get terrible performance.
            boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
            BlockType expectedBlockType;
            if (lookupLevel < searchTreeLevel - 1) {
              expectedBlockType = BlockType.INTERMEDIATE_INDEX;
            } else if (lookupLevel == searchTreeLevel - 1) {
              expectedBlockType = BlockType.LEAF_INDEX;
            } else {
              // this also accounts for ENCODED_DATA
              expectedBlockType = BlockType.DATA;
            }
            block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,
              pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
          }

          if (block == null) {
            throw new IOException("Failed to read block at offset " + currentOffset
              + ", onDiskSize=" + currentOnDiskSize);
          }

          // Found a data block: break the loop and check our level in the tree.
          if (block.getBlockType().isData()) {
            break;
          }

          // Not a data block. This must be a leaf-level or intermediate-level
          // index block. We don't allow going deeper than searchTreeLevel.
          if (++lookupLevel > searchTreeLevel) {
            throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel
              + ", searchTreeLevel=" + searchTreeLevel);
          }

          // Locate the entry corresponding to the given key in the non-root
          // (leaf or intermediate-level) index block.
          ByteBuff buffer = block.getBufferWithoutHeader();
          index = HFileBlockIndex.BlockIndexReader.locateNonRootIndexEntry(buffer, key, comparator);
          if (index == -1) {
            throw new IOException("The key " + CellUtil.getCellKeyAsString(key) + " is before the"
              + " first key of the non-root index block " + block);
          }

          currentOffset = buffer.getLong();
          currentOnDiskSize = buffer.getInt();

          // Only update the next indexed key if there is a next indexed key in the current level
          byte[] nonRootIndexedKey =
            HFileBlockIndex.BlockIndexReader.getNonRootIndexedKey(buffer, index + 1);
          if (nonRootIndexedKey != null) {
            tmpNextIndexKV.setKey(nonRootIndexedKey, 0, nonRootIndexedKey.length);
            nextIndexedKey = tmpNextIndexKV;
          }
        } finally {
          if (block != null && !block.getBlockType().isData()) {
            // Release the block immediately if it is not the data block
            block.release();
          }
        }
      }

      if (lookupLevel != searchTreeLevel) {
        assert block.getBlockType().isData();
        // We reached a data block before descending to the expected level of the
        // tree. Release the block so its ref count is decremented before throwing.
        if (block != null) {
          block.release();
        }
        throw new IOException("Reached a data block at level " + lookupLevel
          + " but the number of levels is " + searchTreeLevel);
      }

      // Set the next indexed key for the current block.
      return new BlockWithScanInfo(block, nextIndexedKey);
    }

    @Override
    public int rootBlockContainingKey(Cell key) {
      // Here the comparator should not be null as this is done for the root-level block
      int pos = Bytes.binarySearch(blockKeys, key, comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1; see
      // binarySearch's javadoc.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
      // key < blockKeys[0], meaning the file does not contain the given key.
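      // Worked example (illustrative): with four block keys and a key sorting
      // between blockKeys[1] and blockKeys[2], binarySearch returns
      // -(2 + 1) = -3, so i = 2 below and this method returns 1.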
      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=").append(rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append(blockKeys[i]).append("\n offset=").append(blockOffsets[i])
          .append(", dataSize=").append(blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }
  }
}