/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MID_KEY_METADATA_SIZE;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * An {@link HFileIndexBlockEncoder} implementation that does not perform any kind of
 * encoding/decoding.
 */
@InterfaceAudience.Private
public class NoOpIndexBlockEncoder implements HFileIndexBlockEncoder {

  public static final NoOpIndexBlockEncoder INSTANCE = new NoOpIndexBlockEncoder();

  /** Cannot be instantiated. Use {@link #INSTANCE} instead. */
  private NoOpIndexBlockEncoder() {
  }

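  /** No-op: the unencoded index format has no encoder-specific metadata to persist. */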
  @Override
  public void saveMetadata(HFile.Writer writer) {
  }

  @Override
  public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out)
    throws IOException {
    if (rootIndexBlock) {
      writeRoot(blockIndexChunk, out);
    } else {
      writeNonRoot(blockIndexChunk, out);
    }
  }

  /**
   * Writes the block index chunk in the non-root index block format. This format contains the
   * number of entries, an index of integer offsets for quick binary search on variable-length
   * records, and tuples of block offset, on-disk block size, and the first key for each entry.
   */
  private void writeNonRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    // The number of entries in the block.
    out.writeInt(blockIndexChunk.getNumEntries());

    if (
      blockIndexChunk.getSecondaryIndexOffsetMarks().size() != blockIndexChunk.getBlockKeys().size()
    ) {
      throw new IOException("Corrupted block index chunk writer: "
        + blockIndexChunk.getBlockKeys().size() + " entries but "
        + blockIndexChunk.getSecondaryIndexOffsetMarks().size() + " secondary index items");
    }

    // For each entry, write a "secondary index" of relative offsets to the
    // entries from the end of the secondary index. This works because at
    // read time we read the number of entries and know where the secondary
    // index ends.
    for (int currentSecondaryIndex : blockIndexChunk.getSecondaryIndexOffsetMarks()) {
      out.writeInt(currentSecondaryIndex);
    }

    // We include one extra element in the secondary index so that the size of each entry can be
    // computed by subtracting adjacent secondary index elements.
    out.writeInt(blockIndexChunk.getCurTotalNonRootEntrySize());

    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      out.write(blockIndexChunk.getBlockKey(i));
    }
  }

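  /*
   * For reference, a sketch of the non-root index block body written by writeNonRoot above.
   * Field widths follow the DataOutput calls; keys are not length-prefixed because their sizes
   * are implied by consecutive secondary index offsets:
   *
   *   int                  numEntries
   *   int[numEntries + 1]  secondary index: entry offsets relative to the end of this array,
   *                        followed by the total size of all entries
   *   repeated numEntries times:
   *     long   block offset
   *     int    on-disk data size
   *     byte[] first key of the block
   */
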
  /**
   * Writes this chunk into the given output stream in the root block index format. This format is
   * similar to the {@link HFile} version 1 block index format, except that we store the on-disk
   * size of the block instead of its uncompressed size.
   * @param out the data output stream to write the block index to. Typically a stream writing into
   *            an {@link HFile} block.
   */
  private void writeRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      Bytes.writeByteArray(out, blockIndexChunk.getBlockKey(i));
    }
  }

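  /*
   * For reference, a sketch of the root index block body written by writeRoot above. Unlike the
   * non-root format there is no secondary index, and each key is length-prefixed (written via
   * Bytes.writeByteArray):
   *
   *   repeated numEntries times:
   *     long                   block offset
   *     int                    on-disk data size
   *     length-prefixed byte[] first key of the block
   */
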
  @Override
  public IndexBlockEncoding getIndexBlockEncoding() {
    return IndexBlockEncoding.NONE;
  }

  @Override
  public EncodedSeeker createSeeker() {
    return new NoOpEncodedSeeker();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

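  /**
   * Seeker over an unencoded block index. Holds the root-level index (block offsets, on-disk
   * sizes, and first keys) in memory and walks intermediate and leaf index blocks on demand.
   */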
  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
  public static class NoOpEncodedSeeker implements EncodedSeeker {

    protected long[] blockOffsets;
    protected int[] blockDataSizes;
    protected int rootCount = 0;

    // Mid-key metadata.
    protected long midLeafBlockOffset = -1;
    protected int midLeafBlockOnDiskSize = -1;
    protected int midKeyEntry = -1;

    private ExtendedCell[] blockKeys;
    private CellComparator comparator;
    protected int searchTreeLevel;

    /** Pre-computed mid-key */
    private AtomicReference<ExtendedCell> midKey = new AtomicReference<>();

    @Override
    public long heapSize() {
      long heapSize = ClassSize.align(ClassSize.OBJECT);

      // Mid-key metadata.
      heapSize += MID_KEY_METADATA_SIZE;

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length * Bytes.SIZEOF_INT);
      }

      if (blockKeys != null) {
        heapSize += ClassSize.REFERENCE;
        // Add the array and per-element reference overhead
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);

        // Add the heap size of each block key
        for (Cell key : blockKeys) {
          heapSize += ClassSize.align(key.heapSize());
        }
      }
      // Add the comparator and the midKey AtomicReference
      heapSize += 2 * ClassSize.REFERENCE;
      // Add rootCount and searchTreeLevel
      heapSize += 2 * Bytes.SIZEOF_INT;

      return ClassSize.align(heapSize);
    }

    @Override
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    @Override
    public ExtendedCell getRootBlockKey(int i) {
      return blockKeys[i];
    }

    @Override
    public int getRootBlockCount() {
      return rootCount;
    }

    @Override
    public void initRootIndex(HFileBlock blk, int numEntries, CellComparator comparator,
      int treeLevel) throws IOException {
      this.comparator = comparator;
      this.searchTreeLevel = treeLevel;
      init(blk, numEntries);
    }

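    /**
     * Reads the root index entries and, if present, the mid-key metadata (mid leaf block offset,
     * on-disk size, and entry index) that follows them.
     */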
    private void init(HFileBlock blk, int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // HFileBlock.getByteStream() returns a byte stream for reading the data (excluding checksum)
      // of the root index block, so after reading the root index there is no need to subtract the
      // checksum bytes.
      if (in.available() < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }

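    /**
     * Reads the root index entries from the block's byte stream and returns the stream, positioned
     * just after the entries so that any trailing mid-key metadata can be read by the caller.
     */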
    private DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

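    /**
     * Reads {@code numEntries} root index entries from the input: for each entry a block offset,
     * an on-disk data size, and a length-prefixed first key.
     */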
    private void readRootIndex(DataInput in, final int numEntries) throws IOException {
      blockOffsets = new long[numEntries];
      initialize(numEntries);
      blockDataSizes = new int[numEntries];

      // If index size is zero, no index was written.
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }

    private void initialize(int numEntries) {
      blockKeys = new ExtendedCell[numEntries];
    }

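    /**
     * Records one root index entry: block offset, on-disk size, and the first key wrapped as a
     * {@link KeyValue.KeyOnlyKeyValue}.
     */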
    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      // Create the blockKeys as Cells once when the reader is opened
      blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      blockDataSizes[rootCount] = dataSize;
      rootCount++;
    }

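    // Returns the mid key of the file: served from the cached value when available, otherwise read
    // from the middle leaf block (multi-level index) or taken as the middle root-level entry
    // (single-level index), and then cached.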
    @Override
    public ExtendedCell midkey(HFile.CachingBlockReader cachingBlockReader) throws IOException {
      if (rootCount == 0) {
        throw new IOException("HFile empty");
      }

      ExtendedCell targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException("Have to read the middle leaf block but no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(midLeafBlockOffset,
          midLeafBlockOnDiskSize, true, true, false, true, BlockType.LEAF_INDEX, null);
        try {
          byte[] bytes = HFileBlockIndex.BlockIndexReader
            .getNonRootIndexedKey(midLeafBlock.getBufferWithoutHeader(), midKeyEntry);
          assert bytes != null;
          targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
        } finally {
          midLeafBlock.release();
        }
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }

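    // Descends from the matching root index entry through intermediate and leaf index blocks until
    // a data block is reached, releasing each index block after use and tracking the next indexed
    // key so the scanner knows where the returned data block ends.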
    @Override
    public BlockWithScanInfo loadDataBlockWithScanInfo(ExtendedCell key, HFileBlock currentBlock,
      boolean cacheBlocks, boolean pread, boolean isCompaction,
      DataBlockEncoding expectedDataBlockEncoding, HFile.CachingBlockReader cachingBlockReader)
      throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // The next indexed key
      ExtendedCell nextIndexedKey = null;

      // Read the next-level (intermediate or leaf) index block.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = blockKeys[rootLevelIndex + 1];
      } else {
        nextIndexedKey = KeyValueScanner.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block = null;
      KeyValue.KeyOnlyKeyValue tmpNextIndexKV = new KeyValue.KeyOnlyKeyValue();
      while (true) {
        try {
          // Must initialize the block to null here, because if we don't and an exception happens
          // in readBlock, we would release the previously assigned block twice in the finally
          // block. (See HBASE-22422)
          block = null;
          if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
            // Avoid reading the same block again, even with caching turned off.
            // This is crucial for compaction-type workloads which might have
            // caching turned off. This is like a one-block cache inside the
            // scanner.
            block = currentBlock;
          } else {
            // Call HFile's caching block reader API. We always cache index
            // blocks, otherwise we might get terrible performance.
            boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
            BlockType expectedBlockType;
            if (lookupLevel < searchTreeLevel - 1) {
              expectedBlockType = BlockType.INTERMEDIATE_INDEX;
            } else if (lookupLevel == searchTreeLevel - 1) {
              expectedBlockType = BlockType.LEAF_INDEX;
            } else {
              // this also accounts for ENCODED_DATA
              expectedBlockType = BlockType.DATA;
            }
            block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,
              pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
          }

          if (block == null) {
            throw new IOException("Failed to read block at offset " + currentOffset
              + ", onDiskSize=" + currentOnDiskSize);
          }

          // Found a data block; break the loop and check our level in the tree.
          if (block.getBlockType().isData()) {
            break;
          }

          // Not a data block. This must be a leaf-level or intermediate-level
          // index block. We don't allow going deeper than searchTreeLevel.
          if (++lookupLevel > searchTreeLevel) {
            throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel
              + ", searchTreeLevel=" + searchTreeLevel);
          }

          // Locate the entry corresponding to the given key in the non-root
          // (leaf or intermediate-level) index block.
          ByteBuff buffer = block.getBufferWithoutHeader();
          index = HFileBlockIndex.BlockIndexReader.locateNonRootIndexEntry(buffer, key, comparator);
          if (index == -1) {
            // This has to be changed
            // For now change this to key value
            throw new IOException("The key " + CellUtil.getCellKeyAsString(key) + " is before the"
              + " first key of the non-root index block " + block);
          }

          currentOffset = buffer.getLong();
          currentOnDiskSize = buffer.getInt();

          // Only update the next indexed key if there is a next indexed key in the current level
          byte[] nonRootIndexedKey =
            HFileBlockIndex.BlockIndexReader.getNonRootIndexedKey(buffer, index + 1);
          if (nonRootIndexedKey != null) {
            tmpNextIndexKV.setKey(nonRootIndexedKey, 0, nonRootIndexedKey.length);
            nextIndexedKey = tmpNextIndexKV;
          }
        } finally {
          if (block != null && !block.getBlockType().isData()) {
            // Release the block immediately if it is not a data block
            block.release();
          }
        }
      }

      if (lookupLevel != searchTreeLevel) {
        assert block.getBlockType().isData();
        // We reached a data block at an unexpected level of the tree. Release the block so that
        // its ref count is decremented before we throw.
        if (block != null) {
          block.release();
        }
        throw new IOException("Reached a data block at level " + lookupLevel
          + " but the number of levels is " + searchTreeLevel);
      }

      // Set the next indexed key for the returned data block.
      return new BlockWithScanInfo(block, nextIndexedKey);
    }

    @Override
    public int rootBlockContainingKey(Cell key) {
      // The comparator should not be null here, since this search runs against the root-level
      // index block.
      int pos = Bytes.binarySearch(blockKeys, key, comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1, see
      // binarySearch's javadoc.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
      // key < blockKeys[0], meaning the file does not contain the given key.

      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=").append(rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append(blockKeys[i]).append("\n  offset=").append(blockOffsets[i])
          .append(", dataSize=").append(blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }
  }
}