/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MID_KEY_METADATA_SIZE;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * An {@link HFileIndexBlockEncoder} that does not perform any kind of encoding or decoding; index
 * blocks are written and read in the plain (unencoded) HFile block index format.
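 * <p>
 * A rough sketch of reader-side use; the root index block, entry count, comparator, tree level
 * and caching block reader shown below are placeholders supplied by the HFile reader:
 *
 * <pre>{@code
 * EncodedSeeker seeker = NoOpIndexBlockEncoder.INSTANCE.createSeeker();
 * seeker.initRootIndex(rootIndexBlock, numEntries, comparator, treeLevel);
 * ExtendedCell midKey = seeker.midkey(cachingBlockReader);
 * }</pre>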
 */
@InterfaceAudience.Private
public class NoOpIndexBlockEncoder implements HFileIndexBlockEncoder {

  public static final NoOpIndexBlockEncoder INSTANCE = new NoOpIndexBlockEncoder();

  /** Cannot be instantiated. Use {@link #INSTANCE} instead. */
  private NoOpIndexBlockEncoder() {
  }

  @Override
  public void saveMetadata(HFile.Writer writer) {
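    // No-op: the unencoded index block format has no encoder-specific metadata to persist.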
  }

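  /**
   * Writes the given chunk in either the root or the non-root index block format; no extra
   * encoding is applied in either case.
   */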
  @Override
  public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out)
    throws IOException {
    if (rootIndexBlock) {
      writeRoot(blockIndexChunk, out);
    } else {
      writeNonRoot(blockIndexChunk, out);
    }
  }

  /**
   * Writes the block index chunk in the non-root index block format. This format contains the
   * number of entries, an index of integer offsets for quick binary search on variable-length
   * records, and tuples of block offset, on-disk block size, and the first key for each entry.
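   * On disk the layout is: the entry count (int), {@code numEntries + 1} secondary-index offsets
   * (ints, relative to the end of the secondary index), then the serialized entries themselves.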
   */
  private void writeNonRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    // The number of entries in the block.
    out.writeInt(blockIndexChunk.getNumEntries());

    if (
      blockIndexChunk.getSecondaryIndexOffsetMarks().size() != blockIndexChunk.getBlockKeys().size()
    ) {
      throw new IOException("Corrupted block index chunk writer: "
        + blockIndexChunk.getBlockKeys().size() + " entries but "
        + blockIndexChunk.getSecondaryIndexOffsetMarks().size() + " secondary index items");
    }

    // For each entry, write a "secondary index" of relative offsets to the
    // entries from the end of the secondary index. This works, because at
    // read time we read the number of entries and know where the secondary
    // index ends.
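    // For example, with two entries of 24 and 33 bytes the marks written here are 0 and 24, and
    // the extra element written after the loop is the total entry size, 57.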
    for (int currentSecondaryIndex : blockIndexChunk.getSecondaryIndexOffsetMarks()) {
      out.writeInt(currentSecondaryIndex);
    }

    // We include one other element in the secondary index to calculate the
    // size of each entry more easily by subtracting secondary index elements.
    out.writeInt(blockIndexChunk.getCurTotalNonRootEntrySize());

    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      out.write(blockIndexChunk.getBlockKey(i));
    }
  }

  /**
   * Writes the given block index chunk into the output stream in the root block index format.
   * This format is similar to the {@link HFile} version 1 block index format, except that we
   * store the on-disk size of the block instead of its uncompressed size.
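   * <p>
   * Each entry is written as the block offset (long), the on-disk block size (int), and the first
   * key as a length-prefixed byte array written with Bytes.writeByteArray.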
   * @param out the data output stream to write the block index to. Typically a stream writing into
   *            an {@link HFile} block.
   */
  private void writeRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      Bytes.writeByteArray(out, blockIndexChunk.getBlockKey(i));
    }
  }

  @Override
  public IndexBlockEncoding getIndexBlockEncoding() {
    return IndexBlockEncoding.NONE;
  }

  @Override
  public EncodedSeeker createSeeker() {
    return new NoOpEncodedSeeker();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

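  /**
   * Seeker over an unencoded root index block. Root-level keys, offsets and on-disk sizes are
   * materialized in memory; intermediate and leaf index blocks are read on demand during seeks.
   */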
  protected static class NoOpEncodedSeeker implements EncodedSeeker {

    protected long[] blockOffsets;
    protected int[] blockDataSizes;
    protected int rootCount = 0;

    // Mid-key metadata.
    protected long midLeafBlockOffset = -1;
    protected int midLeafBlockOnDiskSize = -1;
    protected int midKeyEntry = -1;

    private ExtendedCell[] blockKeys;
    private CellComparator comparator;
    protected int searchTreeLevel;

    /** Pre-computed mid-key */
    private AtomicReference<ExtendedCell> midKey = new AtomicReference<>();

    @Override
    public long heapSize() {
      long heapSize = ClassSize.align(ClassSize.OBJECT);

      // Mid-key metadata.
      heapSize += MID_KEY_METADATA_SIZE;

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length * Bytes.SIZEOF_INT);
      }

      if (blockKeys != null) {
        heapSize += ClassSize.REFERENCE;
        // Adding array + references overhead
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);

        // Adding blockKeys
        for (Cell key : blockKeys) {
          heapSize += ClassSize.align(key.heapSize());
        }
      }
      // Add comparator and the midKey AtomicReference
      heapSize += 2 * ClassSize.REFERENCE;
      // Add rootCount and searchTreeLevel
      heapSize += 2 * Bytes.SIZEOF_INT;

      return ClassSize.align(heapSize);
    }

    @Override
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    @Override
    public ExtendedCell getRootBlockKey(int i) {
      return blockKeys[i];
    }

    @Override
    public int getRootBlockCount() {
      return rootCount;
    }

    @Override
    public void initRootIndex(HFileBlock blk, int numEntries, CellComparator comparator,
      int treeLevel) throws IOException {
      this.comparator = comparator;
      this.searchTreeLevel = treeLevel;
      init(blk, numEntries);
    }

    private void init(HFileBlock blk, int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // HFileBlock.getByteStream() returns a byte stream for reading the data of the root index
      // block (excluding checksums), so after reading the root index there is no need to subtract
      // the checksum bytes.
      if (in.available() < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }

    private DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

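    /**
     * Reads {@code numEntries} root-level (offset, on-disk size, key) entries, in the same layout
     * that {@code writeRoot} produces, and populates the in-memory arrays.
     */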
    private void readRootIndex(DataInput in, final int numEntries) throws IOException {
      blockOffsets = new long[numEntries];
      initialize(numEntries);
      blockDataSizes = new int[numEntries];

      // If index size is zero, no index was written.
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }

    private void initialize(int numEntries) {
      blockKeys = new ExtendedCell[numEntries];
    }

    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      // Create the blockKeys as Cells once when the reader is opened
      blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      blockDataSizes[rootCount] = dataSize;
      rootCount++;
    }

    @Override
    public ExtendedCell midkey(HFile.CachingBlockReader cachingBlockReader) throws IOException {
      if (rootCount == 0) {
        throw new IOException("HFile empty");
      }

      ExtendedCell targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException(
            "Have to read the middle leaf block but no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(midLeafBlockOffset,
          midLeafBlockOnDiskSize, true, true, false, true, BlockType.LEAF_INDEX, null);
        try {
          byte[] bytes = HFileBlockIndex.BlockIndexReader
            .getNonRootIndexedKey(midLeafBlock.getBufferWithoutHeader(), midKeyEntry);
          assert bytes != null;
          targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
        } finally {
          midLeafBlock.release();
        }
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }

    @Override
    public BlockWithScanInfo loadDataBlockWithScanInfo(ExtendedCell key, HFileBlock currentBlock,
      boolean cacheBlocks, boolean pread, boolean isCompaction,
      DataBlockEncoding expectedDataBlockEncoding, HFile.CachingBlockReader cachingBlockReader)
      throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // the next indexed key
      ExtendedCell nextIndexedKey = null;

      // Read the next-level (intermediate or leaf) index block.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = blockKeys[rootLevelIndex + 1];
      } else {
        nextIndexedKey = KeyValueScanner.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block = null;
      KeyValue.KeyOnlyKeyValue tmpNextIndexKV = new KeyValue.KeyOnlyKeyValue();
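      // Walk down the index tree, one level per iteration, until we reach a data block.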
      while (true) {
        try {
          // Must reset the block to null here: if an exception happens in readBlock, we would
          // otherwise release the previously assigned block twice in the finally block.
          // (See HBASE-22422)
          block = null;
          if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
            // Avoid reading the same block again, even with caching turned off.
            // This is crucial for compaction-type workloads which might have
            // caching turned off. This is like a one-block cache inside the
            // scanner.
            block = currentBlock;
          } else {
            // Call HFile's caching block reader API. We always cache index
            // blocks, otherwise we might get terrible performance.
            boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
            BlockType expectedBlockType;
            if (lookupLevel < searchTreeLevel - 1) {
              expectedBlockType = BlockType.INTERMEDIATE_INDEX;
            } else if (lookupLevel == searchTreeLevel - 1) {
              expectedBlockType = BlockType.LEAF_INDEX;
            } else {
              // this also accounts for ENCODED_DATA
              expectedBlockType = BlockType.DATA;
            }
            block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,
              pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
          }

          if (block == null) {
            throw new IOException("Failed to read block at offset " + currentOffset
              + ", onDiskSize=" + currentOnDiskSize);
          }

          // Found a data block, break the loop and check our level in the tree.
          if (block.getBlockType().isData()) {
            break;
          }

          // Not a data block. This must be a leaf-level or intermediate-level
          // index block. We don't allow going deeper than searchTreeLevel.
          if (++lookupLevel > searchTreeLevel) {
            throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel
              + ", searchTreeLevel=" + searchTreeLevel);
          }

          // Locate the entry corresponding to the given key in the non-root
          // (leaf or intermediate-level) index block.
          ByteBuff buffer = block.getBufferWithoutHeader();
          index = HFileBlockIndex.BlockIndexReader.locateNonRootIndexEntry(buffer, key, comparator);
          if (index == -1) {
            // The key should never sort before the first key of the block that the parent index
            // entry pointed us at; treat this as an inconsistency in the index.
            throw new IOException("The key " + CellUtil.getCellKeyAsString(key) + " is before the"
              + " first key of the non-root index block " + block);
          }

          currentOffset = buffer.getLong();
          currentOnDiskSize = buffer.getInt();

          // Only update next indexed key if there is a next indexed key in the current level
          byte[] nonRootIndexedKey =
            HFileBlockIndex.BlockIndexReader.getNonRootIndexedKey(buffer, index + 1);
          if (nonRootIndexedKey != null) {
            tmpNextIndexKV.setKey(nonRootIndexedKey, 0, nonRootIndexedKey.length);
            nextIndexedKey = tmpNextIndexKV;
          }
        } finally {
          if (block != null && !block.getBlockType().isData()) {
            // Release the block immediately if it is not the data block
            block.release();
          }
        }
      }

      if (lookupLevel != searchTreeLevel) {
        assert block.getBlockType().isData();
        // We reached a data block before descending through all index levels, which indicates a
        // problem with the index. Release the block so its reference count is decremented before
        // we throw.
        if (block != null) {
          block.release();
        }
        throw new IOException("Reached a data block at level " + lookupLevel
          + " but the number of levels is " + searchTreeLevel);
      }

      // Set the next indexed key for the current block.
      return new BlockWithScanInfo(block, nextIndexedKey);
    }

    @Override
    public int rootBlockContainingKey(Cell key) {
      // The comparator is not null here: it was set in initRootIndex before any root-level search.
      int pos = Bytes.binarySearch(blockKeys, key, comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1, see
      // binarySearch's javadoc.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
      // key < blockKeys[0], meaning the file does not contain the given key.
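      // For example, with blockKeys = {b, d, f} and key c, binarySearch returns pos = -2, so
      // i = 1 and we return 0, the block whose first key is b.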

      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=").append(rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append(blockKeys[i]).append("\n  offset=").append(blockOffsets[i])
          .append(", dataSize=").append(blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }
  }
}