001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile.bucket;
019
020import java.io.IOException;
021import java.nio.ByteBuffer;
022import org.apache.hadoop.hbase.io.hfile.Cacheable;
023import org.apache.hadoop.hbase.nio.ByteBuff;
024import org.apache.hadoop.hbase.util.ByteBufferAllocator;
025import org.apache.hadoop.hbase.util.ByteBufferArray;
026import org.apache.yetus.audience.InterfaceAudience;
027
028/**
029 * IO engine that stores data in memory using an array of ByteBuffers {@link ByteBufferArray}.
030 * <p>
031 * <h2>How it Works</h2> First, see {@link ByteBufferArray} and how it gives a view across multiple
032 * ByteBuffers managed by it internally. This class does the physical BB create and the write and
033 * read to the underlying BBs. So we will create N BBs based on the total BC capacity specified on
034 * create of the ByteBufferArray. So say we have 10 GB of off heap BucketCache, we will create 2560
035 * such BBs inside our ByteBufferArray. <br>
036 * <p>
037 * Now the way BucketCache works is that the entire 10 GB is split into diff sized buckets: by
038 * default from 5 KB to 513 KB. Within each bucket of a particular size, there are usually more than
039 * one bucket 'block'. The way it is calculate in bucketcache is that the total bucketcache size is
040 * divided by 4 (hard-coded currently) * max size option. So using defaults, buckets will be is 4 *
041 * 513kb (the biggest default value) = 2052kb. A bucket of 2052kb at offset zero will serve out
042 * bucket 'blocks' of 5kb, the next bucket will do the next size up and so on up to the maximum
043 * (default) of 513kb). <br>
044 * <p>
045 * When we write blocks to the bucketcache, we will see which bucket size group it best fits. So a 4
046 * KB block size goes to the 5 KB size group. Each of the block writes, writes within its
047 * appropriate bucket. Though the bucket is '4kb' in size, it will occupy one of the 5 KB bucket
048 * 'blocks' (even if actual size of the bucket is less). Bucket 'blocks' will not span buckets. <br>
049 * <p>
050 * But you can see the physical memory under the bucket 'blocks' can be split across the underlying
051 * backing BBs from ByteBufferArray. All is split into 4 MB sized BBs. <br>
052 * <p>
053 * Each Bucket knows its offset in the entire space of BC and when block is written the offset
054 * arrives at ByteBufferArray and it figures which BB to write to. It may so happen that the entire
055 * block to be written does not fit a particular backing ByteBufferArray so the remainder goes to
056 * another BB. See {@link ByteBufferArray#write(long, ByteBuff)}. <br>
057 * So said all these, when we read a block it may be possible that the bytes of that blocks is
058 * physically placed in 2 adjucent BBs. In such case also, we avoid any copy need by having the
059 * MBB...
060 */
061@InterfaceAudience.Private
062public class ByteBufferIOEngine implements IOEngine {
063  private ByteBufferArray bufferArray;
064  private final long capacity;
065
066  /**
067   * Construct the ByteBufferIOEngine with the given capacity n * @throws IOException ideally here
068   * no exception to be thrown from the allocator
069   */
070  public ByteBufferIOEngine(long capacity) throws IOException {
071    this.capacity = capacity;
072    ByteBufferAllocator allocator = (size) -> ByteBuffer.allocateDirect((int) size);
073    bufferArray = new ByteBufferArray(capacity, allocator);
074  }
075
076  @Override
077  public String toString() {
078    return "ioengine=" + this.getClass().getSimpleName() + ", capacity="
079      + String.format("%,d", this.capacity);
080  }
081
082  /**
083   * Memory IO engine is always unable to support persistent storage for the cache n
084   */
085  @Override
086  public boolean isPersistent() {
087    return false;
088  }
089
090  @Override
091  public boolean usesSharedMemory() {
092    return true;
093  }
094
095  @Override
096  public Cacheable read(BucketEntry be) throws IOException {
097    ByteBuffer[] buffers = bufferArray.asSubByteBuffers(be.offset(), be.getLength());
098    // Here the buffer that is created directly refers to the buffer in the actual buckets.
099    // When any cell is referring to the blocks created out of these buckets then it means that
100    // those cells are referring to a shared memory area which if evicted by the BucketCache would
101    // lead to corruption of results. The readers using this block are aware of this fact and do the
102    // necessary action to prevent eviction till the results are either consumed or copied
103    return be.wrapAsCacheable(buffers);
104  }
105
106  /**
107   * Transfers data from the given {@link ByteBuffer} to the buffer array. Position of source will
108   * be advanced by the {@link ByteBuffer#remaining()}.
109   * @param src    the given byte buffer from which bytes are to be read.
110   * @param offset The offset in the ByteBufferArray of the first byte to be written
111   * @throws IOException throws IOException if writing to the array throws exception
112   */
113  @Override
114  public void write(ByteBuffer src, long offset) throws IOException {
115    bufferArray.write(offset, ByteBuff.wrap(src));
116  }
117
118  /**
119   * Transfers data from the given {@link ByteBuff} to the buffer array. Position of source will be
120   * advanced by the {@link ByteBuffer#remaining()}.
121   * @param src    the given byte buffer from which bytes are to be read.
122   * @param offset The offset in the ByteBufferArray of the first byte to be written
123   * @throws IOException throws IOException if writing to the array throws exception
124   */
125  @Override
126  public void write(ByteBuff src, long offset) throws IOException {
127    bufferArray.write(offset, src);
128  }
129
130  /**
131   * No operation for the sync in the memory IO engine
132   */
133  @Override
134  public void sync() {
135    // Nothing to do.
136  }
137
138  /**
139   * No operation for the shutdown in the memory IO engine
140   */
141  @Override
142  public void shutdown() {
143    // Nothing to do.
144  }
145}