001/**
002 * Copyright The Apache Software Foundation
003 *
004 * Licensed to the Apache Software Foundation (ASF) under one or more
005 * contributor license agreements. See the NOTICE file distributed with this
006 * work for additional information regarding copyright ownership. The ASF
007 * licenses this file to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 * http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
015 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
016 * License for the specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.hadoop.hbase.io.hfile.bucket;
020
021import java.io.IOException;
022import java.nio.ByteBuffer;
023
024import org.apache.yetus.audience.InterfaceAudience;
025import org.apache.hadoop.hbase.io.hfile.Cacheable;
026import org.apache.hadoop.hbase.nio.ByteBuff;
027import org.apache.hadoop.hbase.util.ByteBufferAllocator;
028import org.apache.hadoop.hbase.util.ByteBufferArray;
029
030/**
031 * IO engine that stores data in memory using an array of ByteBuffers {@link ByteBufferArray}.
032 * <p>
033 * <h2>How it Works</h2> First, see {@link ByteBufferArray} and how it gives a view across multiple
034 * ByteBuffers managed by it internally. This class does the physical BB create and the write and
035 * read to the underlying BBs. So we will create N BBs based on the total BC capacity specified on
036 * create of the ByteBufferArray. So say we have 10 GB of off heap BucketCache, we will create 2560
037 * such BBs inside our ByteBufferArray. <br>
038 * <p>
039 * Now the way BucketCache works is that the entire 10 GB is split into diff sized buckets: by
040 * default from 5 KB to 513 KB. Within each bucket of a particular size, there are usually more than
041 * one bucket 'block'. The way it is calculate in bucketcache is that the total bucketcache size is
042 * divided by 4 (hard-coded currently) * max size option. So using defaults, buckets will be is 4 *
043 * 513kb (the biggest default value) = 2052kb. A bucket of 2052kb at offset zero will serve out
044 * bucket 'blocks' of 5kb, the next bucket will do the next size up and so on up to the maximum
045 * (default) of 513kb). <br>
046 * <p>
047 * When we write blocks to the bucketcache, we will see which bucket size group it best fits. So a 4
048 * KB block size goes to the 5 KB size group. Each of the block writes, writes within its
049 * appropriate bucket. Though the bucket is '4kb' in size, it will occupy one of the 5 KB bucket
050 * 'blocks' (even if actual size of the bucket is less). Bucket 'blocks' will not span buckets. <br>
051 * <p>
052 * But you can see the physical memory under the bucket 'blocks' can be split across the underlying
053 * backing BBs from ByteBufferArray. All is split into 4 MB sized BBs. <br>
054 * <p>
055 * Each Bucket knows its offset in the entire space of BC and when block is written the offset
056 * arrives at ByteBufferArray and it figures which BB to write to. It may so happen that the entire
057 * block to be written does not fit a particular backing ByteBufferArray so the remainder goes to
058 * another BB. See {@link ByteBufferArray#write(long, ByteBuff)}. <br>
059 * So said all these, when we read a block it may be possible that the bytes of that blocks is
060 * physically placed in 2 adjucent BBs. In such case also, we avoid any copy need by having the
061 * MBB...
062 */
063@InterfaceAudience.Private
064public class ByteBufferIOEngine implements IOEngine {
065  private ByteBufferArray bufferArray;
066  private final long capacity;
067
068  /**
069   * Construct the ByteBufferIOEngine with the given capacity
070   * @param capacity
071   * @throws IOException ideally here no exception to be thrown from the allocator
072   */
073  public ByteBufferIOEngine(long capacity) throws IOException {
074    this.capacity = capacity;
075    ByteBufferAllocator allocator = (size) -> ByteBuffer.allocateDirect((int) size);
076    bufferArray = new ByteBufferArray(capacity, allocator);
077  }
078
079  @Override
080  public String toString() {
081    return "ioengine=" + this.getClass().getSimpleName() + ", capacity=" +
082      String.format("%,d", this.capacity);
083  }
084
085  /**
086   * Memory IO engine is always unable to support persistent storage for the
087   * cache
088   * @return false
089   */
090  @Override
091  public boolean isPersistent() {
092    return false;
093  }
094
095  @Override
096  public boolean usesSharedMemory() {
097    return true;
098  }
099
100  @Override
101  public Cacheable read(BucketEntry be) throws IOException {
102    ByteBuffer[] buffers = bufferArray.asSubByteBuffers(be.offset(), be.getLength());
103    // Here the buffer that is created directly refers to the buffer in the actual buckets.
104    // When any cell is referring to the blocks created out of these buckets then it means that
105    // those cells are referring to a shared memory area which if evicted by the BucketCache would
106    // lead to corruption of results. The readers using this block are aware of this fact and do the
107    // necessary action to prevent eviction till the results are either consumed or copied
108    return be.wrapAsCacheable(buffers);
109  }
110
111  /**
112   * Transfers data from the given {@link ByteBuffer} to the buffer array. Position of source will
113   * be advanced by the {@link ByteBuffer#remaining()}.
114   * @param src the given byte buffer from which bytes are to be read.
115   * @param offset The offset in the ByteBufferArray of the first byte to be written
116   * @throws IOException throws IOException if writing to the array throws exception
117   */
118  @Override
119  public void write(ByteBuffer src, long offset) throws IOException {
120    bufferArray.write(offset, ByteBuff.wrap(src));
121  }
122
123  /**
124   * Transfers data from the given {@link ByteBuff} to the buffer array. Position of source will be
125   * advanced by the {@link ByteBuffer#remaining()}.
126   * @param src the given byte buffer from which bytes are to be read.
127   * @param offset The offset in the ByteBufferArray of the first byte to be written
128   * @throws IOException throws IOException if writing to the array throws exception
129   */
130  @Override
131  public void write(ByteBuff src, long offset) throws IOException {
132    bufferArray.write(offset, src);
133  }
134
135  /**
136   * No operation for the sync in the memory IO engine
137   */
138  @Override
139  public void sync() {
140    // Nothing to do.
141  }
142
143  /**
144   * No operation for the shutdown in the memory IO engine
145   */
146  @Override
147  public void shutdown() {
148    // Nothing to do.
149  }
150}