/**
 * Copyright The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.hfile.bucket;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.Cacheable;
import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferAllocator;
import org.apache.hadoop.hbase.util.ByteBufferArray;

/**
 * IO engine that stores data in memory using an array of ByteBuffers, {@link ByteBufferArray}.
 *
 * <h2>How it Works</h2>
 * First, see {@link ByteBufferArray} and how it gives a single view across the multiple
 * ByteBuffers it manages internally. This class does the physical BB creation and the writes and
 * reads to the underlying BBs. We create N BBs based on the total BucketCache (BC) capacity
 * specified when the ByteBufferArray is created. So say we have 10 GB of off-heap BucketCache; we
 * will create 2560 such BBs inside our ByteBufferArray.
 *
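 * <p>A minimal sketch of that sizing in code, assuming the 4 MB per-buffer size described below
 * and the illustrative 10 GB capacity from above:
 * <pre>{@code
 *   long capacity  = 10L * 1024 * 1024 * 1024; // 10 GB BucketCache, example value only
 *   int bufferSize = 4 * 1024 * 1024;          // each backing ByteBuffer is 4 MB
 *   long numBBs    = capacity / bufferSize;    // = 2560 ByteBuffers in the ByteBufferArray
 * }</pre>
 *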
 * <p>Now the way BucketCache works is that the entire 10 GB is split into buckets of different
 * sizes: by default from 5 KB to 513 KB. Within each bucket of a particular size there is usually
 * more than one bucket 'block'. The way it is calculated in BucketCache is that the total
 * BucketCache size is divided by 4 (hard-coded currently) * the max size option. So using
 * defaults, each bucket will be 4 * 513 KB (the biggest default value) = 2052 KB. A bucket of
 * 2052 KB at offset zero will serve out bucket 'blocks' of 5 KB, the next bucket will do the next
 * size up, and so on up to the maximum (default) of 513 KB.
 *
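 * <p>A worked example of that bucket sizing, using only the default sizes quoted above
 * (illustrative arithmetic, not a hard API):
 * <pre>{@code
 *   int maxBlockSize   = 513 * 1024;                       // 513 KB, the biggest default size
 *   int bucketCapacity = 4 * maxBlockSize;                 // = 2052 KB per bucket
 *   int smallBlockSize = 5 * 1024;                         // 5 KB, the smallest default size
 *   int slotsInBucket  = bucketCapacity / smallBlockSize;  // = 410 bucket 'blocks' of 5 KB
 * }</pre>
 *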
 * <p>When we write blocks to the BucketCache, we see which bucket size group each block best
 * fits. So a 4 KB block goes to the 5 KB size group. Each block write happens within its
 * appropriate bucket. Though the block is '4 KB' in size, it will occupy one of the 5 KB bucket
 * 'blocks' (even if the actual size of the block is less). Bucket 'blocks' will not span buckets.
 *
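 * <p>For instance, continuing with the defaults above (numbers are illustrative only):
 * <pre>{@code
 *   int blockSize    = 4 * 1024;              // a 4 KB block to be cached
 *   int sizeGroup    = 5 * 1024;              // smallest bucket 'block' size that fits it
 *   int unusedInSlot = sizeGroup - blockSize; // ~1 KB of the 5 KB slot simply goes unused
 * }</pre>
 *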
 * <p>But as you can see, the physical memory under the bucket 'blocks' can be split across the
 * underlying backing BBs from the ByteBufferArray. Everything is split into 4 MB sized BBs.
 *
 * <p>Each Bucket knows its offset in the entire space of the BC, and when a block is written the
 * offset arrives at the ByteBufferArray, which figures out which BB to write to. It may so happen
 * that the entire block to be written does not fit a particular backing BB, so the remainder goes
 * to the next BB. See {@link ByteBufferArray#putMultiple(long, int, byte[])}.
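 *
 * <p>A sketch of how one write can straddle two backing BBs (the offset and length here are made
 * up for illustration; the 4 MB buffer size is as described above):
 * <pre>{@code
 *   int bbSize  = 4 * 1024 * 1024;   // each backing ByteBuffer is 4 MB
 *   long offset = bbSize - 2 * 1024; // the block starts 2 KB before the end of BB #0
 *   int length  = 5 * 1024;          // a 5 KB bucket 'block'
 *   // putMultiple() writes the first 2 KB into BB #0 and the remaining 3 KB into BB #1
 * }</pre>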
 *
 * <p>Having said all this, when we read a block it is possible that the bytes of that block are
 * physically placed in 2 adjacent BBs. In such a case too, we avoid any need for a copy by having
 * the MBB (MultiByteBuff) view span both buffers.
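 *
 * <p>Conceptually, the read path for such a straddling block yields a view rather than a copy
 * (a sketch only; see {@link #read(long, int, CacheableDeserializer)} for the actual code):
 * <pre>{@code
 *   ByteBuff view = bufferArray.asSubByteBuff(offset, length);
 *   // 'view' may be a MultiByteBuff wrapping slices of BB #0 and BB #1; no bytes are copied
 * }</pre>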
 */
@InterfaceAudience.Private
public class ByteBufferIOEngine implements IOEngine {
  private ByteBufferArray bufferArray;
  private final long capacity;

  /**
   * Construct the ByteBufferIOEngine with the given capacity.
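   * <p>A minimal usage sketch (the capacity value is illustrative only):
   * <pre>{@code
   *   IOEngine engine = new ByteBufferIOEngine(8L * 1024 * 1024 * 1024); // 8 GB off-heap cache
   * }</pre>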
   * @param capacity total capacity of the engine, in bytes
   * @throws IOException ideally no exception should be thrown by the allocator here
   */
  public ByteBufferIOEngine(long capacity) throws IOException {
    this.capacity = capacity;
    // Allocate each backing buffer off-heap; ByteBufferArray decides how many buffers are needed.
    ByteBufferAllocator allocator = new ByteBufferAllocator() {
      @Override
      public ByteBuffer allocate(long size) throws IOException {
        return ByteBuffer.allocateDirect((int) size);
      }
    };
    bufferArray = new ByteBufferArray(capacity, allocator);
  }

  @Override
  public String toString() {
    return "ioengine=" + this.getClass().getSimpleName() + ", capacity=" +
      String.format("%,d", this.capacity);
  }

  /**
   * The memory IO engine cannot support persistent storage for the cache.
   * @return false
   */
  @Override
  public boolean isPersistent() {
    return false;
  }

  @Override
  public boolean usesSharedMemory() {
    return true;
  }

  @Override
  public Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer)
      throws IOException {
    ByteBuff dstBuffer = bufferArray.asSubByteBuff(offset, length);
    // The buffer created here directly refers to the memory in the actual buckets. Any cell
    // referring to a block created from these buckets is therefore referring to a shared memory
    // area which, if evicted by the BucketCache, would lead to corruption of results. Hence we
    // mark the memory type of the buffer as SHARED so that readers of this block are aware of
    // this fact and take the necessary action to prevent eviction until the results are either
    // consumed or copied.
    return deserializer.deserialize(dstBuffer, true, MemoryType.SHARED);
  }


  /**
   * Transfers data from the given byte buffer to the buffer array.
   * @param srcBuffer the given byte buffer from which bytes are to be read
   * @param offset the offset in the ByteBufferArray of the first byte to be written
   * @throws IOException if writing to the array throws an exception
   */
  @Override
  public void write(ByteBuffer srcBuffer, long offset) throws IOException {
    assert srcBuffer.hasArray();
    bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(),
        srcBuffer.arrayOffset());
  }

  @Override
  public void write(ByteBuff srcBuffer, long offset) throws IOException {
    // When caching a block into the BucketCache there will be a single buffer backing this
    // HFileBlock, so this works for now. But if the DFS itself ever hands us a DirectByteBuffer,
    // this assumption may no longer hold.
    assert srcBuffer.hasArray();
    bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(),
        srcBuffer.arrayOffset());
  }

  /**
   * No operation for sync in the memory IO engine.
   */
  @Override
  public void sync() {
    // Nothing to do.
  }

  /**
   * No operation for shutdown in the memory IO engine.
   */
  @Override
  public void shutdown() {
    // Nothing to do.
  }
}