001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile.bucket; 019 020import java.io.IOException; 021import java.nio.ByteBuffer; 022import org.apache.hadoop.hbase.io.hfile.Cacheable; 023import org.apache.hadoop.hbase.nio.ByteBuff; 024import org.apache.hadoop.hbase.util.ByteBufferAllocator; 025import org.apache.hadoop.hbase.util.ByteBufferArray; 026import org.apache.yetus.audience.InterfaceAudience; 027 028/** 029 * IO engine that stores data in memory using an array of ByteBuffers {@link ByteBufferArray}. 030 * <p> 031 * <h2>How it Works</h2> First, see {@link ByteBufferArray} and how it gives a view across multiple 032 * ByteBuffers managed by it internally. This class does the physical BB create and the write and 033 * read to the underlying BBs. So we will create N BBs based on the total BC capacity specified on 034 * create of the ByteBufferArray. So say we have 10 GB of off heap BucketCache, we will create 2560 035 * such BBs inside our ByteBufferArray. <br> 036 * <p> 037 * Now the way BucketCache works is that the entire 10 GB is split into diff sized buckets: by 038 * default from 5 KB to 513 KB. Within each bucket of a particular size, there are usually more than 039 * one bucket 'block'. The way it is calculate in bucketcache is that the total bucketcache size is 040 * divided by 4 (hard-coded currently) * max size option. So using defaults, buckets will be is 4 * 041 * 513kb (the biggest default value) = 2052kb. A bucket of 2052kb at offset zero will serve out 042 * bucket 'blocks' of 5kb, the next bucket will do the next size up and so on up to the maximum 043 * (default) of 513kb). <br> 044 * <p> 045 * When we write blocks to the bucketcache, we will see which bucket size group it best fits. So a 4 046 * KB block size goes to the 5 KB size group. Each of the block writes, writes within its 047 * appropriate bucket. Though the bucket is '4kb' in size, it will occupy one of the 5 KB bucket 048 * 'blocks' (even if actual size of the bucket is less). Bucket 'blocks' will not span buckets. <br> 049 * <p> 050 * But you can see the physical memory under the bucket 'blocks' can be split across the underlying 051 * backing BBs from ByteBufferArray. All is split into 4 MB sized BBs. <br> 052 * <p> 053 * Each Bucket knows its offset in the entire space of BC and when block is written the offset 054 * arrives at ByteBufferArray and it figures which BB to write to. It may so happen that the entire 055 * block to be written does not fit a particular backing ByteBufferArray so the remainder goes to 056 * another BB. See {@link ByteBufferArray#write(long, ByteBuff)}. <br> 057 * So said all these, when we read a block it may be possible that the bytes of that blocks is 058 * physically placed in 2 adjucent BBs. In such case also, we avoid any copy need by having the 059 * MBB... 060 */ 061@InterfaceAudience.Private 062public class ByteBufferIOEngine implements IOEngine { 063 private ByteBufferArray bufferArray; 064 private final long capacity; 065 066 /** 067 * Construct the ByteBufferIOEngine with the given capacity n * @throws IOException ideally here 068 * no exception to be thrown from the allocator 069 */ 070 public ByteBufferIOEngine(long capacity) throws IOException { 071 this.capacity = capacity; 072 ByteBufferAllocator allocator = (size) -> ByteBuffer.allocateDirect((int) size); 073 bufferArray = new ByteBufferArray(capacity, allocator); 074 } 075 076 @Override 077 public String toString() { 078 return "ioengine=" + this.getClass().getSimpleName() + ", capacity=" 079 + String.format("%,d", this.capacity); 080 } 081 082 /** 083 * Memory IO engine is always unable to support persistent storage for the cache n 084 */ 085 @Override 086 public boolean isPersistent() { 087 return false; 088 } 089 090 @Override 091 public boolean usesSharedMemory() { 092 return true; 093 } 094 095 @Override 096 public Cacheable read(BucketEntry be) throws IOException { 097 ByteBuffer[] buffers = bufferArray.asSubByteBuffers(be.offset(), be.getLength()); 098 // Here the buffer that is created directly refers to the buffer in the actual buckets. 099 // When any cell is referring to the blocks created out of these buckets then it means that 100 // those cells are referring to a shared memory area which if evicted by the BucketCache would 101 // lead to corruption of results. The readers using this block are aware of this fact and do the 102 // necessary action to prevent eviction till the results are either consumed or copied 103 return be.wrapAsCacheable(buffers); 104 } 105 106 /** 107 * Transfers data from the given {@link ByteBuffer} to the buffer array. Position of source will 108 * be advanced by the {@link ByteBuffer#remaining()}. 109 * @param src the given byte buffer from which bytes are to be read. 110 * @param offset The offset in the ByteBufferArray of the first byte to be written 111 * @throws IOException throws IOException if writing to the array throws exception 112 */ 113 @Override 114 public void write(ByteBuffer src, long offset) throws IOException { 115 bufferArray.write(offset, ByteBuff.wrap(src)); 116 } 117 118 /** 119 * Transfers data from the given {@link ByteBuff} to the buffer array. Position of source will be 120 * advanced by the {@link ByteBuffer#remaining()}. 121 * @param src the given byte buffer from which bytes are to be read. 122 * @param offset The offset in the ByteBufferArray of the first byte to be written 123 * @throws IOException throws IOException if writing to the array throws exception 124 */ 125 @Override 126 public void write(ByteBuff src, long offset) throws IOException { 127 bufferArray.write(offset, src); 128 } 129 130 /** 131 * No operation for the sync in the memory IO engine 132 */ 133 @Override 134 public void sync() { 135 // Nothing to do. 136 } 137 138 /** 139 * No operation for the shutdown in the memory IO engine 140 */ 141 @Override 142 public void shutdown() { 143 // Nothing to do. 144 } 145}