/**
 * Copyright The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.hfile.bucket;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.Cacheable;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.ByteBufferAllocator;
import org.apache.hadoop.hbase.util.ByteBufferArray;

/**
 * IO engine that stores data in memory using an array of ByteBuffers {@link ByteBufferArray}.
 * <h2>How it Works</h2>
 * <p>
 * First, see {@link ByteBufferArray} and how it gives a view across multiple ByteBuffers (BBs)
 * managed by it internally. This class does the physical BB creation and the writes to and reads
 * from the underlying BBs. We create N BBs based on the total BucketCache (BC) capacity specified
 * when the ByteBufferArray is created. So, say we have 10 GB of off-heap BucketCache, we will
 * create 2560 such BBs inside our ByteBufferArray.
 * <p>
 * The way BucketCache works is that the entire 10 GB is split into differently sized buckets: by
 * default from 5 KB to 513 KB. Within each bucket of a particular size, there is usually more than
 * one bucket 'block'. The way it is calculated in BucketCache is that the total BucketCache size
 * is divided by 4 (hard-coded currently) * max size option. So, using defaults, buckets will be
 * 4 * 513 KB (the biggest default value) = 2052 KB. A bucket of 2052 KB at offset zero will serve
 * out bucket 'blocks' of 5 KB, the next bucket will do the next size up, and so on up to the
 * maximum (default) of 513 KB.
 * <p>
 * When we write blocks to the BucketCache, we see which bucket size group each one best fits. So
 * a 4 KB block goes to the 5 KB size group. Each block write goes to its appropriate bucket.
 * Though the block is 4 KB in size, it will occupy one of the 5 KB bucket 'blocks' (even though
 * the actual size of the block is less). Bucket 'blocks' will not span buckets.
 * <p>
 * But the physical memory under the bucket 'blocks' can be split across the underlying backing
 * BBs from ByteBufferArray. All of it is split into 4 MB sized BBs.
 * <p>
 * Each bucket knows its offset in the entire space of the BC, and when a block is written the
 * offset arrives at ByteBufferArray, which figures out which BB to write to. It may so happen that
 * the entire block to be written does not fit in a particular backing ByteBuffer, so the
 * remainder goes to another BB. See {@link ByteBufferArray#write(long, ByteBuff)}.
 * <p>
 * Given all this, when we read a block it is possible that the bytes of that block are physically
 * placed in 2 adjacent BBs. In that case too we avoid any copy: {@link #read(BucketEntry)} wraps
 * sub-buffers that refer directly to the backing BBs.
 */
@InterfaceAudience.Private
public class ByteBufferIOEngine implements IOEngine {
  // Assigned exactly once in the constructor, hence final like 'capacity'.
  private final ByteBufferArray bufferArray;
  private final long capacity;

  /**
   * Construct the ByteBufferIOEngine with the given capacity. The backing buffers are direct
   * (off-heap) ByteBuffers.
   * @param capacity the total capacity handed to the backing {@link ByteBufferArray}
   * @throws IOException ideally here no exception to be thrown from the allocator
   */
  public ByteBufferIOEngine(long capacity) throws IOException {
    this.capacity = capacity;
    ByteBufferAllocator allocator = (size) -> ByteBuffer.allocateDirect((int) size);
    this.bufferArray = new ByteBufferArray(capacity, allocator);
  }

  /**
   * Describes this engine by its simple class name and its capacity, the latter formatted with
   * grouping separators.
   */
  @Override
  public String toString() {
    return "ioengine=" + this.getClass().getSimpleName() + ", capacity=" +
      String.format("%,d", this.capacity);
  }

  /**
   * Memory IO engine is always unable to support persistent storage for the cache.
   * @return false
   */
  @Override
  public boolean isPersistent() {
    return false;
  }

  /**
   * Blocks handed out by {@link #read(BucketEntry)} refer directly to the memory of this engine.
   * @return true
   */
  @Override
  public boolean usesSharedMemory() {
    return true;
  }

  /**
   * Reads the block described by the given entry without copying its bytes.
   * @param be the bucket entry giving the offset and length of the block in the buffer array
   * @return the entry's bytes wrapped as a {@link Cacheable}
   * @throws IOException if the read fails
   */
  @Override
  public Cacheable read(BucketEntry be) throws IOException {
    ByteBuffer[] buffers = bufferArray.asSubByteBuffers(be.offset(), be.getLength());
    // Here the buffer that is created directly refers to the buffer in the actual buckets.
    // When any cell is referring to the blocks created out of these buckets then it means that
    // those cells are referring to a shared memory area which if evicted by the BucketCache would
    // lead to corruption of results. The readers using this block are aware of this fact and do
    // the necessary action to prevent eviction till the results are either consumed or copied.
    return be.wrapAsCacheable(buffers);
  }

  /**
   * Transfers data from the given {@link ByteBuffer} to the buffer array. Position of source will
   * be advanced by the {@link ByteBuffer#remaining()}.
   * @param src the given byte buffer from which bytes are to be read.
   * @param offset The offset in the ByteBufferArray of the first byte to be written
   * @throws IOException throws IOException if writing to the array throws exception
   */
  @Override
  public void write(ByteBuffer src, long offset) throws IOException {
    bufferArray.write(offset, ByteBuff.wrap(src));
  }

  /**
   * Transfers data from the given {@link ByteBuff} to the buffer array. Position of source will be
   * advanced by the number of bytes that were remaining in it.
   * @param src the given byte buffer from which bytes are to be read.
   * @param offset The offset in the ByteBufferArray of the first byte to be written
   * @throws IOException throws IOException if writing to the array throws exception
   */
  @Override
  public void write(ByteBuff src, long offset) throws IOException {
    bufferArray.write(offset, src);
  }

  /**
   * No operation for the sync in the memory IO engine
   */
  @Override
  public void sync() {
    // Nothing to do.
  }

  /**
   * No operation for the shutdown in the memory IO engine
   */
  @Override
  public void shutdown() {
    // Nothing to do.
  }
}