001/** 002 * Copyright The Apache Software Foundation 003 * 004 * Licensed to the Apache Software Foundation (ASF) under one or more 005 * contributor license agreements. See the NOTICE file distributed with this 006 * work for additional information regarding copyright ownership. The ASF 007 * licenses this file to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 015 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 016 * License for the specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.hadoop.hbase.io.hfile.bucket; 020 021import java.io.IOException; 022import java.nio.ByteBuffer; 023 024import org.apache.yetus.audience.InterfaceAudience; 025import org.apache.hadoop.hbase.io.hfile.Cacheable; 026import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer; 027import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType; 028import org.apache.hadoop.hbase.nio.ByteBuff; 029import org.apache.hadoop.hbase.util.ByteBufferAllocator; 030import org.apache.hadoop.hbase.util.ByteBufferArray; 031 032/** 033 * IO engine that stores data in memory using an array of ByteBuffers 034 * {@link ByteBufferArray}. 035 * 036 *<h2>How it Works</h2> 037 * First, see {@link ByteBufferArray} and how it gives a view across multiple ByteBuffers managed 038 * by it internally. This class does the physical BB create and the write and read to the 039 * underlying BBs. So we will create N BBs based on the total BC capacity specified on create 040 * of the ByteBufferArray. 
 * So say we have 10 GB of off heap BucketCache, we will create 2560 such
 * BBs inside our ByteBufferArray.
 *
 * <p>Now the way BucketCache works is that the entire 10 GB is split into different sized
 * buckets: by default from 5 KB to 513 KB. Within each bucket of a particular size, there is
 * usually more than one bucket 'block'. The way it is calculated in bucketcache is that the total
 * bucketcache size is divided by 4 (hard-coded currently) * max size option. So using defaults,
 * buckets will be 4 * 513kb (the biggest default value) = 2052kb. A bucket of 2052kb at offset
 * zero will serve out bucket 'blocks' of 5kb, the next bucket will do the next size up and so on
 * up to the maximum (default) of 513kb.
 *
 * <p>When we write blocks to the bucketcache, we will see which bucket size group it best fits.
 * So a 4 KB block size goes to the 5 KB size group. Each block write is written within its
 * appropriate bucket. Though the block is 4 KB in size, it will occupy one of the
 * 5 KB bucket 'blocks' (even if the actual size of the block is less). Bucket 'blocks' will not
 * span buckets.
 *
 * <p>But you can see the physical memory under the bucket 'blocks' can be split across the
 * underlying backing BBs from ByteBufferArray. All is split into 4 MB sized BBs.
 *
 * <p>Each Bucket knows its offset in the entire space of BC and when a block is written the
 * offset arrives at ByteBufferArray and it figures out which BB to write to. It may so happen
 * that the entire block to be written does not fit in a particular backing ByteBuffer, so the
 * remainder goes to another BB. See {@link ByteBufferArray#putMultiple(long, int, byte[])}.
 *
 * <p>Having said all this, when we read a block it is possible that the bytes of that block are
 * physically placed in 2 adjacent BBs. In such a case also, we avoid any need to copy by having
 * the MBB...
066 */ 067@InterfaceAudience.Private 068public class ByteBufferIOEngine implements IOEngine { 069 private ByteBufferArray bufferArray; 070 private final long capacity; 071 072 /** 073 * Construct the ByteBufferIOEngine with the given capacity 074 * @param capacity 075 * @throws IOException ideally here no exception to be thrown from the allocator 076 */ 077 public ByteBufferIOEngine(long capacity) 078 throws IOException { 079 this.capacity = capacity; 080 ByteBufferAllocator allocator = new ByteBufferAllocator() { 081 @Override 082 public ByteBuffer allocate(long size) throws IOException { 083 return ByteBuffer.allocateDirect((int) size); 084 } 085 }; 086 bufferArray = new ByteBufferArray(capacity, allocator); 087 } 088 089 @Override 090 public String toString() { 091 return "ioengine=" + this.getClass().getSimpleName() + ", capacity=" + 092 String.format("%,d", this.capacity); 093 } 094 095 /** 096 * Memory IO engine is always unable to support persistent storage for the 097 * cache 098 * @return false 099 */ 100 @Override 101 public boolean isPersistent() { 102 return false; 103 } 104 105 @Override 106 public boolean usesSharedMemory() { 107 return true; 108 } 109 110 @Override 111 public Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer) 112 throws IOException { 113 ByteBuff dstBuffer = bufferArray.asSubByteBuff(offset, length); 114 // Here the buffer that is created directly refers to the buffer in the actual buckets. 115 // When any cell is referring to the blocks created out of these buckets then it means that 116 // those cells are referring to a shared memory area which if evicted by the BucketCache would 117 // lead to corruption of results. 
Hence we set the type of the buffer as SHARED_MEMORY 118 // so that the readers using this block are aware of this fact and do the necessary action 119 // to prevent eviction till the results are either consumed or copied 120 return deserializer.deserialize(dstBuffer, true, MemoryType.SHARED); 121 } 122 123 /** 124 * Transfers data from the given byte buffer to the buffer array 125 * @param srcBuffer the given byte buffer from which bytes are to be read 126 * @param offset The offset in the ByteBufferArray of the first byte to be 127 * written 128 * @throws IOException throws IOException if writing to the array throws exception 129 */ 130 @Override 131 public void write(ByteBuffer srcBuffer, long offset) throws IOException { 132 assert srcBuffer.hasArray(); 133 bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(), 134 srcBuffer.arrayOffset()); 135 } 136 137 @Override 138 public void write(ByteBuff srcBuffer, long offset) throws IOException { 139 // When caching block into BucketCache there will be single buffer backing for this HFileBlock. 140 // This will work for now. But from the DFS itself if we get DBB then this may not hold true. 141 assert srcBuffer.hasArray(); 142 bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(), 143 srcBuffer.arrayOffset()); 144 } 145 /** 146 * No operation for the sync in the memory IO engine 147 */ 148 @Override 149 public void sync() { 150 // Nothing to do. 151 } 152 153 /** 154 * No operation for the shutdown in the memory IO engine 155 */ 156 @Override 157 public void shutdown() { 158 // Nothing to do. 159 } 160}