View Javadoc

1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements. See the NOTICE file distributed with this
6    * work for additional information regarding copyright ownership. The ASF
7    * licenses this file to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16   * License for the specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile.bucket;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.io.hfile.Cacheable;
26  import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
27  import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
28  import org.apache.hadoop.hbase.nio.ByteBuff;
29  import org.apache.hadoop.hbase.util.ByteBufferAllocator;
30  import org.apache.hadoop.hbase.util.ByteBufferArray;
31  
32  /**
33   * IO engine that stores data in memory using an array of ByteBuffers
34   * {@link ByteBufferArray}.
35   *
36   *<h2>How it Works</h2>
37   * First, see {@link ByteBufferArray} and how it gives a view across multiple ByteBuffers managed
38   * by it internally. This class does the physical BB create and the write and read to the
39   * underlying BBs. So we will create N BBs based on the total BC capacity specified on create
40   * of the ByteBufferArray. So say we have 10 GB of off heap BucketCache, we will create 2560 such
41   * BBs inside our ByteBufferArray.
42   * 
43   * <p>Now the way BucketCache works is that the entire 10 GB is split into diff sized buckets: by
44   * default from 5 KB to 513 KB. Within each bucket of a particular size, there are
45   * usually more than one bucket 'block'. The way it is calculated in bucketcache is that the total
46   * bucketcache size is divided by 4 (hard-coded currently) * max size option. So using defaults,
47   * buckets will be 4 * 513kb (the biggest default value) = 2052kb. A bucket of 2052kb at offset
48   * zero will serve out bucket 'blocks' of 5kb, the next bucket will do the next size up and so on
49   * up to the maximum (default) of 513kb).
50   * 
51   * <p>When we write blocks to the bucketcache, we will see which bucket size group it best fits.
52   * So a 4 KB block size goes to the 5 KB size group. Each of the block writes, writes within its
53   * appropriate bucket. Though the block is '4kb' in size, it will occupy one of the 
54   * 5 KB bucket 'blocks' (even if actual size of the bucket is less). Bucket 'blocks' will not span
55   * buckets.
56   * 
57   * <p>But you can see the physical memory under the bucket 'blocks' can be split across the
58   * underlying backing BBs from ByteBufferArray. All is split into 4 MB sized BBs.
59   * 
60   * <p>Each Bucket knows its offset in the entire space of BC and when block is written the offset
61   * arrives at ByteBufferArray and it figures which BB to write to. It may so happen that the entire
62   * block to be written does not fit a particular backing ByteBufferArray so the remainder goes to
63   * another BB. See {@link ByteBufferArray#putMultiple(long, int, byte[])}.
 *
 * <p>Having said all this: when we read a block, the bytes of that block may be physically
 * placed in two adjacent BBs. In that case too we avoid any copy by serving the read out of a
 * multi-ByteBuffer (MBB) view across those underlying buffers.
 */
67  @InterfaceAudience.Private
68  public class ByteBufferIOEngine implements IOEngine {
69    private ByteBufferArray bufferArray;
70    private final long capacity;
71    private final boolean direct;
72  
73    /**
74     * Construct the ByteBufferIOEngine with the given capacity
75     * @param capacity
76     * @param direct true if allocate direct buffer
77     * @throws IOException ideally here no exception to be thrown from the allocator
78     */
79    public ByteBufferIOEngine(long capacity, boolean direct)
80        throws IOException {
81      this.capacity = capacity;
82      this.direct = direct;
83      ByteBufferAllocator allocator = new ByteBufferAllocator() {
84        @Override
85        public ByteBuffer allocate(long size, boolean directByteBuffer)
86            throws IOException {
87          if (directByteBuffer) {
88            return ByteBuffer.allocateDirect((int) size);
89          } else {
90            return ByteBuffer.allocate((int) size);
91          }
92        }
93      };
94      bufferArray = new ByteBufferArray(capacity, direct, allocator);
95    }
96  
97    @Override
98    public String toString() {
99      return "ioengine=" + this.getClass().getSimpleName() + ", capacity=" +
100       String.format("%,d", this.capacity) + ", direct=" + this.direct;
101   }
102 
103   /**
104    * Memory IO engine is always unable to support persistent storage for the
105    * cache
106    * @return false
107    */
108   @Override
109   public boolean isPersistent() {
110     return false;
111   }
112 
113   @Override
114   public Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer)
115       throws IOException {
116     ByteBuff dstBuffer = bufferArray.asSubByteBuff(offset, length);
117     // Here the buffer that is created directly refers to the buffer in the actual buckets.
118     // When any cell is referring to the blocks created out of these buckets then it means that
119     // those cells are referring to a shared memory area which if evicted by the BucketCache would
120     // lead to corruption of results. Hence we set the type of the buffer as SHARED_MEMORY
121     // so that the readers using this block are aware of this fact and do the necessary action
122     // to prevent eviction till the results are either consumed or copied
123     return deserializer.deserialize(dstBuffer, true, MemoryType.SHARED);
124   }
125 
126   /**
127    * Transfers data from the given byte buffer to the buffer array
128    * @param srcBuffer the given byte buffer from which bytes are to be read
129    * @param offset The offset in the ByteBufferArray of the first byte to be
130    *          written
131    * @throws IOException throws IOException if writing to the array throws exception
132    */
133   @Override
134   public void write(ByteBuffer srcBuffer, long offset) throws IOException {
135     assert srcBuffer.hasArray();
136     bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(),
137         srcBuffer.arrayOffset());
138   }
139 
140   @Override
141   public void write(ByteBuff srcBuffer, long offset) throws IOException {
142     // When caching block into BucketCache there will be single buffer backing for this HFileBlock.
143     // This will work for now. But from the DFS itself if we get DBB then this may not hold true.
144     assert srcBuffer.hasArray();
145     bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(),
146         srcBuffer.arrayOffset());
147   }
148   /**
149    * No operation for the sync in the memory IO engine
150    */
151   @Override
152   public void sync() {
153     // Nothing to do.
154   }
155 
156   /**
157    * No operation for the shutdown in the memory IO engine
158    */
159   @Override
160   public void shutdown() {
161     // Nothing to do.
162   }
163 }