001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.io;
020
021import java.nio.ByteBuffer;
022import java.util.ArrayList;
023import java.util.List;
024import java.util.Queue;
025import java.util.concurrent.ConcurrentLinkedQueue;
026import java.util.concurrent.atomic.AtomicInteger;
027import java.util.concurrent.atomic.LongAdder;
028
029import sun.nio.ch.DirectBuffer;
030
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.nio.ByteBuff;
034import org.apache.hadoop.hbase.nio.SingleByteBuff;
035import org.apache.yetus.audience.InterfaceAudience;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
040import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
041
042/**
043 * ByteBuffAllocator is a nio ByteBuffer pool.
044 * It returns {@link ByteBuff}s which are wrappers of offheap {@link ByteBuffer} usually. If we are
045 * sure that the returned ByteBuffs have reached the end of their life cycle, we must call
046 * {@link ByteBuff#release()} to return buffers to the pool otherwise the pool will leak. If the
047 * desired memory size is larger than what the ByteBufferPool has available, we'll downgrade to
048 * allocate ByteBuffers from the heap. Increase the ByteBufferPool size if detect this case.<br/>
049 * <br/>
050 * For better memory/pool utilization, there is a lower bound named
051 * <code>minSizeForReservoirUse</code> in this allocator, and if the desired size is less than
052 * <code>minSizeForReservoirUse</code>, the allocator will just allocate the ByteBuffer from heap
053 * and let the JVM manage memory, because it better to not waste pool slots allocating a single
054 * fixed-size ByteBuffer for a small object.<br/>
055 * <br/>
056 * This pool can be used anywhere it makes sense managing memory. Currently used at least by RPC.
057 */
058@InterfaceAudience.Private
059public class ByteBuffAllocator {
060
061  private static final Logger LOG = LoggerFactory.getLogger(ByteBuffAllocator.class);
062
063  // The on-heap allocator is mostly used for testing but also has some non-test usage such as
064  // for scanning snapshot. This implementation will just allocate ByteBuffers from heap but
065  // wrapped by ByteBuff.
066  public static final ByteBuffAllocator HEAP = ByteBuffAllocator.createOnHeap();
067
068  public static final String ALLOCATOR_POOL_ENABLED_KEY = "hbase.server.allocator.pool.enabled";
069
070  public static final String MAX_BUFFER_COUNT_KEY = "hbase.server.allocator.max.buffer.count";
071
072  public static final String BUFFER_SIZE_KEY = "hbase.server.allocator.buffer.size";
073
074  public static final String MIN_ALLOCATE_SIZE_KEY = "hbase.server.allocator.minimal.allocate.size";
075
076  /**
077   * @deprecated since 2.3.0 and will be removed in 4.0.0. Use
078   *   {@link ByteBuffAllocator#ALLOCATOR_POOL_ENABLED_KEY} instead.
079   */
080  @Deprecated
081  public static final String DEPRECATED_ALLOCATOR_POOL_ENABLED_KEY =
082      "hbase.ipc.server.reservoir.enabled";
083
084  /**
085   * @deprecated since 2.3.0 and will be removed in 4.0.0. Use
086   *   {@link ByteBuffAllocator#MAX_BUFFER_COUNT_KEY} instead.
087   */
088  @Deprecated
089  static final String DEPRECATED_MAX_BUFFER_COUNT_KEY = "hbase.ipc.server.reservoir.initial.max";
090
091  /**
092   * @deprecated since 2.3.0 and will be removed in 4.0.0. Use
093   *   {@link ByteBuffAllocator#BUFFER_SIZE_KEY} instead.
094   */
095  @Deprecated
096  static final String DEPRECATED_BUFFER_SIZE_KEY = "hbase.ipc.server.reservoir.initial.buffer.size";
097
098  /**
099   * The hbase.ipc.server.reservoir.initial.max and hbase.ipc.server.reservoir.initial.buffer.size
100   * were introduced in HBase2.0.0, while in HBase3.0.0 the two config keys will be replaced by
101   * {@link ByteBuffAllocator#MAX_BUFFER_COUNT_KEY} and {@link ByteBuffAllocator#BUFFER_SIZE_KEY}.
102   * Also the hbase.ipc.server.reservoir.enabled will be replaced by
103   * hbase.server.allocator.pool.enabled. Keep the three old config keys here for HBase2.x
104   * compatibility.
105   */
106  static {
107    Configuration.addDeprecation(DEPRECATED_ALLOCATOR_POOL_ENABLED_KEY, ALLOCATOR_POOL_ENABLED_KEY);
108    Configuration.addDeprecation(DEPRECATED_MAX_BUFFER_COUNT_KEY, MAX_BUFFER_COUNT_KEY);
109    Configuration.addDeprecation(DEPRECATED_BUFFER_SIZE_KEY, BUFFER_SIZE_KEY);
110  }
111
112  /**
113   * There're some reasons why better to choose 65KB(rather than 64KB) as the default buffer size:
114   * <p>
115   * 1. Almost all of the data blocks have the block size: 64KB + delta, whose delta is very small,
116   * depends on the size of lastKeyValue. If we set buffer.size=64KB, then each block will be
117   * allocated as a MultiByteBuff: one 64KB DirectByteBuffer and delta bytes HeapByteBuffer, the
118   * HeapByteBuffer will increase the GC pressure. Ideally, we should let the data block to be
119   * allocated as a SingleByteBuff, it has simpler data structure, faster access speed, less heap
120   * usage.
121   * <p>
122   * 2. Since the blocks are MultiByteBuff when using buffer.size=64KB, so we have to calculate the
123   * checksum by an temp heap copying (see HBASE-21917), while if it's a SingleByteBuff, we can
124   * speed the checksum by calling the hadoop' checksum in native lib, which is more faster.
125   * <p>
126   * For performance comparison, please see HBASE-22483.
127   */
128  public static final int DEFAULT_BUFFER_SIZE = 65 * 1024;
129
130  public static final Recycler NONE = () -> {
131  };
132
133  public interface Recycler {
134    void free();
135  }
136
137  private final boolean reservoirEnabled;
138  private final int bufSize;
139  private final int maxBufCount;
140  private final AtomicInteger usedBufCount = new AtomicInteger(0);
141
142  private boolean maxPoolSizeInfoLevelLogged = false;
143
144  // If the desired size is at least this size, it'll allocated from ByteBufferPool, otherwise it'll
145  // allocated from heap for better utilization. We make this to be 1/6th of the pool buffer size.
146  private final int minSizeForReservoirUse;
147
148  private final Queue<ByteBuffer> buffers = new ConcurrentLinkedQueue<>();
149
150  // Metrics to track the pool allocation bytes and heap allocation bytes. If heap allocation
151  // bytes is increasing so much, then we may need to increase the max.buffer.count .
152  private final LongAdder poolAllocationBytes = new LongAdder();
153  private final LongAdder heapAllocationBytes = new LongAdder();
154  private long lastPoolAllocationBytes = 0;
155  private long lastHeapAllocationBytes = 0;
156
157  /**
158   * Initialize an {@link ByteBuffAllocator} which will try to allocate ByteBuffers from off-heap if
159   * reservoir is enabled and the reservoir has enough buffers, otherwise the allocator will just
160   * allocate the insufficient buffers from on-heap to meet the requirement.
161   * @param conf which get the arguments to initialize the allocator.
162   * @param reservoirEnabled indicate whether the reservoir is enabled or disabled. NOTICE: if
163   *          reservoir is enabled, then we will use the pool allocator to allocate off-heap
164   *          ByteBuffers and use the HEAP allocator to allocate heap ByteBuffers. Otherwise if
165   *          reservoir is disabled then all allocations will happen in HEAP instance.
166   * @return ByteBuffAllocator to manage the byte buffers.
167   */
168  public static ByteBuffAllocator create(Configuration conf, boolean reservoirEnabled) {
169    if (conf.get(DEPRECATED_BUFFER_SIZE_KEY) != null
170        || conf.get(DEPRECATED_MAX_BUFFER_COUNT_KEY) != null) {
171      LOG.warn("The config keys {} and {} are deprecated now, instead please use {} and {}. In "
172            + "future release we will remove the two deprecated configs.",
173        DEPRECATED_BUFFER_SIZE_KEY, DEPRECATED_MAX_BUFFER_COUNT_KEY, BUFFER_SIZE_KEY,
174        MAX_BUFFER_COUNT_KEY);
175    }
176    int poolBufSize = conf.getInt(BUFFER_SIZE_KEY, DEFAULT_BUFFER_SIZE);
177    if (reservoirEnabled) {
178      // The max number of buffers to be pooled in the ByteBufferPool. The default value been
179      // selected based on the #handlers configured. When it is read request, 2 MB is the max size
180      // at which we will send back one RPC request. Means max we need 2 MB for creating the
181      // response cell block. (Well it might be much lesser than this because in 2 MB size calc, we
182      // include the heap size overhead of each cells also.) Considering 2 MB, we will need
183      // (2 * 1024 * 1024) / poolBufSize buffers to make the response cell block. Pool buffer size
184      // is by default 64 KB.
185      // In case of read request, at the end of the handler process, we will make the response
186      // cellblock and add the Call to connection's response Q and a single Responder thread takes
187      // connections and responses from that one by one and do the socket write. So there is chances
188      // that by the time a handler originated response is actually done writing to socket and so
189      // released the BBs it used, the handler might have processed one more read req. On an avg 2x
190      // we consider and consider that also for the max buffers to pool
191      int bufsForTwoMB = (2 * 1024 * 1024) / poolBufSize;
192      int maxBuffCount =
193          conf.getInt(MAX_BUFFER_COUNT_KEY, conf.getInt(HConstants.REGION_SERVER_HANDLER_COUNT,
194            HConstants.DEFAULT_REGION_SERVER_HANDLER_COUNT) * bufsForTwoMB * 2);
195      int minSizeForReservoirUse = conf.getInt(MIN_ALLOCATE_SIZE_KEY, poolBufSize / 6);
196      return new ByteBuffAllocator(true, maxBuffCount, poolBufSize, minSizeForReservoirUse);
197    } else {
198      return HEAP;
199    }
200  }
201
202  /**
203   * Initialize an {@link ByteBuffAllocator} which only allocate ByteBuffer from on-heap, it's
204   * designed for testing purpose or disabled reservoir case.
205   * @return allocator to allocate on-heap ByteBuffer.
206   */
207  private static ByteBuffAllocator createOnHeap() {
208    return new ByteBuffAllocator(false, 0, DEFAULT_BUFFER_SIZE, Integer.MAX_VALUE);
209  }
210
211  @VisibleForTesting
212  ByteBuffAllocator(boolean reservoirEnabled, int maxBufCount, int bufSize,
213      int minSizeForReservoirUse) {
214    this.reservoirEnabled = reservoirEnabled;
215    this.maxBufCount = maxBufCount;
216    this.bufSize = bufSize;
217    this.minSizeForReservoirUse = minSizeForReservoirUse;
218  }
219
220  public boolean isReservoirEnabled() {
221    return reservoirEnabled;
222  }
223
224  public long getHeapAllocationBytes() {
225    return heapAllocationBytes.sum();
226  }
227
228  public long getPoolAllocationBytes() {
229    return poolAllocationBytes.sum();
230  }
231
232  public int getBufferSize() {
233    return this.bufSize;
234  }
235
236  public int getUsedBufferCount() {
237    return this.usedBufCount.intValue();
238  }
239
240  /**
241   * The {@link ConcurrentLinkedQueue#size()} is O(N) complexity and time-consuming, so DO NOT use
242   * the method except in UT.
243   */
244  @VisibleForTesting
245  public int getFreeBufferCount() {
246    return this.buffers.size();
247  }
248
249  public int getTotalBufferCount() {
250    return maxBufCount;
251  }
252
253  public static long getHeapAllocationBytes(ByteBuffAllocator... allocators) {
254    long heapAllocBytes = 0;
255    for (ByteBuffAllocator alloc : Sets.newHashSet(allocators)) {
256      heapAllocBytes += alloc.getHeapAllocationBytes();
257    }
258    return heapAllocBytes;
259  }
260
261  public static double getHeapAllocationRatio(ByteBuffAllocator... allocators) {
262    double heapDelta = 0.0, poolDelta = 0.0;
263    long heapAllocBytes, poolAllocBytes;
264    // If disabled the pool allocator, then we use the global HEAP allocator. otherwise we use
265    // the pool allocator to allocate offheap ByteBuffers and use the HEAP to allocate heap
266    // ByteBuffers. So here we use a HashSet to remove the duplicated allocator object in disable
267    // case.
268    for (ByteBuffAllocator alloc : Sets.newHashSet(allocators)) {
269      heapAllocBytes = alloc.heapAllocationBytes.sum();
270      poolAllocBytes = alloc.poolAllocationBytes.sum();
271      heapDelta += (heapAllocBytes - alloc.lastHeapAllocationBytes);
272      poolDelta += (poolAllocBytes - alloc.lastPoolAllocationBytes);
273      alloc.lastHeapAllocationBytes = heapAllocBytes;
274      alloc.lastPoolAllocationBytes = poolAllocBytes;
275    }
276    // Calculate the heap allocation ratio.
277    if (Math.abs(heapDelta + poolDelta) < 1e-3) {
278      return 0.0;
279    }
280    return heapDelta / (heapDelta + poolDelta);
281  }
282
283  /**
284   * Allocate an buffer with buffer size from ByteBuffAllocator, Note to call the
285   * {@link ByteBuff#release()} if no need any more, otherwise the memory leak happen in NIO
286   * ByteBuffer pool.
287   * @return an ByteBuff with the buffer size.
288   */
289  public SingleByteBuff allocateOneBuffer() {
290    if (isReservoirEnabled()) {
291      ByteBuffer bb = getBuffer();
292      if (bb != null) {
293        return new SingleByteBuff(() -> putbackBuffer(bb), bb);
294      }
295    }
296    // Allocated from heap, let the JVM free its memory.
297    return (SingleByteBuff) ByteBuff.wrap(allocateOnHeap(bufSize));
298  }
299
300  private ByteBuffer allocateOnHeap(int size) {
301    heapAllocationBytes.add(size);
302    return ByteBuffer.allocate(size);
303  }
304
305  /**
306   * Allocate size bytes from the ByteBufAllocator, Note to call the {@link ByteBuff#release()} if
307   * no need any more, otherwise the memory leak happen in NIO ByteBuffer pool.
308   * @param size to allocate
309   * @return an ByteBuff with the desired size.
310   */
311  public ByteBuff allocate(int size) {
312    if (size < 0) {
313      throw new IllegalArgumentException("size to allocate should >=0");
314    }
315    // If disabled the reservoir, just allocate it from on-heap.
316    if (!isReservoirEnabled() || size == 0) {
317      return ByteBuff.wrap(allocateOnHeap(size));
318    }
319    int reminder = size % bufSize;
320    int len = size / bufSize + (reminder > 0 ? 1 : 0);
321    List<ByteBuffer> bbs = new ArrayList<>(len);
322    // Allocate from ByteBufferPool until the remaining is less than minSizeForReservoirUse or
323    // reservoir is exhausted.
324    int remain = size;
325    while (remain >= minSizeForReservoirUse) {
326      ByteBuffer bb = this.getBuffer();
327      if (bb == null) {
328        break;
329      }
330      bbs.add(bb);
331      remain -= bufSize;
332    }
333    int lenFromReservoir = bbs.size();
334    if (remain > 0) {
335      // If the last ByteBuffer is too small or the reservoir can not provide more ByteBuffers, we
336      // just allocate the ByteBuffer from on-heap.
337      bbs.add(allocateOnHeap(remain));
338    }
339    ByteBuff bb = ByteBuff.wrap(bbs, () -> {
340      for (int i = 0; i < lenFromReservoir; i++) {
341        this.putbackBuffer(bbs.get(i));
342      }
343    });
344    bb.limit(size);
345    return bb;
346  }
347
348  /**
349   * Free all direct buffers if allocated, mainly used for testing.
350   */
351  @VisibleForTesting
352  public void clean() {
353    while (!buffers.isEmpty()) {
354      ByteBuffer b = buffers.poll();
355      if (b instanceof DirectBuffer) {
356        DirectBuffer db = (DirectBuffer) b;
357        if (db.cleaner() != null) {
358          db.cleaner().clean();
359        }
360      }
361    }
362    this.usedBufCount.set(0);
363    this.maxPoolSizeInfoLevelLogged = false;
364    this.poolAllocationBytes.reset();
365    this.heapAllocationBytes.reset();
366    this.lastPoolAllocationBytes = 0;
367    this.lastHeapAllocationBytes = 0;
368  }
369
370  /**
371   * @return One free DirectByteBuffer from the pool. If no free ByteBuffer and we have not reached
372   *         the maximum pool size, it will create a new one and return. In case of max pool size
373   *         also reached, will return null. When pool returned a ByteBuffer, make sure to return it
374   *         back to pool after use.
375   */
376  private ByteBuffer getBuffer() {
377    ByteBuffer bb = buffers.poll();
378    if (bb != null) {
379      // To reset the limit to capacity and position to 0, must clear here.
380      bb.clear();
381      poolAllocationBytes.add(bufSize);
382      return bb;
383    }
384    while (true) {
385      int c = this.usedBufCount.intValue();
386      if (c >= this.maxBufCount) {
387        if (!maxPoolSizeInfoLevelLogged) {
388          LOG.info("Pool already reached its max capacity : {} and no free buffers now. Consider "
389              + "increasing the value for '{}' ?",
390            maxBufCount, MAX_BUFFER_COUNT_KEY);
391          maxPoolSizeInfoLevelLogged = true;
392        }
393        return null;
394      }
395      if (!this.usedBufCount.compareAndSet(c, c + 1)) {
396        continue;
397      }
398      poolAllocationBytes.add(bufSize);
399      return ByteBuffer.allocateDirect(bufSize);
400    }
401  }
402
403  /**
404   * Return back a ByteBuffer after its use. Don't read/write the ByteBuffer after the returning.
405   * @param buf ByteBuffer to return.
406   */
407  private void putbackBuffer(ByteBuffer buf) {
408    if (buf.capacity() != bufSize || (reservoirEnabled ^ buf.isDirect())) {
409      LOG.warn("Trying to put a buffer, not created by this pool! Will be just ignored");
410      return;
411    }
412    buffers.offer(buf);
413  }
414}