View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MAX_RANGE_KEY;
22  import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MIN_RANGE_KEY;
23  import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY;
24  import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MAX_RANGE_KEY;
25  import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MIN_RANGE_KEY;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.io.util.HeapMemorySizeUtil;
33  import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerContext;
34  import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerResult;
35  import org.apache.hadoop.hbase.util.RollingStatCalculator;
36  
37  /**
38   * The default implementation for the HeapMemoryTuner. This will do statistical checks on
39   * number of evictions, cache misses and flushes to decide whether there should be changes
40   * in the heap size of memstore/block cache. During each tuner operation tuner takes a step
41   * which can either be INCREASE_BLOCK_CACHE_SIZE (increase block cache size),
42   * INCREASE_MEMSTORE_SIZE (increase memstore size) and by default it is NEUTRAL (no change).
43   * We say block cache is sufficient when there is no block cache eviction at all or major amount of
44   * memory allocated to block cache is empty, similarly we say memory allocated for memstore is
45   * sufficient when there is no memstore flushes because of heap pressure or major amount of
46   * memory allocated to memstore is empty. If both are sufficient we do nothing, if exactly one of
47   * them is found to be sufficient we decrease its size by <i>step</i> and increase the other by
48   * same amount. If none of them is sufficient we do statistical analysis on number of cache misses
49   * and flushes to determine tuner direction. Based on these statistics we decide the tuner
50   * direction. If we are not confident about which step direction to take we do nothing and wait for
51   * next iteration. On expectation we will be tuning for at least 10% tuner calls. The number of
52   * past periods to consider for statistics calculation can be specified in config by
53   * <i>hbase.regionserver.heapmemory.autotuner.lookup.periods</i>. Also these many initial calls to
54   * tuner will be ignored (cache is warming up and we leave the system to reach steady state).
55   * After the tuner takes a step, in next call we insure that last call was indeed helpful and did
56   * not do us any harm. If not then we revert the previous step. The step size is dynamic and it
57   * changes based on current and past few tuning directions and their step sizes. We maintain a
58   * parameter <i>decayingAvgTunerStepSize</i> which is sum of past tuner steps with
59   * sign(positive for increase in memstore and negative for increase in block cache). But rather
60   * than simple sum it is calculated by giving more priority to the recent tuning steps.
61   * When last few tuner steps were NETURAL then we assume we are restarting the tuning process and
62   * step size is updated to maximum allowed size which can be specified  in config by
63   * <i>hbase.regionserver.heapmemory.autotuner.step.max</i>. If in a particular tuning operation
64   * the step direction is opposite to what indicated by <i>decayingTunerStepSizeSum</i>
65   * we decrease the step size by half. Step size does not change in other tuning operations.
66   * When step size gets below a certain threshold then the following tuner operations are
67   * considered to be neutral. The minimum step size can be specified  in config by
68   * <i>hbase.regionserver.heapmemory.autotuner.step.min</i>.
69   */
70  @InterfaceAudience.Private
71  class DefaultHeapMemoryTuner implements HeapMemoryTuner {
72    public static final String MAX_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.max";
73    public static final String MIN_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.min";
74    public static final String SUFFICIENT_MEMORY_LEVEL_KEY =
75        "hbase.regionserver.heapmemory.autotuner.sufficient.memory.level";
76    public static final String LOOKUP_PERIODS_KEY =
77        "hbase.regionserver.heapmemory.autotuner.lookup.periods";
78    public static final String NUM_PERIODS_TO_IGNORE =
79        "hbase.regionserver.heapmemory.autotuner.ignored.periods";
80    // Maximum step size that the tuner can take
81    public static final float DEFAULT_MAX_STEP_VALUE = 0.04f; // 4%
82    // Minimum step size that the tuner can take
83    public static final float DEFAULT_MIN_STEP_VALUE = 0.00125f; // 0.125%
84    // If current block cache size or memstore size in use is below this level relative to memory
85    // provided to it then corresponding component will be considered to have sufficient memory
86    public static final float DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE = 0.5f; // 50%
87    // Number of tuner periods that will be considered while calculating mean and deviation
88    // If set to zero, all stats will be calculated from the start
89    public static final int DEFAULT_LOOKUP_PERIODS = 60;
90    public static final int DEFAULT_NUM_PERIODS_IGNORED = 60;
91    private static final TunerResult NO_OP_TUNER_RESULT = new TunerResult(false);
92    // If deviation of tuner step size gets below this value then it means past few periods were
93    // NEUTRAL(given that last tuner period was also NEUTRAL).
94    private static final double TUNER_STEP_EPS = 1e-6;
95  
96    private Log LOG = LogFactory.getLog(DefaultHeapMemoryTuner.class);
97    private TunerResult TUNER_RESULT = new TunerResult(true);
98    private Configuration conf;
99    private float sufficientMemoryLevel = DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE;
100   private float maximumStepSize = DEFAULT_MAX_STEP_VALUE;
101   private float minimumStepSize = DEFAULT_MIN_STEP_VALUE;
102   private int tunerLookupPeriods = DEFAULT_LOOKUP_PERIODS;
103   private int numPeriodsToIgnore = DEFAULT_NUM_PERIODS_IGNORED;
104   // Counter to ignore few initial periods while cache is still warming up
105   // Memory tuner will do no operation for the first "tunerLookupPeriods"
106   private int ignoreInitialPeriods = 0;
107 
108   private float globalMemStorePercentMinRange;
109   private float globalMemStorePercentMaxRange;
110   private float blockCachePercentMinRange;
111   private float blockCachePercentMaxRange;
112   // Store statistics about the corresponding parameters for memory tuning
113   private RollingStatCalculator rollingStatsForCacheMisses;
114   private RollingStatCalculator rollingStatsForFlushes;
115   private RollingStatCalculator rollingStatsForEvictions;
116   private RollingStatCalculator rollingStatsForTunerSteps;
117   // Set step size to max value for tuning, this step size will adjust dynamically while tuning
118   private float step = DEFAULT_MAX_STEP_VALUE;
119   private StepDirection prevTuneDirection = StepDirection.NEUTRAL;
120   //positive means memstore's size was increased
121   //It is not just arithmetic sum of past tuner periods. More priority is given to recent
122   //tuning steps.
123   private double decayingTunerStepSizeSum = 0;
124 
125   @Override
126   public TunerResult tune(TunerContext context) {
127     float curMemstoreSize = context.getCurMemStoreSize();
128     float curBlockCacheSize = context.getCurBlockCacheSize();
129     addToRollingStats(context);
130 
131     if (ignoreInitialPeriods < numPeriodsToIgnore) {
132       // Ignoring the first few tuner periods
133       ignoreInitialPeriods++;
134       rollingStatsForTunerSteps.insertDataValue(0);
135       return NO_OP_TUNER_RESULT;
136     }
137     StepDirection newTuneDirection = getTuneDirection(context);
138 
139     float newMemstoreSize;
140     float newBlockCacheSize;
141 
142     // Adjusting step size for tuning to get to steady state or restart from steady state.
143     // Even if the step size was 4% and 32 GB memory size, we will be shifting 1 GB back and forth
144     // per tuner operation and it can affect the performance of cluster so we keep on decreasing
145     // step size until everything settles.
146     if (prevTuneDirection == StepDirection.NEUTRAL
147         && newTuneDirection != StepDirection.NEUTRAL
148         && rollingStatsForTunerSteps.getDeviation() < TUNER_STEP_EPS) {
149       // Restarting the tuning from steady state and setting step size to maximum.
150       // The deviation cannot be that low if last period was neutral and some recent periods were
151       // not neutral.
152       step = maximumStepSize;
153     } else if ((newTuneDirection == StepDirection.INCREASE_MEMSTORE_SIZE
154         && decayingTunerStepSizeSum < 0) ||
155         (newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE
156         && decayingTunerStepSizeSum > 0)) {
157       // Current step is opposite of past tuner actions so decrease the step size to reach steady
158       // state.
159       step = step/2.00f;
160     }
161     if (step < minimumStepSize) {
162       // If step size is too small then we do nothing.
163       LOG.debug("Tuner step size is too low; we will not perform any tuning this time.");
164       step = 0.0f;
165       newTuneDirection = StepDirection.NEUTRAL;
166     }
167     // Increase / decrease the memstore / block cahce sizes depending on new tuner step.
168     float globalMemstoreLowerMark = HeapMemorySizeUtil.getGlobalMemStoreLowerMark(conf,
169         curMemstoreSize);
170     // We don't want to exert immediate pressure on memstore. So, we decrease its size gracefully;
171     // we set a minimum bar in the middle of the total memstore size and the lower limit.
172     float minMemstoreSize = ((globalMemstoreLowerMark + 1) * curMemstoreSize) / 2.00f;
173 
174     switch (newTuneDirection) {
175     case INCREASE_BLOCK_CACHE_SIZE:
176         if (curMemstoreSize - step < minMemstoreSize) {
177           step = curMemstoreSize - minMemstoreSize;
178         }
179         newMemstoreSize = curMemstoreSize - step;
180         newBlockCacheSize = curBlockCacheSize + step;
181         rollingStatsForTunerSteps.insertDataValue(-(int)(step*100000));
182         decayingTunerStepSizeSum = (decayingTunerStepSizeSum - step)/2.00f;
183         break;
184     case INCREASE_MEMSTORE_SIZE:
185         newBlockCacheSize = curBlockCacheSize - step;
186         newMemstoreSize = curMemstoreSize + step;
187         rollingStatsForTunerSteps.insertDataValue((int)(step*100000));
188         decayingTunerStepSizeSum = (decayingTunerStepSizeSum + step)/2.00f;
189         break;
190     default:
191         prevTuneDirection = StepDirection.NEUTRAL;
192         rollingStatsForTunerSteps.insertDataValue(0);
193         decayingTunerStepSizeSum = (decayingTunerStepSizeSum)/2.00f;
194         return NO_OP_TUNER_RESULT;
195     }
196     // Check we are within max/min bounds.
197     if (newMemstoreSize > globalMemStorePercentMaxRange) {
198       newMemstoreSize = globalMemStorePercentMaxRange;
199     } else if (newMemstoreSize < globalMemStorePercentMinRange) {
200       newMemstoreSize = globalMemStorePercentMinRange;
201     }
202     if (newBlockCacheSize > blockCachePercentMaxRange) {
203       newBlockCacheSize = blockCachePercentMaxRange;
204     } else if (newBlockCacheSize < blockCachePercentMinRange) {
205       newBlockCacheSize = blockCachePercentMinRange;
206     }
207     TUNER_RESULT.setBlockCacheSize(newBlockCacheSize);
208     TUNER_RESULT.setMemstoreSize(newMemstoreSize);
209     prevTuneDirection = newTuneDirection;
210     return TUNER_RESULT;
211   }
212 
213   /**
214    * Determine best direction of tuning base on given context.
215    * @param context The tuner context.
216    * @return tuning direction.
217    */
218   private StepDirection getTuneDirection(TunerContext context) {
219     StepDirection newTuneDirection = StepDirection.NEUTRAL;
220     long blockedFlushCount = context.getBlockedFlushCount();
221     long unblockedFlushCount = context.getUnblockedFlushCount();
222     long evictCount = context.getEvictCount();
223     long cacheMissCount = context.getCacheMissCount();
224     long totalFlushCount = blockedFlushCount+unblockedFlushCount;
225     float curMemstoreSize = context.getCurMemStoreSize();
226     float curBlockCacheSize = context.getCurBlockCacheSize();
227     StringBuilder tunerLog = new StringBuilder();
228     // We can consider memstore or block cache to be sufficient if
229     // we are using only a minor fraction of what have been already provided to it.
230     boolean earlyMemstoreSufficientCheck = totalFlushCount == 0
231         || context.getCurMemStoreUsed() < curMemstoreSize * sufficientMemoryLevel;
232     boolean earlyBlockCacheSufficientCheck = evictCount == 0 ||
233         context.getCurBlockCacheUsed() < curBlockCacheSize * sufficientMemoryLevel;
234     if (earlyMemstoreSufficientCheck && earlyBlockCacheSufficientCheck) {
235       // Both memstore and block cache memory seems to be sufficient. No operation required.
236       newTuneDirection = StepDirection.NEUTRAL;
237     } else if (earlyMemstoreSufficientCheck) {
238       // Increase the block cache size and corresponding decrease in memstore size.
239       newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
240     } else if (earlyBlockCacheSufficientCheck) {
241       // Increase the memstore size and corresponding decrease in block cache size.
242       newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
243     } else {
244       // Early checks for sufficient memory failed. Tuning memory based on past statistics.
245       // Boolean indicator to show if we need to revert previous step or not.
246       boolean isReverting = false;
247       switch (prevTuneDirection) {
248       // Here we are using number of evictions rather than cache misses because it is more
249       // strong indicator for deficient cache size. Improving caching is what we
250       // would like to optimize for in steady state.
251       case INCREASE_BLOCK_CACHE_SIZE:
252         if ((double)evictCount > rollingStatsForEvictions.getMean() ||
253             (double)totalFlushCount > rollingStatsForFlushes.getMean() +
254                 rollingStatsForFlushes.getDeviation()/2.00) {
255           // Reverting previous step as it was not useful.
256           // Tuning failed to decrease evictions or tuning resulted in large number of flushes.
257           newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
258           tunerLog.append("We will revert previous tuning");
259           if ((double)evictCount > rollingStatsForEvictions.getMean()) {
260             tunerLog.append(" because we could not decrease evictions sufficiently.");
261           } else {
262             tunerLog.append(" because the number of flushes rose significantly.");
263           }
264           isReverting = true;
265         }
266         break;
267       case INCREASE_MEMSTORE_SIZE:
268         if ((double)totalFlushCount > rollingStatsForFlushes.getMean() ||
269             (double)evictCount > rollingStatsForEvictions.getMean() +
270                 rollingStatsForEvictions.getDeviation()/2.00) {
271           // Reverting previous step as it was not useful.
272           // Tuning failed to decrease flushes or tuning resulted in large number of evictions.
273           newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
274           tunerLog.append("We will revert previous tuning");
275           if ((double)totalFlushCount > rollingStatsForFlushes.getMean()) {
276             tunerLog.append(" because we could not decrease flushes sufficiently.");
277           } else {
278             tunerLog.append(" because number of evictions rose significantly.");
279           }
280           isReverting = true;
281         }
282         break;
283       default:
284         // Last step was neutral, revert doesn't not apply here.
285         break;
286       }
287       // If we are not reverting. We try to tune memory sizes by looking at cache misses / flushes.
288       if (!isReverting){
289         // mean +- deviation*0.8 is considered to be normal
290         // below it its consider low and above it is considered high.
291         // We can safely assume that the number cache misses, flushes are normally distributed over
292         // past periods and hence on all the above mentioned classes (normal, high and low)
293         // are likely to occur with probability 56%, 22%, 22% respectively. Hence there is at
294         // least ~10% probability that we will not fall in NEUTRAL step.
295         // This optimization solution is feedback based and we revert when we
296         // dont find our steps helpful. Hence we want to do tuning only when we have clear
297         // indications because too many unnecessary tuning may affect the performance of cluster.
298         if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() -
299             rollingStatsForCacheMisses.getDeviation()*0.80 &&
300             (double)totalFlushCount < rollingStatsForFlushes.getMean() -
301                 rollingStatsForFlushes.getDeviation()*0.80) {
302           // Everything is fine no tuning required
303           newTuneDirection = StepDirection.NEUTRAL;
304         } else if ((double)cacheMissCount > rollingStatsForCacheMisses.getMean() +
305             rollingStatsForCacheMisses.getDeviation()*0.80 &&
306             (double)totalFlushCount < rollingStatsForFlushes.getMean() -
307                 rollingStatsForFlushes.getDeviation()*0.80) {
308           // more misses , increasing cache size
309           newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
310           tunerLog.append(
311               "Going to increase block cache size due to increase in number of cache misses.");
312         } else if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() -
313             rollingStatsForCacheMisses.getDeviation()*0.80 &&
314             (double)totalFlushCount > rollingStatsForFlushes.getMean() +
315                 rollingStatsForFlushes.getDeviation()*0.80) {
316           // more flushes , increasing memstore size
317           newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
318           tunerLog.append("Going to increase memstore size due to increase in number of flushes.");
319         } else if (blockedFlushCount > 0 && prevTuneDirection == StepDirection.NEUTRAL) {
320           // we do not want blocked flushes
321           newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
322           tunerLog.append("Going to increase memstore size due to"
323               + blockedFlushCount + " blocked flushes.");
324         } else {
325           // Default. Not enough facts to do tuning.
326           tunerLog.append("Going to do nothing because we "
327               + "could not determine best tuning direction");
328           newTuneDirection = StepDirection.NEUTRAL;
329         }
330       }
331     }
332     if (LOG.isDebugEnabled()) {
333       LOG.debug(tunerLog.toString());
334     }
335     return newTuneDirection;
336   }
337 
338   /**
339    * Add the given context to the rolling tuner stats.
340    * @param context The tuner context.
341    */
342   private void addToRollingStats(TunerContext context) {
343     rollingStatsForCacheMisses.insertDataValue(context.getCacheMissCount());
344     rollingStatsForFlushes.insertDataValue(context.getBlockedFlushCount() +
345         context.getUnblockedFlushCount());
346     rollingStatsForEvictions.insertDataValue(context.getEvictCount());
347   }
348 
349   @Override
350   public Configuration getConf() {
351     return this.conf;
352   }
353 
354   @Override
355   public void setConf(Configuration conf) {
356     this.conf = conf;
357     this.maximumStepSize = conf.getFloat(MAX_STEP_KEY, DEFAULT_MAX_STEP_VALUE);
358     this.minimumStepSize = conf.getFloat(MIN_STEP_KEY, DEFAULT_MIN_STEP_VALUE);
359     this.step = this.maximumStepSize;
360     this.sufficientMemoryLevel = conf.getFloat(SUFFICIENT_MEMORY_LEVEL_KEY,
361         DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE);
362     this.tunerLookupPeriods = conf.getInt(LOOKUP_PERIODS_KEY, DEFAULT_LOOKUP_PERIODS);
363     this.blockCachePercentMinRange = conf.getFloat(BLOCK_CACHE_SIZE_MIN_RANGE_KEY,
364         conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT));
365     this.blockCachePercentMaxRange = conf.getFloat(BLOCK_CACHE_SIZE_MAX_RANGE_KEY,
366         conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT));
367     this.globalMemStorePercentMinRange = conf.getFloat(MEMSTORE_SIZE_MIN_RANGE_KEY,
368         HeapMemorySizeUtil.getGlobalMemStorePercent(conf, false));
369     this.globalMemStorePercentMaxRange = conf.getFloat(MEMSTORE_SIZE_MAX_RANGE_KEY,
370         HeapMemorySizeUtil.getGlobalMemStorePercent(conf, false));
371     // Default value of periods to ignore is number of lookup periods
372     this.numPeriodsToIgnore = conf.getInt(NUM_PERIODS_TO_IGNORE, this.tunerLookupPeriods);
373     this.rollingStatsForCacheMisses = new RollingStatCalculator(this.tunerLookupPeriods);
374     this.rollingStatsForFlushes = new RollingStatCalculator(this.tunerLookupPeriods);
375     this.rollingStatsForEvictions = new RollingStatCalculator(this.tunerLookupPeriods);
376     this.rollingStatsForTunerSteps = new RollingStatCalculator(this.tunerLookupPeriods);
377   }
378 
379   private enum StepDirection{
380     // block cache size was increased
381     INCREASE_BLOCK_CACHE_SIZE,
382     // memstore size was increased
383     INCREASE_MEMSTORE_SIZE,
384     // no operation was performed
385     NEUTRAL
386   }
387 }