001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.regionserver; 020 021import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MAX_RANGE_KEY; 022import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MIN_RANGE_KEY; 023import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY; 024import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MAX_RANGE_KEY; 025import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MIN_RANGE_KEY; 026 027import org.apache.yetus.audience.InterfaceAudience; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.hbase.HConstants; 032import org.apache.hadoop.hbase.io.util.MemorySizeUtil; 033import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerContext; 034import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerResult; 035import org.apache.hadoop.hbase.util.RollingStatCalculator; 036 037/** 038 * The default implementation for the HeapMemoryTuner. This will do statistical checks on 039 * number of evictions, cache misses and flushes to decide whether there should be changes 040 * in the heap size of memstore/block cache. During each tuner operation tuner takes a step 041 * which can either be INCREASE_BLOCK_CACHE_SIZE (increase block cache size), 042 * INCREASE_MEMSTORE_SIZE (increase memstore size) and by default it is NEUTRAL (no change). 043 * We say block cache is sufficient when there is no block cache eviction at all or major amount of 044 * memory allocated to block cache is empty, similarly we say memory allocated for memstore is 045 * sufficient when there is no memstore flushes because of heap pressure or major amount of 046 * memory allocated to memstore is empty. If both are sufficient we do nothing, if exactly one of 047 * them is found to be sufficient we decrease its size by <i>step</i> and increase the other by 048 * same amount. If none of them is sufficient we do statistical analysis on number of cache misses 049 * and flushes to determine tuner direction. Based on these statistics we decide the tuner 050 * direction. If we are not confident about which step direction to take we do nothing and wait for 051 * next iteration. On expectation we will be tuning for at least 10% tuner calls. The number of 052 * past periods to consider for statistics calculation can be specified in config by 053 * <i>hbase.regionserver.heapmemory.autotuner.lookup.periods</i>. Also these many initial calls to 054 * tuner will be ignored (cache is warming up and we leave the system to reach steady state). 055 * After the tuner takes a step, in next call we insure that last call was indeed helpful and did 056 * not do us any harm. If not then we revert the previous step. The step size is dynamic and it 057 * changes based on current and past few tuning directions and their step sizes. We maintain a 058 * parameter <i>decayingAvgTunerStepSize</i> which is sum of past tuner steps with 059 * sign(positive for increase in memstore and negative for increase in block cache). But rather 060 * than simple sum it is calculated by giving more priority to the recent tuning steps. 061 * When last few tuner steps were NETURAL then we assume we are restarting the tuning process and 062 * step size is updated to maximum allowed size which can be specified in config by 063 * <i>hbase.regionserver.heapmemory.autotuner.step.max</i>. If in a particular tuning operation 064 * the step direction is opposite to what indicated by <i>decayingTunerStepSizeSum</i> 065 * we decrease the step size by half. Step size does not change in other tuning operations. 066 * When step size gets below a certain threshold then the following tuner operations are 067 * considered to be neutral. The minimum step size can be specified in config by 068 * <i>hbase.regionserver.heapmemory.autotuner.step.min</i>. 069 */ 070@InterfaceAudience.Private 071class DefaultHeapMemoryTuner implements HeapMemoryTuner { 072 public static final String MAX_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.max"; 073 public static final String MIN_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.min"; 074 public static final String SUFFICIENT_MEMORY_LEVEL_KEY = 075 "hbase.regionserver.heapmemory.autotuner.sufficient.memory.level"; 076 public static final String LOOKUP_PERIODS_KEY = 077 "hbase.regionserver.heapmemory.autotuner.lookup.periods"; 078 public static final String NUM_PERIODS_TO_IGNORE = 079 "hbase.regionserver.heapmemory.autotuner.ignored.periods"; 080 // Maximum step size that the tuner can take 081 public static final float DEFAULT_MAX_STEP_VALUE = 0.04f; // 4% 082 // Minimum step size that the tuner can take 083 public static final float DEFAULT_MIN_STEP_VALUE = 0.00125f; // 0.125% 084 // If current block cache size or memstore size in use is below this level relative to memory 085 // provided to it then corresponding component will be considered to have sufficient memory 086 public static final float DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE = 0.5f; // 50% 087 // Number of tuner periods that will be considered while calculating mean and deviation 088 // If set to zero, all stats will be calculated from the start 089 public static final int DEFAULT_LOOKUP_PERIODS = 60; 090 public static final int DEFAULT_NUM_PERIODS_IGNORED = 60; 091 private static final TunerResult NO_OP_TUNER_RESULT = new TunerResult(false); 092 // If deviation of tuner step size gets below this value then it means past few periods were 093 // NEUTRAL(given that last tuner period was also NEUTRAL). 094 private static final double TUNER_STEP_EPS = 1e-6; 095 096 private Logger LOG = LoggerFactory.getLogger(DefaultHeapMemoryTuner.class); 097 private TunerResult TUNER_RESULT = new TunerResult(true); 098 private Configuration conf; 099 private float sufficientMemoryLevel = DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE; 100 private float maximumStepSize = DEFAULT_MAX_STEP_VALUE; 101 private float minimumStepSize = DEFAULT_MIN_STEP_VALUE; 102 private int tunerLookupPeriods = DEFAULT_LOOKUP_PERIODS; 103 private int numPeriodsToIgnore = DEFAULT_NUM_PERIODS_IGNORED; 104 // Counter to ignore few initial periods while cache is still warming up 105 // Memory tuner will do no operation for the first "tunerLookupPeriods" 106 private int ignoreInitialPeriods = 0; 107 108 private float globalMemStorePercentMinRange; 109 private float globalMemStorePercentMaxRange; 110 private float blockCachePercentMinRange; 111 private float blockCachePercentMaxRange; 112 113 private float globalMemStoreLimitLowMarkPercent; 114 115 // Store statistics about the corresponding parameters for memory tuning 116 private RollingStatCalculator rollingStatsForCacheMisses; 117 private RollingStatCalculator rollingStatsForFlushes; 118 private RollingStatCalculator rollingStatsForEvictions; 119 private RollingStatCalculator rollingStatsForTunerSteps; 120 // Set step size to max value for tuning, this step size will adjust dynamically while tuning 121 private float step = DEFAULT_MAX_STEP_VALUE; 122 private StepDirection prevTuneDirection = StepDirection.NEUTRAL; 123 //positive means memstore's size was increased 124 //It is not just arithmetic sum of past tuner periods. More priority is given to recent 125 //tuning steps. 126 private double decayingTunerStepSizeSum = 0; 127 128 @Override 129 public TunerResult tune(TunerContext context) { 130 float curMemstoreSize = context.getCurMemStoreSize(); 131 float curBlockCacheSize = context.getCurBlockCacheSize(); 132 addToRollingStats(context); 133 134 if (ignoreInitialPeriods < numPeriodsToIgnore) { 135 // Ignoring the first few tuner periods 136 ignoreInitialPeriods++; 137 rollingStatsForTunerSteps.insertDataValue(0); 138 return NO_OP_TUNER_RESULT; 139 } 140 StepDirection newTuneDirection = getTuneDirection(context); 141 142 long blockedFlushCount = context.getBlockedFlushCount(); 143 long unblockedFlushCount = context.getUnblockedFlushCount(); 144 long totalOnheapFlushCount = blockedFlushCount + unblockedFlushCount; 145 boolean offheapMemstore = context.isOffheapMemStore(); 146 float newMemstoreSize; 147 float newBlockCacheSize; 148 149 // Adjusting step size for tuning to get to steady state or restart from steady state. 150 // Even if the step size was 4% and 32 GB memory size, we will be shifting 1 GB back and forth 151 // per tuner operation and it can affect the performance of cluster so we keep on decreasing 152 // step size until everything settles. 153 if (prevTuneDirection == StepDirection.NEUTRAL 154 && newTuneDirection != StepDirection.NEUTRAL 155 && rollingStatsForTunerSteps.getDeviation() < TUNER_STEP_EPS) { 156 // Restarting the tuning from steady state and setting step size to maximum. 157 // The deviation cannot be that low if last period was neutral and some recent periods were 158 // not neutral. 159 step = maximumStepSize; 160 } else if ((newTuneDirection == StepDirection.INCREASE_MEMSTORE_SIZE 161 && decayingTunerStepSizeSum < 0) || 162 (newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE 163 && decayingTunerStepSizeSum > 0)) { 164 // Current step is opposite of past tuner actions so decrease the step size to reach steady 165 // state. 166 if (!offheapMemstore && step != minimumStepSize) { 167 // we leave the step to be at minimumStepSize for offheap memstore 168 step = step / 2.00f; 169 } 170 } 171 if (step < minimumStepSize) { 172 // If step size is too small then we do nothing. 173 LOG.debug("Tuner step size is too low; we will not perform any tuning this time."); 174 step = 0.0f; 175 newTuneDirection = StepDirection.NEUTRAL; 176 } 177 // There are no flushes due to onheap pressure and 178 // we have an offheap memstore and we are in need of more block_cache size. 179 if (totalOnheapFlushCount == 0 && offheapMemstore 180 && newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE) { 181 // we are sure that there are flushes only due to offheap pressure 182 // So don't do the memstore decrease equal to the step size. Instead do minimum stepSize 183 // decrease. But even if we have some flushes due to heap then it is better we tune 184 // the existing way. 185 step = minimumStepSize; 186 } 187 // Increase / decrease the memstore / block cache sizes depending on new tuner step. 188 // We don't want to exert immediate pressure on memstore. So, we decrease its size gracefully; 189 // we set a minimum bar in the middle of the total memstore size and the lower limit. 190 float minMemstoreSize = ((globalMemStoreLimitLowMarkPercent + 1) * curMemstoreSize) / 2.00f; 191 192 switch (newTuneDirection) { 193 case INCREASE_BLOCK_CACHE_SIZE: 194 if (curMemstoreSize - step < minMemstoreSize) { 195 step = curMemstoreSize - minMemstoreSize; 196 } 197 newMemstoreSize = curMemstoreSize - step; 198 newBlockCacheSize = curBlockCacheSize + step; 199 rollingStatsForTunerSteps.insertDataValue(-(int)(step*100000)); 200 decayingTunerStepSizeSum = (decayingTunerStepSizeSum - step)/2.00f; 201 break; 202 case INCREASE_MEMSTORE_SIZE: 203 newBlockCacheSize = curBlockCacheSize - step; 204 newMemstoreSize = curMemstoreSize + step; 205 rollingStatsForTunerSteps.insertDataValue((int)(step*100000)); 206 decayingTunerStepSizeSum = (decayingTunerStepSizeSum + step)/2.00f; 207 break; 208 default: 209 prevTuneDirection = StepDirection.NEUTRAL; 210 rollingStatsForTunerSteps.insertDataValue(0); 211 decayingTunerStepSizeSum = (decayingTunerStepSizeSum)/2.00f; 212 return NO_OP_TUNER_RESULT; 213 } 214 // Check we are within max/min bounds. 215 if (newMemstoreSize > globalMemStorePercentMaxRange) { 216 newMemstoreSize = globalMemStorePercentMaxRange; 217 } else if (newMemstoreSize < globalMemStorePercentMinRange) { 218 newMemstoreSize = globalMemStorePercentMinRange; 219 } 220 if (newBlockCacheSize > blockCachePercentMaxRange) { 221 newBlockCacheSize = blockCachePercentMaxRange; 222 } else if (newBlockCacheSize < blockCachePercentMinRange) { 223 newBlockCacheSize = blockCachePercentMinRange; 224 } 225 TUNER_RESULT.setBlockCacheSize(newBlockCacheSize); 226 TUNER_RESULT.setMemStoreSize(newMemstoreSize); 227 prevTuneDirection = newTuneDirection; 228 return TUNER_RESULT; 229 } 230 231 /** 232 * Determine best direction of tuning base on given context. 233 * @param context The tuner context. 234 * @return tuning direction. 235 */ 236 private StepDirection getTuneDirection(TunerContext context) { 237 StepDirection newTuneDirection = StepDirection.NEUTRAL; 238 long blockedFlushCount = context.getBlockedFlushCount(); 239 long unblockedFlushCount = context.getUnblockedFlushCount(); 240 long evictCount = context.getEvictCount(); 241 long cacheMissCount = context.getCacheMissCount(); 242 long totalFlushCount = blockedFlushCount + unblockedFlushCount; 243 float curMemstoreSize = context.getCurMemStoreSize(); 244 float curBlockCacheSize = context.getCurBlockCacheSize(); 245 StringBuilder tunerLog = new StringBuilder(); 246 // We can consider memstore or block cache to be sufficient if 247 // we are using only a minor fraction of what have been already provided to it. 248 boolean earlyMemstoreSufficientCheck = totalFlushCount == 0 249 || context.getCurMemStoreUsed() < curMemstoreSize * sufficientMemoryLevel; 250 boolean earlyBlockCacheSufficientCheck = evictCount == 0 || 251 context.getCurBlockCacheUsed() < curBlockCacheSize * sufficientMemoryLevel; 252 if (earlyMemstoreSufficientCheck && earlyBlockCacheSufficientCheck) { 253 // Both memstore and block cache memory seems to be sufficient. No operation required. 254 newTuneDirection = StepDirection.NEUTRAL; 255 } else if (earlyMemstoreSufficientCheck) { 256 // Increase the block cache size and corresponding decrease in memstore size. 257 newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; 258 } else if (earlyBlockCacheSufficientCheck) { 259 // Increase the memstore size and corresponding decrease in block cache size. 260 newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; 261 } else { 262 // Early checks for sufficient memory failed. Tuning memory based on past statistics. 263 // Boolean indicator to show if we need to revert previous step or not. 264 boolean isReverting = false; 265 switch (prevTuneDirection) { 266 // Here we are using number of evictions rather than cache misses because it is more 267 // strong indicator for deficient cache size. Improving caching is what we 268 // would like to optimize for in steady state. 269 case INCREASE_BLOCK_CACHE_SIZE: 270 if ((double)evictCount > rollingStatsForEvictions.getMean() || 271 (double)totalFlushCount > rollingStatsForFlushes.getMean() + 272 rollingStatsForFlushes.getDeviation()/2.00) { 273 // Reverting previous step as it was not useful. 274 // Tuning failed to decrease evictions or tuning resulted in large number of flushes. 275 newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; 276 tunerLog.append("We will revert previous tuning"); 277 if ((double)evictCount > rollingStatsForEvictions.getMean()) { 278 tunerLog.append(" because we could not decrease evictions sufficiently."); 279 } else { 280 tunerLog.append(" because the number of flushes rose significantly."); 281 } 282 isReverting = true; 283 } 284 break; 285 case INCREASE_MEMSTORE_SIZE: 286 if ((double)totalFlushCount > rollingStatsForFlushes.getMean() || 287 (double)evictCount > rollingStatsForEvictions.getMean() + 288 rollingStatsForEvictions.getDeviation()/2.00) { 289 // Reverting previous step as it was not useful. 290 // Tuning failed to decrease flushes or tuning resulted in large number of evictions. 291 newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; 292 tunerLog.append("We will revert previous tuning"); 293 if ((double)totalFlushCount > rollingStatsForFlushes.getMean()) { 294 tunerLog.append(" because we could not decrease flushes sufficiently."); 295 } else { 296 tunerLog.append(" because number of evictions rose significantly."); 297 } 298 isReverting = true; 299 } 300 break; 301 default: 302 // Last step was neutral, revert doesn't not apply here. 303 break; 304 } 305 // If we are not reverting. We try to tune memory sizes by looking at cache misses / flushes. 306 if (!isReverting){ 307 // mean +- deviation*0.8 is considered to be normal 308 // below it its consider low and above it is considered high. 309 // We can safely assume that the number cache misses, flushes are normally distributed over 310 // past periods and hence on all the above mentioned classes (normal, high and low) 311 // are likely to occur with probability 56%, 22%, 22% respectively. Hence there is at 312 // least ~10% probability that we will not fall in NEUTRAL step. 313 // This optimization solution is feedback based and we revert when we 314 // dont find our steps helpful. Hence we want to do tuning only when we have clear 315 // indications because too many unnecessary tuning may affect the performance of cluster. 316 if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() - 317 rollingStatsForCacheMisses.getDeviation()*0.80 && 318 (double)totalFlushCount < rollingStatsForFlushes.getMean() - 319 rollingStatsForFlushes.getDeviation()*0.80) { 320 // Everything is fine no tuning required 321 newTuneDirection = StepDirection.NEUTRAL; 322 } else if ((double)cacheMissCount > rollingStatsForCacheMisses.getMean() + 323 rollingStatsForCacheMisses.getDeviation()*0.80 && 324 (double)totalFlushCount < rollingStatsForFlushes.getMean() - 325 rollingStatsForFlushes.getDeviation()*0.80) { 326 // more misses , increasing cache size 327 newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; 328 tunerLog.append( 329 "Going to increase block cache size due to increase in number of cache misses."); 330 } else if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() - 331 rollingStatsForCacheMisses.getDeviation()*0.80 && 332 (double)totalFlushCount > rollingStatsForFlushes.getMean() + 333 rollingStatsForFlushes.getDeviation()*0.80) { 334 // more flushes , increasing memstore size 335 newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; 336 tunerLog.append("Going to increase memstore size due to increase in number of flushes."); 337 } else if (blockedFlushCount > 0 && prevTuneDirection == StepDirection.NEUTRAL) { 338 // we do not want blocked flushes 339 newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; 340 tunerLog.append("Going to increase memstore size due to" 341 + blockedFlushCount + " blocked flushes."); 342 } else { 343 // Default. Not enough facts to do tuning. 344 tunerLog.append("Going to do nothing because we " 345 + "could not determine best tuning direction"); 346 newTuneDirection = StepDirection.NEUTRAL; 347 } 348 } 349 } 350 if (LOG.isDebugEnabled()) { 351 LOG.debug(tunerLog.toString()); 352 } 353 return newTuneDirection; 354 } 355 356 /** 357 * Add the given context to the rolling tuner stats. 358 * @param context The tuner context. 359 */ 360 private void addToRollingStats(TunerContext context) { 361 rollingStatsForCacheMisses.insertDataValue(context.getCacheMissCount()); 362 rollingStatsForFlushes 363 .insertDataValue(context.getBlockedFlushCount() + context.getUnblockedFlushCount()); 364 rollingStatsForEvictions.insertDataValue(context.getEvictCount()); 365 } 366 367 @Override 368 public Configuration getConf() { 369 return this.conf; 370 } 371 372 @Override 373 public void setConf(Configuration conf) { 374 this.conf = conf; 375 this.maximumStepSize = conf.getFloat(MAX_STEP_KEY, DEFAULT_MAX_STEP_VALUE); 376 this.minimumStepSize = conf.getFloat(MIN_STEP_KEY, DEFAULT_MIN_STEP_VALUE); 377 this.step = this.maximumStepSize; 378 this.sufficientMemoryLevel = conf.getFloat(SUFFICIENT_MEMORY_LEVEL_KEY, 379 DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE); 380 this.tunerLookupPeriods = conf.getInt(LOOKUP_PERIODS_KEY, DEFAULT_LOOKUP_PERIODS); 381 this.blockCachePercentMinRange = conf.getFloat(BLOCK_CACHE_SIZE_MIN_RANGE_KEY, 382 conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT)); 383 this.blockCachePercentMaxRange = conf.getFloat(BLOCK_CACHE_SIZE_MAX_RANGE_KEY, 384 conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT)); 385 this.globalMemStorePercentMinRange = conf.getFloat(MEMSTORE_SIZE_MIN_RANGE_KEY, 386 MemorySizeUtil.getGlobalMemStoreHeapPercent(conf, false)); 387 this.globalMemStorePercentMaxRange = conf.getFloat(MEMSTORE_SIZE_MAX_RANGE_KEY, 388 MemorySizeUtil.getGlobalMemStoreHeapPercent(conf, false)); 389 this.globalMemStoreLimitLowMarkPercent = MemorySizeUtil.getGlobalMemStoreHeapLowerMark(conf, 390 true); 391 // Default value of periods to ignore is number of lookup periods 392 this.numPeriodsToIgnore = conf.getInt(NUM_PERIODS_TO_IGNORE, this.tunerLookupPeriods); 393 this.rollingStatsForCacheMisses = new RollingStatCalculator(this.tunerLookupPeriods); 394 this.rollingStatsForFlushes = new RollingStatCalculator(this.tunerLookupPeriods); 395 this.rollingStatsForEvictions = new RollingStatCalculator(this.tunerLookupPeriods); 396 this.rollingStatsForTunerSteps = new RollingStatCalculator(this.tunerLookupPeriods); 397 } 398 399 private enum StepDirection{ 400 // block cache size was increased 401 INCREASE_BLOCK_CACHE_SIZE, 402 // memstore size was increased 403 INCREASE_MEMSTORE_SIZE, 404 // no operation was performed 405 NEUTRAL 406 } 407}