001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY; 021import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MAX_RANGE_KEY; 022import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MIN_RANGE_KEY; 023import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MAX_RANGE_KEY; 024import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MIN_RANGE_KEY; 025 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.io.util.MemorySizeUtil; 029import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerContext; 030import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerResult; 031import org.apache.hadoop.hbase.util.RollingStatCalculator; 032import org.apache.yetus.audience.InterfaceAudience; 033import org.slf4j.Logger; 034import org.slf4j.LoggerFactory; 035 036/** 037 * The default implementation for the HeapMemoryTuner. This will do statistical checks on number of 038 * evictions, cache misses and flushes to decide whether there should be changes in the heap size of 039 * memstore/block cache. During each tuner operation tuner takes a step which can either be 040 * INCREASE_BLOCK_CACHE_SIZE (increase block cache size), INCREASE_MEMSTORE_SIZE (increase memstore 041 * size) and by default it is NEUTRAL (no change). We say block cache is sufficient when there is no 042 * block cache eviction at all or major amount of memory allocated to block cache is empty, 043 * similarly we say memory allocated for memstore is sufficient when there is no memstore flushes 044 * because of heap pressure or major amount of memory allocated to memstore is empty. If both are 045 * sufficient we do nothing, if exactly one of them is found to be sufficient we decrease its size 046 * by <i>step</i> and increase the other by same amount. If none of them is sufficient we do 047 * statistical analysis on number of cache misses and flushes to determine tuner direction. Based on 048 * these statistics we decide the tuner direction. If we are not confident about which step 049 * direction to take we do nothing and wait for next iteration. On expectation we will be tuning for 050 * at least 10% tuner calls. The number of past periods to consider for statistics calculation can 051 * be specified in config by <i>hbase.regionserver.heapmemory.autotuner.lookup.periods</i>. Also 052 * these many initial calls to tuner will be ignored (cache is warming up and we leave the system to 053 * reach steady state). After the tuner takes a step, in next call we insure that last call was 054 * indeed helpful and did not do us any harm. If not then we revert the previous step. The step size 055 * is dynamic and it changes based on current and past few tuning directions and their step sizes. 056 * We maintain a parameter <i>decayingAvgTunerStepSize</i> which is sum of past tuner steps with 057 * sign(positive for increase in memstore and negative for increase in block cache). But rather than 058 * simple sum it is calculated by giving more priority to the recent tuning steps. When last few 059 * tuner steps were NETURAL then we assume we are restarting the tuning process and step size is 060 * updated to maximum allowed size which can be specified in config by 061 * <i>hbase.regionserver.heapmemory.autotuner.step.max</i>. If in a particular tuning operation the 062 * step direction is opposite to what indicated by <i>decayingTunerStepSizeSum</i> we decrease the 063 * step size by half. Step size does not change in other tuning operations. When step size gets 064 * below a certain threshold then the following tuner operations are considered to be neutral. The 065 * minimum step size can be specified in config by 066 * <i>hbase.regionserver.heapmemory.autotuner.step.min</i>. 067 */ 068@InterfaceAudience.Private 069class DefaultHeapMemoryTuner implements HeapMemoryTuner { 070 public static final String MAX_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.max"; 071 public static final String MIN_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.min"; 072 public static final String SUFFICIENT_MEMORY_LEVEL_KEY = 073 "hbase.regionserver.heapmemory.autotuner.sufficient.memory.level"; 074 public static final String LOOKUP_PERIODS_KEY = 075 "hbase.regionserver.heapmemory.autotuner.lookup.periods"; 076 public static final String NUM_PERIODS_TO_IGNORE = 077 "hbase.regionserver.heapmemory.autotuner.ignored.periods"; 078 // Maximum step size that the tuner can take 079 public static final float DEFAULT_MAX_STEP_VALUE = 0.04f; // 4% 080 // Minimum step size that the tuner can take 081 public static final float DEFAULT_MIN_STEP_VALUE = 0.00125f; // 0.125% 082 // If current block cache size or memstore size in use is below this level relative to memory 083 // provided to it then corresponding component will be considered to have sufficient memory 084 public static final float DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE = 0.5f; // 50% 085 // Number of tuner periods that will be considered while calculating mean and deviation 086 // If set to zero, all stats will be calculated from the start 087 public static final int DEFAULT_LOOKUP_PERIODS = 60; 088 public static final int DEFAULT_NUM_PERIODS_IGNORED = 60; 089 private static final TunerResult NO_OP_TUNER_RESULT = new TunerResult(false); 090 // If deviation of tuner step size gets below this value then it means past few periods were 091 // NEUTRAL(given that last tuner period was also NEUTRAL). 092 private static final double TUNER_STEP_EPS = 1e-6; 093 094 private Logger LOG = LoggerFactory.getLogger(DefaultHeapMemoryTuner.class); 095 private TunerResult TUNER_RESULT = new TunerResult(true); 096 private Configuration conf; 097 private float sufficientMemoryLevel = DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE; 098 private float maximumStepSize = DEFAULT_MAX_STEP_VALUE; 099 private float minimumStepSize = DEFAULT_MIN_STEP_VALUE; 100 private int tunerLookupPeriods = DEFAULT_LOOKUP_PERIODS; 101 private int numPeriodsToIgnore = DEFAULT_NUM_PERIODS_IGNORED; 102 // Counter to ignore few initial periods while cache is still warming up 103 // Memory tuner will do no operation for the first "tunerLookupPeriods" 104 private int ignoreInitialPeriods = 0; 105 106 private float globalMemStorePercentMinRange; 107 private float globalMemStorePercentMaxRange; 108 private float blockCachePercentMinRange; 109 private float blockCachePercentMaxRange; 110 111 private float globalMemStoreLimitLowMarkPercent; 112 113 // Store statistics about the corresponding parameters for memory tuning 114 private RollingStatCalculator rollingStatsForCacheMisses; 115 private RollingStatCalculator rollingStatsForFlushes; 116 private RollingStatCalculator rollingStatsForEvictions; 117 private RollingStatCalculator rollingStatsForTunerSteps; 118 // Set step size to max value for tuning, this step size will adjust dynamically while tuning 119 private float step = DEFAULT_MAX_STEP_VALUE; 120 private StepDirection prevTuneDirection = StepDirection.NEUTRAL; 121 // positive means memstore's size was increased 122 // It is not just arithmetic sum of past tuner periods. More priority is given to recent 123 // tuning steps. 124 private double decayingTunerStepSizeSum = 0; 125 126 @Override 127 public TunerResult tune(TunerContext context) { 128 float curMemstoreSize = context.getCurMemStoreSize(); 129 float curBlockCacheSize = context.getCurBlockCacheSize(); 130 addToRollingStats(context); 131 132 if (ignoreInitialPeriods < numPeriodsToIgnore) { 133 // Ignoring the first few tuner periods 134 ignoreInitialPeriods++; 135 rollingStatsForTunerSteps.insertDataValue(0); 136 LOG.info("Ignoring initial tuning periods: {} so far, {} to ignore", ignoreInitialPeriods, 137 numPeriodsToIgnore); 138 return NO_OP_TUNER_RESULT; 139 } 140 StepDirection newTuneDirection = getTuneDirection(context); 141 142 long blockedFlushCount = context.getBlockedFlushCount(); 143 long unblockedFlushCount = context.getUnblockedFlushCount(); 144 long totalOnheapFlushCount = blockedFlushCount + unblockedFlushCount; 145 boolean offheapMemstore = context.isOffheapMemStore(); 146 float newMemstoreSize; 147 float newBlockCacheSize; 148 149 // Adjusting step size for tuning to get to steady state or restart from steady state. 150 // Even if the step size was 4% and 32 GB memory size, we will be shifting 1 GB back and forth 151 // per tuner operation and it can affect the performance of cluster so we keep on decreasing 152 // step size until everything settles. 153 if ( 154 prevTuneDirection == StepDirection.NEUTRAL && newTuneDirection != StepDirection.NEUTRAL 155 && rollingStatsForTunerSteps.getDeviation() < TUNER_STEP_EPS 156 ) { 157 // Restarting the tuning from steady state and setting step size to maximum. 158 // The deviation cannot be that low if last period was neutral and some recent periods were 159 // not neutral. 160 step = maximumStepSize; 161 } else if ( 162 (newTuneDirection == StepDirection.INCREASE_MEMSTORE_SIZE && decayingTunerStepSizeSum < 0) 163 || (newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE 164 && decayingTunerStepSizeSum > 0) 165 ) { 166 // Current step is opposite of past tuner actions so decrease the step size to reach steady 167 // state. 168 if (!offheapMemstore && step != minimumStepSize) { 169 // we leave the step to be at minimumStepSize for offheap memstore 170 step = step / 2.00f; 171 } 172 } 173 if (step < minimumStepSize) { 174 // If step size is too small then we do nothing. 175 LOG.debug("Tuner step size is too low; we will not perform any tuning this time."); 176 step = 0.0f; 177 newTuneDirection = StepDirection.NEUTRAL; 178 } 179 // There are no flushes due to onheap pressure and 180 // we have an offheap memstore and we are in need of more block_cache size. 181 if ( 182 totalOnheapFlushCount == 0 && offheapMemstore 183 && newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE 184 ) { 185 // we are sure that there are flushes only due to offheap pressure 186 // So don't do the memstore decrease equal to the step size. Instead do minimum stepSize 187 // decrease. But even if we have some flushes due to heap then it is better we tune 188 // the existing way. 189 step = minimumStepSize; 190 } 191 // Increase / decrease the memstore / block cache sizes depending on new tuner step. 192 // We don't want to exert immediate pressure on memstore. So, we decrease its size gracefully; 193 // we set a minimum bar in the middle of the total memstore size and the lower limit. 194 float minMemstoreSize = ((globalMemStoreLimitLowMarkPercent + 1) * curMemstoreSize) / 2.00f; 195 196 switch (newTuneDirection) { 197 case INCREASE_BLOCK_CACHE_SIZE: 198 if (curMemstoreSize - step < minMemstoreSize) { 199 step = curMemstoreSize - minMemstoreSize; 200 } 201 newMemstoreSize = curMemstoreSize - step; 202 newBlockCacheSize = curBlockCacheSize + step; 203 rollingStatsForTunerSteps.insertDataValue(-(int) (step * 100000)); 204 decayingTunerStepSizeSum = (decayingTunerStepSizeSum - step) / 2.00f; 205 break; 206 case INCREASE_MEMSTORE_SIZE: 207 newBlockCacheSize = curBlockCacheSize - step; 208 newMemstoreSize = curMemstoreSize + step; 209 rollingStatsForTunerSteps.insertDataValue((int) (step * 100000)); 210 decayingTunerStepSizeSum = (decayingTunerStepSizeSum + step) / 2.00f; 211 break; 212 default: 213 prevTuneDirection = StepDirection.NEUTRAL; 214 rollingStatsForTunerSteps.insertDataValue(0); 215 decayingTunerStepSizeSum = (decayingTunerStepSizeSum) / 2.00f; 216 return NO_OP_TUNER_RESULT; 217 } 218 // Check we are within max/min bounds. 219 if (newMemstoreSize > globalMemStorePercentMaxRange) { 220 newMemstoreSize = globalMemStorePercentMaxRange; 221 } else if (newMemstoreSize < globalMemStorePercentMinRange) { 222 newMemstoreSize = globalMemStorePercentMinRange; 223 } 224 if (newBlockCacheSize > blockCachePercentMaxRange) { 225 newBlockCacheSize = blockCachePercentMaxRange; 226 } else if (newBlockCacheSize < blockCachePercentMinRange) { 227 newBlockCacheSize = blockCachePercentMinRange; 228 } 229 TUNER_RESULT.setBlockCacheSize(newBlockCacheSize); 230 TUNER_RESULT.setMemStoreSize(newMemstoreSize); 231 prevTuneDirection = newTuneDirection; 232 return TUNER_RESULT; 233 } 234 235 /** 236 * Determine best direction of tuning base on given context. 237 * @param context The tuner context. 238 * @return tuning direction. 239 */ 240 private StepDirection getTuneDirection(TunerContext context) { 241 StepDirection newTuneDirection = StepDirection.NEUTRAL; 242 long blockedFlushCount = context.getBlockedFlushCount(); 243 long unblockedFlushCount = context.getUnblockedFlushCount(); 244 long evictCount = context.getEvictCount(); 245 long cacheMissCount = context.getCacheMissCount(); 246 long totalFlushCount = blockedFlushCount + unblockedFlushCount; 247 float curMemstoreSize = context.getCurMemStoreSize(); 248 float curBlockCacheSize = context.getCurBlockCacheSize(); 249 StringBuilder tunerLog = new StringBuilder(); 250 // We can consider memstore or block cache to be sufficient if 251 // we are using only a minor fraction of what have been already provided to it. 252 boolean earlyMemstoreSufficientCheck = totalFlushCount == 0 253 || context.getCurMemStoreUsed() < curMemstoreSize * sufficientMemoryLevel; 254 boolean earlyBlockCacheSufficientCheck = 255 evictCount == 0 || context.getCurBlockCacheUsed() < curBlockCacheSize * sufficientMemoryLevel; 256 if (earlyMemstoreSufficientCheck && earlyBlockCacheSufficientCheck) { 257 // Both memstore and block cache memory seems to be sufficient. No operation required. 258 newTuneDirection = StepDirection.NEUTRAL; 259 tunerLog.append("Going to do nothing because no changes are needed."); 260 } else if (earlyMemstoreSufficientCheck) { 261 // Increase the block cache size and corresponding decrease in memstore size. 262 newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; 263 tunerLog.append("Going to increase the block cache size."); 264 } else if (earlyBlockCacheSufficientCheck) { 265 // Increase the memstore size and corresponding decrease in block cache size. 266 newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; 267 tunerLog.append("Going to increase the memstore size."); 268 } else { 269 // Early checks for sufficient memory failed. Tuning memory based on past statistics. 270 // Boolean indicator to show if we need to revert previous step or not. 271 boolean isReverting = false; 272 switch (prevTuneDirection) { 273 // Here we are using number of evictions rather than cache misses because it is more 274 // strong indicator for deficient cache size. Improving caching is what we 275 // would like to optimize for in steady state. 276 case INCREASE_BLOCK_CACHE_SIZE: 277 if ( 278 (double) evictCount > rollingStatsForEvictions.getMean() || (double) totalFlushCount 279 > rollingStatsForFlushes.getMean() + rollingStatsForFlushes.getDeviation() / 2.00 280 ) { 281 // Reverting previous step as it was not useful. 282 // Tuning failed to decrease evictions or tuning resulted in large number of flushes. 283 newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; 284 tunerLog.append("We will revert previous tuning"); 285 if ((double) evictCount > rollingStatsForEvictions.getMean()) { 286 tunerLog.append(" because we could not decrease evictions sufficiently."); 287 } else { 288 tunerLog.append(" because the number of flushes rose significantly."); 289 } 290 isReverting = true; 291 } 292 break; 293 case INCREASE_MEMSTORE_SIZE: 294 if ( 295 (double) totalFlushCount > rollingStatsForFlushes.getMean() 296 || (double) evictCount > rollingStatsForEvictions.getMean() 297 + rollingStatsForEvictions.getDeviation() / 2.00 298 ) { 299 // Reverting previous step as it was not useful. 300 // Tuning failed to decrease flushes or tuning resulted in large number of evictions. 301 newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; 302 tunerLog.append("We will revert previous tuning"); 303 if ((double) totalFlushCount > rollingStatsForFlushes.getMean()) { 304 tunerLog.append(" because we could not decrease flushes sufficiently."); 305 } else { 306 tunerLog.append(" because number of evictions rose significantly."); 307 } 308 isReverting = true; 309 } 310 break; 311 default: 312 // Last step was neutral, revert doesn't not apply here. 313 break; 314 } 315 // If we are not reverting. We try to tune memory sizes by looking at cache misses / flushes. 316 if (!isReverting) { 317 // mean +- deviation*0.8 is considered to be normal 318 // below it its consider low and above it is considered high. 319 // We can safely assume that the number cache misses, flushes are normally distributed over 320 // past periods and hence on all the above mentioned classes (normal, high and low) 321 // are likely to occur with probability 56%, 22%, 22% respectively. Hence there is at 322 // least ~10% probability that we will not fall in NEUTRAL step. 323 // This optimization solution is feedback based and we revert when we 324 // dont find our steps helpful. Hence we want to do tuning only when we have clear 325 // indications because too many unnecessary tuning may affect the performance of cluster. 326 if ( 327 (double) cacheMissCount 328 < rollingStatsForCacheMisses.getMean() 329 - rollingStatsForCacheMisses.getDeviation() * 0.80 330 && (double) totalFlushCount 331 < rollingStatsForFlushes.getMean() - rollingStatsForFlushes.getDeviation() * 0.80 332 ) { 333 // Everything is fine no tuning required 334 newTuneDirection = StepDirection.NEUTRAL; 335 } else if ( 336 (double) cacheMissCount 337 > rollingStatsForCacheMisses.getMean() 338 + rollingStatsForCacheMisses.getDeviation() * 0.80 339 && (double) totalFlushCount 340 < rollingStatsForFlushes.getMean() - rollingStatsForFlushes.getDeviation() * 0.80 341 ) { 342 // more misses , increasing cache size 343 newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; 344 tunerLog.append( 345 "Going to increase block cache size due to increase in number of cache misses."); 346 } else if ( 347 (double) cacheMissCount 348 < rollingStatsForCacheMisses.getMean() 349 - rollingStatsForCacheMisses.getDeviation() * 0.80 350 && (double) totalFlushCount 351 > rollingStatsForFlushes.getMean() + rollingStatsForFlushes.getDeviation() * 0.80 352 ) { 353 // more flushes , increasing memstore size 354 newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; 355 tunerLog.append("Going to increase memstore size due to increase in number of flushes."); 356 } else if (blockedFlushCount > 0 && prevTuneDirection == StepDirection.NEUTRAL) { 357 // we do not want blocked flushes 358 newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; 359 tunerLog.append( 360 "Going to increase memstore size due to" + blockedFlushCount + " blocked flushes."); 361 } else { 362 // Default. Not enough facts to do tuning. 363 tunerLog.append( 364 "Going to do nothing because we " + "could not determine best tuning direction"); 365 newTuneDirection = StepDirection.NEUTRAL; 366 } 367 } 368 } 369 // Log NEUTRAL decisions at DEBUG, because they are the most frequent and not that interesting. 370 // Log other decisions at INFO because they are making meaningful operational changes. 371 switch (newTuneDirection) { 372 case NEUTRAL: 373 if (LOG.isDebugEnabled()) { 374 LOG.debug(tunerLog.toString()); 375 } 376 break; 377 default: 378 LOG.info(tunerLog.toString()); 379 break; 380 } 381 return newTuneDirection; 382 } 383 384 /** 385 * Add the given context to the rolling tuner stats. 386 * @param context The tuner context. 387 */ 388 private void addToRollingStats(TunerContext context) { 389 rollingStatsForCacheMisses.insertDataValue(context.getCacheMissCount()); 390 rollingStatsForFlushes 391 .insertDataValue(context.getBlockedFlushCount() + context.getUnblockedFlushCount()); 392 rollingStatsForEvictions.insertDataValue(context.getEvictCount()); 393 } 394 395 @Override 396 public Configuration getConf() { 397 return this.conf; 398 } 399 400 @Override 401 public void setConf(Configuration conf) { 402 this.conf = conf; 403 this.maximumStepSize = conf.getFloat(MAX_STEP_KEY, DEFAULT_MAX_STEP_VALUE); 404 this.minimumStepSize = conf.getFloat(MIN_STEP_KEY, DEFAULT_MIN_STEP_VALUE); 405 this.step = this.maximumStepSize; 406 this.sufficientMemoryLevel = 407 conf.getFloat(SUFFICIENT_MEMORY_LEVEL_KEY, DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE); 408 this.tunerLookupPeriods = conf.getInt(LOOKUP_PERIODS_KEY, DEFAULT_LOOKUP_PERIODS); 409 this.blockCachePercentMinRange = conf.getFloat(BLOCK_CACHE_SIZE_MIN_RANGE_KEY, 410 conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT)); 411 this.blockCachePercentMaxRange = conf.getFloat(BLOCK_CACHE_SIZE_MAX_RANGE_KEY, 412 conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT)); 413 this.globalMemStorePercentMinRange = conf.getFloat(MEMSTORE_SIZE_MIN_RANGE_KEY, 414 MemorySizeUtil.getGlobalMemStoreHeapPercent(conf, false)); 415 this.globalMemStorePercentMaxRange = conf.getFloat(MEMSTORE_SIZE_MAX_RANGE_KEY, 416 MemorySizeUtil.getGlobalMemStoreHeapPercent(conf, false)); 417 this.globalMemStoreLimitLowMarkPercent = 418 MemorySizeUtil.getGlobalMemStoreHeapLowerMark(conf, true); 419 // Default value of periods to ignore is number of lookup periods 420 this.numPeriodsToIgnore = conf.getInt(NUM_PERIODS_TO_IGNORE, this.tunerLookupPeriods); 421 this.rollingStatsForCacheMisses = new RollingStatCalculator(this.tunerLookupPeriods); 422 this.rollingStatsForFlushes = new RollingStatCalculator(this.tunerLookupPeriods); 423 this.rollingStatsForEvictions = new RollingStatCalculator(this.tunerLookupPeriods); 424 this.rollingStatsForTunerSteps = new RollingStatCalculator(this.tunerLookupPeriods); 425 } 426 427 private enum StepDirection { 428 // block cache size was increased 429 INCREASE_BLOCK_CACHE_SIZE, 430 // memstore size was increased 431 INCREASE_MEMSTORE_SIZE, 432 // no operation was performed 433 NEUTRAL 434 } 435}