/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.chaos.factories;

import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;

/**
 * Base class of the factory that will create a ChaosMonkey.
 * <p>
 * A factory is configured through the fluent setters ({@link #setTableName},
 * {@link #setColumnFamilies}, {@link #setUtil}, {@link #setProperties}) and then asked to
 * {@link #build()} a monkey. Subclasses may call {@link #loadProperties()} to populate the tunable
 * fields below from the configured {@link Properties}, falling back to the defaults declared in
 * {@code MonkeyConstants}.
 * <p>
 * Factories are looked up by name with {@link #getFactory(String)}. The instances registered in
 * {@link #FACTORIES} are created once and returned as-is, so the setters mutate that shared
 * instance.
 */
public abstract class MonkeyFactory {
  private static final Logger LOG = LoggerFactory.getLogger(MonkeyFactory.class);

  protected TableName tableName;
  protected Set<String> columnFamilies;
  protected IntegrationTestingUtility util;
  protected Properties properties = new Properties();

  // Tunables below are assigned by loadProperties(); until it runs they hold Java's zero values.
  // NOTE(review): a few field names carry typos (gracefulRollingRestartTSSLeepTime,
  // networkIssueRation, fillDiskIssueduration). They are protected and may be referenced by
  // subclasses outside this file, so they are intentionally left as they are.
  protected long action1Period;
  protected long action2Period;
  protected long action3Period;
  protected long action4Period;
  protected long moveRegionsMaxTime;
  protected long moveRegionsSleepTime;
  protected long moveRandomRegionSleepTime;
  protected long restartRandomRSSleepTime;
  protected long batchRestartRSSleepTime;
  protected float batchRestartRSRatio;
  protected long restartActiveMasterSleepTime;
  protected long rollingBatchRestartRSSleepTime;
  protected float rollingBatchRestartRSRatio;
  protected long restartRsHoldingMetaSleepTime;
  protected float compactTableRatio;
  protected float compactRandomRegionRatio;
  protected long decreaseHFileSizeSleepTime;
  protected long decreaseHFileSizeMinHFileSize;
  protected float decreaseHFileSizeHFileSizeJitter;
  protected long gracefulRollingRestartTSSLeepTime;
  protected long rollingBatchSuspendRSSleepTime;
  protected float rollingBatchSuspendRSRatio;
  protected long snapshotTableTtl;

  protected long cpuLoadDuration;
  protected long cpuLoadProcesses;
  protected long networkIssueTimeout;
  protected long networkIssueDuration;
  protected float networkIssueRation;
  protected long networkIssueDelay;
  protected String networkIssueInterface;
  protected long fillDiskTimeout;
  protected String fillDiskPath;
  protected long fillDiskFileSize;
  protected long fillDiskIssueduration;

  protected long restartRandomRsExceptMetaSleepTime;
  protected long restartActiveNameNodeSleepTime;
  protected long restartRandomDataNodeSleepTime;
  protected long restartRandomJournalNodeSleepTime;
  protected long restartRandomZKNodeSleepTime;

  /**
   * How often to introduce the chaos. If too frequent, sequence of kills on minicluster can cause
   * test to fail when Put runs out of retries.
   */
  protected long chaosEveryMilliSec;
  protected long waitForUnbalanceMilliSec;
  protected long waitForKillMilliSec;
  protected long waitAfterBalanceMilliSec;
  protected boolean killMetaRs;

  /**
   * Sets the table the built monkey will work with.
   * @param tableName the table name to store on this factory
   * @return this factory, for call chaining
   */
  public MonkeyFactory setTableName(TableName tableName) {
    this.tableName = tableName;
    return this;
  }

  /**
   * Sets the column families the built monkey will work with.
   * @param columnFamilies the column family names to store on this factory
   * @return this factory, for call chaining
   */
  public MonkeyFactory setColumnFamilies(Set<String> columnFamilies) {
    this.columnFamilies = columnFamilies;
    return this;
  }

  /**
   * Sets the testing utility handed to the built monkey.
   * @param util the integration testing utility to store on this factory
   * @return this factory, for call chaining
   */
  public MonkeyFactory setUtil(IntegrationTestingUtility util) {
    this.util = util;
    return this;
  }

  /**
   * Replaces the properties consulted by {@link #loadProperties()}.
   * @param props the new properties; {@code null} is ignored and the current properties are kept
   * @return this factory, for call chaining
   */
  public MonkeyFactory setProperties(Properties props) {
    if (props != null) {
      this.properties = props;
    }
    return this;
  }

  /**
   * Populates every tunable field from {@link #properties}, using the matching
   * {@code MonkeyConstants.DEFAULT_*} value when a key is absent.
   * @throws NumberFormatException if a numeric property holds text that cannot be parsed; the
   *                               message names the offending key and value
   */
  protected final void loadProperties() {
    action1Period = longProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
      MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD);
    action2Period = longProperty(MonkeyConstants.PERIODIC_ACTION2_PERIOD,
      MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD);
    action3Period = longProperty(MonkeyConstants.COMPOSITE_ACTION3_PERIOD,
      MonkeyConstants.DEFAULT_COMPOSITE_ACTION3_PERIOD);
    action4Period = longProperty(MonkeyConstants.PERIODIC_ACTION4_PERIOD,
      MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD);
    moveRegionsMaxTime = longProperty(MonkeyConstants.MOVE_REGIONS_MAX_TIME,
      MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME);
    moveRegionsSleepTime = longProperty(MonkeyConstants.MOVE_REGIONS_SLEEP_TIME,
      MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME);
    moveRandomRegionSleepTime = longProperty(MonkeyConstants.MOVE_RANDOM_REGION_SLEEP_TIME,
      MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME);
    restartRandomRSSleepTime = longProperty(MonkeyConstants.RESTART_RANDOM_RS_SLEEP_TIME,
      MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME);
    batchRestartRSSleepTime = longProperty(MonkeyConstants.BATCH_RESTART_RS_SLEEP_TIME,
      MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME);
    batchRestartRSRatio = floatProperty(MonkeyConstants.BATCH_RESTART_RS_RATIO,
      MonkeyConstants.DEFAULT_BATCH_RESTART_RS_RATIO);
    restartActiveMasterSleepTime = longProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
      MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME);
    rollingBatchRestartRSSleepTime =
      longProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
        MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME);
    rollingBatchRestartRSRatio = floatProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO,
      MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO);
    restartRsHoldingMetaSleepTime =
      longProperty(MonkeyConstants.RESTART_RS_HOLDING_META_SLEEP_TIME,
        MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME);
    compactTableRatio = floatProperty(MonkeyConstants.COMPACT_TABLE_ACTION_RATIO,
      MonkeyConstants.DEFAULT_COMPACT_TABLE_ACTION_RATIO);
    compactRandomRegionRatio = floatProperty(MonkeyConstants.COMPACT_RANDOM_REGION_RATIO,
      MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO);
    decreaseHFileSizeSleepTime = longProperty(MonkeyConstants.DECREASE_HFILE_SIZE_SLEEP_TIME,
      MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME);
    decreaseHFileSizeMinHFileSize =
      longProperty(MonkeyConstants.DECREASE_HFILE_SIZE_MIN_HFILE_SIZE,
        MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_MIN_HFILE_SIZE);
    decreaseHFileSizeHFileSizeJitter =
      floatProperty(MonkeyConstants.DECREASE_HFILE_SIZE_HFILE_SIZE_JITTER,
        MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_HFILE_SIZE_JITTER);
    gracefulRollingRestartTSSLeepTime =
      longProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
        MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME);
    rollingBatchSuspendRSSleepTime =
      longProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME,
        MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME);
    rollingBatchSuspendRSRatio = floatProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
      MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO);
    snapshotTableTtl = longProperty(MonkeyConstants.SNAPSHOT_TABLE_TTL,
      MonkeyConstants.DEFAULT_SNAPSHOT_TABLE_TTL);

    cpuLoadDuration = longProperty(MonkeyConstants.CPU_LOAD_DURATION,
      MonkeyConstants.DEFAULT_CPU_LOAD_DURATION);
    cpuLoadProcesses = longProperty(MonkeyConstants.CPU_LOAD_PROCESSES,
      MonkeyConstants.DEFAULT_CPU_LOAD_PROCESSES);
    networkIssueTimeout = longProperty(MonkeyConstants.NETWORK_ISSUE_COMMAND_TIMEOUT,
      MonkeyConstants.DEFAULT_NETWORK_ISSUE_COMMAND_TIMEOUT);
    networkIssueDuration = longProperty(MonkeyConstants.NETWORK_ISSUE_DURATION,
      MonkeyConstants.DEFAULT_NETWORK_ISSUE_DURATION);
    networkIssueRation = floatProperty(MonkeyConstants.NETWORK_ISSUE_RATIO,
      MonkeyConstants.DEFAULT_NETWORK_ISSUE_RATIO);
    networkIssueDelay = longProperty(MonkeyConstants.NETWORK_ISSUE_DELAY,
      MonkeyConstants.DEFAULT_NETWORK_ISSUE_DELAY);
    networkIssueInterface = stringProperty(MonkeyConstants.NETWORK_ISSUE_INTERFACE,
      MonkeyConstants.DEFAULT_NETWORK_ISSUE_INTERFACE);
    fillDiskTimeout = longProperty(MonkeyConstants.FILL_DISK_COMMAND_TIMEOUT,
      MonkeyConstants.DEFAULT_FILL_DISK_COMMAND_TIMEOUT);
    fillDiskPath = stringProperty(MonkeyConstants.FILL_DISK_PATH,
      MonkeyConstants.DEFAULT_FILL_DISK_PATH);
    fillDiskFileSize = longProperty(MonkeyConstants.FILL_DISK_FILE_SIZE,
      MonkeyConstants.DEFAULT_FILL_DISK_FILE_SIZE);
    fillDiskIssueduration = longProperty(MonkeyConstants.FILL_DISK_ISSUE_DURATION,
      MonkeyConstants.DEFAULT_FILL_DISK_ISSUE_DURATION);

    restartRandomRsExceptMetaSleepTime =
      longProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
        MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME);
    restartActiveNameNodeSleepTime =
      longProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME,
        MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME);
    restartRandomDataNodeSleepTime =
      longProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME,
        MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME);
    restartRandomJournalNodeSleepTime =
      longProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME,
        MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME);
    restartRandomZKNodeSleepTime = longProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME,
      MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME);

    chaosEveryMilliSec = longProperty(MonkeyConstants.UNBALANCE_CHAOS_EVERY_MS,
      MonkeyConstants.DEFAULT_UNBALANCE_CHAOS_EVERY_MS);
    waitForUnbalanceMilliSec = longProperty(MonkeyConstants.UNBALANCE_WAIT_FOR_UNBALANCE_MS,
      MonkeyConstants.DEFAULT_UNBALANCE_WAIT_FOR_UNBALANCE_MS);
    waitForKillMilliSec = longProperty(MonkeyConstants.UNBALANCE_WAIT_FOR_KILLS_MS,
      MonkeyConstants.DEFAULT_UNBALANCE_WAIT_FOR_KILLS_MS);
    waitAfterBalanceMilliSec = longProperty(MonkeyConstants.UNBALANCE_WAIT_AFTER_BALANCE_MS,
      MonkeyConstants.DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS);
    killMetaRs = booleanProperty(MonkeyConstants.UNBALANCE_KILL_META_RS,
      MonkeyConstants.DEFAULT_UNBALANCE_KILL_META_RS);
  }

  /**
   * Returns the raw text configured for {@code key}, or the textual form of {@code defaultValue}
   * when the key is absent. The default is taken as {@code Object} and rendered with
   * {@link String#valueOf(Object)} so the text is the same as {@code defaultValue + ""} whatever
   * the declared type of the constant is.
   */
  private String stringProperty(String key, Object defaultValue) {
    return this.properties.getProperty(key, String.valueOf(defaultValue));
  }

  /** Reads {@code key} as a {@code long}; see {@link #stringProperty(String, Object)}. */
  private long longProperty(String key, Object defaultValue) {
    String raw = stringProperty(key, defaultValue);
    try {
      return Long.parseLong(raw);
    } catch (NumberFormatException e) {
      throw invalidNumber(key, raw, e);
    }
  }

  /** Reads {@code key} as a {@code float}; see {@link #stringProperty(String, Object)}. */
  private float floatProperty(String key, Object defaultValue) {
    String raw = stringProperty(key, defaultValue);
    try {
      return Float.parseFloat(raw);
    } catch (NumberFormatException e) {
      throw invalidNumber(key, raw, e);
    }
  }

  /** Reads {@code key} as a {@code boolean}; see {@link #stringProperty(String, Object)}. */
  private boolean booleanProperty(String key, Object defaultValue) {
    return Boolean.parseBoolean(stringProperty(key, defaultValue));
  }

  /**
   * Builds a {@link NumberFormatException} that names the property and its raw value. The
   * exception type is kept the same as the one thrown by the JDK parsers so existing handlers
   * continue to match; the original exception is attached as the cause.
   */
  private static NumberFormatException invalidNumber(String key, String raw,
    NumberFormatException cause) {
    NumberFormatException wrapped = new NumberFormatException(
      "Invalid value '" + raw + "' for chaos monkey property '" + key + "'");
    wrapped.initCause(cause);
    return wrapped;
  }

  /**
   * Creates the monkey described by this factory's current configuration.
   * @return the chaos monkey produced by the concrete factory
   */
  public abstract ChaosMonkey build();

  public static final String CALM = "calm";
  // TODO: the name has become a misnomer since the default (not-slow) monkey has been removed
  public static final String SLOW_DETERMINISTIC = "slowDeterministic";
  public static final String UNBALANCE = "unbalance";
  public static final String SERVER_KILLING = "serverKilling";
  public static final String STRESS_AM = "stressAM";
  public static final String NO_KILL = "noKill";
  public static final String MASTER_KILLING = "masterKilling";
  public static final String MOB_NO_KILL = "mobNoKill";
  public static final String MOB_SLOW_DETERMINISTIC = "mobSlowDeterministic";
  public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
  public static final String DISTRIBUTED_ISSUES = "distributedIssues";
  public static final String DATA_ISSUES = "dataIssues";
  public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";

  /**
   * Registry of the built-in factories keyed by their short names. The map itself is immutable;
   * the field is left non-final because it is public and code outside this file may rely on
   * that.
   */
  public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap
    .<String, MonkeyFactory> builder()
    .put(CALM, new CalmMonkeyFactory())
    .put(SLOW_DETERMINISTIC, new SlowDeterministicMonkeyFactory())
    .put(UNBALANCE, new UnbalanceMonkeyFactory())
    .put(SERVER_KILLING, new ServerKillingMonkeyFactory())
    .put(STRESS_AM, new StressAssignmentManagerMonkeyFactory())
    .put(NO_KILL, new NoKillMonkeyFactory())
    .put(MASTER_KILLING, new MasterKillingMonkeyFactory())
    .put(MOB_NO_KILL, new MobNoKillMonkeyFactory())
    // NOTE(review): this registers the same factory type as MOB_NO_KILL; it looks like a
    // copy-paste slip and presumably a dedicated mob slow-deterministic factory was intended.
    // Left unchanged because the intended class is not visible from this file - TODO confirm.
    .put(MOB_SLOW_DETERMINISTIC, new MobNoKillMonkeyFactory())
    .put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
    .put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
    .put(DATA_ISSUES, new DataIssuesMonkeyFactory())
    .put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory())
    .build();

  /**
   * Resolves a factory by name. Registered short names in {@link #FACTORIES} are tried first;
   * any other non-empty name is treated as the fully qualified name of a {@link MonkeyFactory}
   * subclass and instantiated reflectively.
   * @param factoryName a registered short name or a fully qualified class name; may be
   *                    {@code null} or empty
   * @return the matching factory, or {@code null} when the name is {@code null}, empty, or
   *         cannot be loaded and instantiated as a {@link MonkeyFactory}
   */
  public static MonkeyFactory getFactory(String factoryName) {
    MonkeyFactory registered = FACTORIES.get(factoryName);
    if (registered != null || factoryName == null || factoryName.isEmpty()) {
      return registered;
    }
    try {
      // asSubclass either returns the class or throws ClassCastException, so no null check is
      // needed before instantiating.
      Class<? extends MonkeyFactory> klass =
        Class.forName(factoryName).asSubclass(MonkeyFactory.class);
      LOG.info("Instantiating {}", klass.getName());
      return ReflectionUtils.newInstance(klass);
    } catch (Exception e) {
      // Pass the exception as the trailing argument so the log keeps the stack trace; without
      // it a missing class, a wrong type and a failing constructor are indistinguishable.
      LOG.error("Error trying to create {} could not load it by class name", factoryName, e);
      return null;
    }
  }
}