001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.chaos.factories;
019
020import java.util.Map;
021import java.util.Properties;
022import java.util.Set;
023import org.apache.hadoop.hbase.IntegrationTestingUtility;
024import org.apache.hadoop.hbase.TableName;
025import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
026import org.apache.hadoop.hbase.util.ReflectionUtils;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;
031
032/**
033 * Base class of the factory that will create a ChaosMonkey.
034 */
035public abstract class MonkeyFactory {
036  private static final Logger LOG = LoggerFactory.getLogger(MonkeyFactory.class);
037
038  protected TableName tableName;
039  protected Set<String> columnFamilies;
040  protected IntegrationTestingUtility util;
041  protected Properties properties = new Properties();
042
043  protected long action1Period;
044  protected long action2Period;
045  protected long action3Period;
046  protected long action4Period;
047  protected long moveRegionsMaxTime;
048  protected long moveRegionsSleepTime;
049  protected long moveRandomRegionSleepTime;
050  protected long restartRandomRSSleepTime;
051  protected long batchRestartRSSleepTime;
052  protected float batchRestartRSRatio;
053  protected long restartActiveMasterSleepTime;
054  protected long rollingBatchRestartRSSleepTime;
055  protected float rollingBatchRestartRSRatio;
056  protected long restartRsHoldingMetaSleepTime;
057  protected float compactTableRatio;
058  protected float compactRandomRegionRatio;
059  protected long decreaseHFileSizeSleepTime;
060  protected long decreaseHFileSizeMinHFileSize;
061  protected float decreaseHFileSizeHFileSizeJitter;
062  protected long gracefulRollingRestartTSSLeepTime;
063  protected long rollingBatchSuspendRSSleepTime;
064  protected float rollingBatchSuspendRSRatio;
065  protected long snapshotTableTtl;
066
067  protected long cpuLoadDuration;
068  protected long cpuLoadProcesses;
069  protected long networkIssueTimeout;
070  protected long networkIssueDuration;
071  protected float networkIssueRation;
072  protected long networkIssueDelay;
073  protected String networkIssueInterface;
074  protected long fillDiskTimeout;
075  protected String fillDiskPath;
076  protected long fillDiskFileSize;
077  protected long fillDiskIssueduration;
078
079  protected long restartRandomRsExceptMetaSleepTime;
080  protected long restartActiveNameNodeSleepTime;
081  protected long restartRandomDataNodeSleepTime;
082  protected long restartRandomJournalNodeSleepTime;
083  protected long restartRandomZKNodeSleepTime;
084
085  /**
086   * How often to introduce the chaos. If too frequent, sequence of kills on minicluster can cause
087   * test to fail when Put runs out of retries.
088   */
089  protected long chaosEveryMilliSec;
090  protected long waitForUnbalanceMilliSec;
091  protected long waitForKillMilliSec;
092  protected long waitAfterBalanceMilliSec;
093  protected boolean killMetaRs;
094
095  public MonkeyFactory setTableName(TableName tableName) {
096    this.tableName = tableName;
097    return this;
098  }
099
100  public MonkeyFactory setColumnFamilies(Set<String> columnFamilies) {
101    this.columnFamilies = columnFamilies;
102    return this;
103  }
104
105  public MonkeyFactory setUtil(IntegrationTestingUtility util) {
106    this.util = util;
107    return this;
108  }
109
110  public MonkeyFactory setProperties(Properties props) {
111    if (props != null) {
112      this.properties = props;
113    }
114    return this;
115  }
116
117  protected final void loadProperties() {
118    action1Period =
119      Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
120        MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
121    action2Period =
122      Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION2_PERIOD,
123        MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + ""));
124    action3Period =
125      Long.parseLong(this.properties.getProperty(MonkeyConstants.COMPOSITE_ACTION3_PERIOD,
126        MonkeyConstants.DEFAULT_COMPOSITE_ACTION3_PERIOD + ""));
127    action4Period =
128      Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION4_PERIOD,
129        MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD + ""));
130    moveRegionsMaxTime =
131      Long.parseLong(this.properties.getProperty(MonkeyConstants.MOVE_REGIONS_MAX_TIME,
132        MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME + ""));
133    moveRegionsSleepTime =
134      Long.parseLong(this.properties.getProperty(MonkeyConstants.MOVE_REGIONS_SLEEP_TIME,
135        MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME + ""));
136    moveRandomRegionSleepTime =
137      Long.parseLong(this.properties.getProperty(MonkeyConstants.MOVE_RANDOM_REGION_SLEEP_TIME,
138        MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME + ""));
139    restartRandomRSSleepTime =
140      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_SLEEP_TIME,
141        MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME + ""));
142    batchRestartRSSleepTime =
143      Long.parseLong(this.properties.getProperty(MonkeyConstants.BATCH_RESTART_RS_SLEEP_TIME,
144        MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME + ""));
145    batchRestartRSRatio =
146      Float.parseFloat(this.properties.getProperty(MonkeyConstants.BATCH_RESTART_RS_RATIO,
147        MonkeyConstants.DEFAULT_BATCH_RESTART_RS_RATIO + ""));
148    restartActiveMasterSleepTime =
149      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
150        MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
151    rollingBatchRestartRSSleepTime = Long
152      .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
153        MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
154    rollingBatchRestartRSRatio =
155      Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO,
156        MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + ""));
157    restartRsHoldingMetaSleepTime =
158      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RS_HOLDING_META_SLEEP_TIME,
159        MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME + ""));
160    compactTableRatio =
161      Float.parseFloat(this.properties.getProperty(MonkeyConstants.COMPACT_TABLE_ACTION_RATIO,
162        MonkeyConstants.DEFAULT_COMPACT_TABLE_ACTION_RATIO + ""));
163    compactRandomRegionRatio =
164      Float.parseFloat(this.properties.getProperty(MonkeyConstants.COMPACT_RANDOM_REGION_RATIO,
165        MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO + ""));
166    decreaseHFileSizeSleepTime =
167      Long.parseLong(this.properties.getProperty(MonkeyConstants.DECREASE_HFILE_SIZE_SLEEP_TIME,
168        MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME + ""));
169    decreaseHFileSizeMinHFileSize =
170      Long.parseLong(this.properties.getProperty(MonkeyConstants.DECREASE_HFILE_SIZE_MIN_HFILE_SIZE,
171        MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_MIN_HFILE_SIZE + ""));
172    decreaseHFileSizeHFileSizeJitter = Float
173      .parseFloat(this.properties.getProperty(MonkeyConstants.DECREASE_HFILE_SIZE_HFILE_SIZE_JITTER,
174        MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_HFILE_SIZE_JITTER + ""));
175    gracefulRollingRestartTSSLeepTime =
176      Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
177        MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
178    rollingBatchSuspendRSSleepTime = Long
179      .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME,
180        MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME + ""));
181    rollingBatchSuspendRSRatio =
182      Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
183        MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
184    snapshotTableTtl =
185      Long.parseLong(this.properties.getProperty(MonkeyConstants.SNAPSHOT_TABLE_TTL,
186        MonkeyConstants.DEFAULT_SNAPSHOT_TABLE_TTL + ""));
187
188    cpuLoadDuration = Long.parseLong(this.properties.getProperty(MonkeyConstants.CPU_LOAD_DURATION,
189      MonkeyConstants.DEFAULT_CPU_LOAD_DURATION + ""));
190    cpuLoadProcesses =
191      Long.parseLong(this.properties.getProperty(MonkeyConstants.CPU_LOAD_PROCESSES,
192        MonkeyConstants.DEFAULT_CPU_LOAD_PROCESSES + ""));
193    networkIssueTimeout =
194      Long.parseLong(this.properties.getProperty(MonkeyConstants.NETWORK_ISSUE_COMMAND_TIMEOUT,
195        MonkeyConstants.DEFAULT_NETWORK_ISSUE_COMMAND_TIMEOUT + ""));
196    networkIssueDuration =
197      Long.parseLong(this.properties.getProperty(MonkeyConstants.NETWORK_ISSUE_DURATION,
198        MonkeyConstants.DEFAULT_NETWORK_ISSUE_DURATION + ""));
199    networkIssueRation =
200      Float.parseFloat(this.properties.getProperty(MonkeyConstants.NETWORK_ISSUE_RATIO,
201        MonkeyConstants.DEFAULT_NETWORK_ISSUE_RATIO + ""));
202    networkIssueDelay =
203      Long.parseLong(this.properties.getProperty(MonkeyConstants.NETWORK_ISSUE_DELAY,
204        MonkeyConstants.DEFAULT_NETWORK_ISSUE_DELAY + ""));
205    networkIssueInterface = this.properties.getProperty(MonkeyConstants.NETWORK_ISSUE_INTERFACE,
206      MonkeyConstants.DEFAULT_NETWORK_ISSUE_INTERFACE + "");
207    fillDiskTimeout =
208      Long.parseLong(this.properties.getProperty(MonkeyConstants.FILL_DISK_COMMAND_TIMEOUT,
209        MonkeyConstants.DEFAULT_FILL_DISK_COMMAND_TIMEOUT + ""));
210    fillDiskPath = this.properties.getProperty(MonkeyConstants.FILL_DISK_PATH,
211      MonkeyConstants.DEFAULT_FILL_DISK_PATH + "");
212    fillDiskFileSize =
213      Long.parseLong(this.properties.getProperty(MonkeyConstants.FILL_DISK_FILE_SIZE,
214        MonkeyConstants.DEFAULT_FILL_DISK_FILE_SIZE + ""));
215    fillDiskIssueduration =
216      Long.parseLong(this.properties.getProperty(MonkeyConstants.FILL_DISK_ISSUE_DURATION,
217        MonkeyConstants.DEFAULT_FILL_DISK_ISSUE_DURATION + ""));
218
219    restartRandomRsExceptMetaSleepTime = Long
220      .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
221        MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
222    restartActiveNameNodeSleepTime =
223      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME,
224        MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME + ""));
225    restartRandomDataNodeSleepTime =
226      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME,
227        MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME + ""));
228    restartRandomJournalNodeSleepTime = Long
229      .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME,
230        MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME + ""));
231    restartRandomZKNodeSleepTime =
232      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME,
233        MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME + ""));
234
235    chaosEveryMilliSec =
236      Long.parseLong(this.properties.getProperty(MonkeyConstants.UNBALANCE_CHAOS_EVERY_MS,
237        MonkeyConstants.DEFAULT_UNBALANCE_CHAOS_EVERY_MS + ""));
238    waitForUnbalanceMilliSec =
239      Long.parseLong(this.properties.getProperty(MonkeyConstants.UNBALANCE_WAIT_FOR_UNBALANCE_MS,
240        MonkeyConstants.DEFAULT_UNBALANCE_WAIT_FOR_UNBALANCE_MS + ""));
241    waitForKillMilliSec =
242      Long.parseLong(this.properties.getProperty(MonkeyConstants.UNBALANCE_WAIT_FOR_KILLS_MS,
243        MonkeyConstants.DEFAULT_UNBALANCE_WAIT_FOR_KILLS_MS + ""));
244    waitAfterBalanceMilliSec =
245      Long.parseLong(this.properties.getProperty(MonkeyConstants.UNBALANCE_WAIT_AFTER_BALANCE_MS,
246        MonkeyConstants.DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS + ""));
247    killMetaRs =
248      Boolean.parseBoolean(this.properties.getProperty(MonkeyConstants.UNBALANCE_KILL_META_RS,
249        MonkeyConstants.DEFAULT_UNBALANCE_KILL_META_RS + ""));
250  }
251
252  public abstract ChaosMonkey build();
253
254  public static final String CALM = "calm";
255  // TODO: the name has become a misnomer since the default (not-slow) monkey has been removed
256  public static final String SLOW_DETERMINISTIC = "slowDeterministic";
257  public static final String UNBALANCE = "unbalance";
258  public static final String SERVER_KILLING = "serverKilling";
259  public static final String STRESS_AM = "stressAM";
260  public static final String NO_KILL = "noKill";
261  public static final String MASTER_KILLING = "masterKilling";
262  public static final String MOB_NO_KILL = "mobNoKill";
263  public static final String MOB_SLOW_DETERMINISTIC = "mobSlowDeterministic";
264  public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
265  public static final String DISTRIBUTED_ISSUES = "distributedIssues";
266  public static final String DATA_ISSUES = "dataIssues";
267  public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";
268
269  public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap
270    .<String, MonkeyFactory> builder().put(CALM, new CalmMonkeyFactory())
271    .put(SLOW_DETERMINISTIC, new SlowDeterministicMonkeyFactory())
272    .put(UNBALANCE, new UnbalanceMonkeyFactory())
273    .put(SERVER_KILLING, new ServerKillingMonkeyFactory())
274    .put(STRESS_AM, new StressAssignmentManagerMonkeyFactory())
275    .put(NO_KILL, new NoKillMonkeyFactory()).put(MASTER_KILLING, new MasterKillingMonkeyFactory())
276    .put(MOB_NO_KILL, new MobNoKillMonkeyFactory())
277    .put(MOB_SLOW_DETERMINISTIC, new MobNoKillMonkeyFactory())
278    .put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
279    .put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
280    .put(DATA_ISSUES, new DataIssuesMonkeyFactory())
281    .put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory()).build();
282
283  public static MonkeyFactory getFactory(String factoryName) {
284    MonkeyFactory fact = FACTORIES.get(factoryName);
285    if (fact == null && factoryName != null && !factoryName.isEmpty()) {
286      Class<? extends MonkeyFactory> klass = null;
287      try {
288        klass = Class.forName(factoryName).asSubclass(MonkeyFactory.class);
289        if (klass != null) {
290          LOG.info("Instantiating {}", klass.getName());
291          fact = ReflectionUtils.newInstance(klass);
292        }
293      } catch (Exception e) {
294        LOG.error("Error trying to create " + factoryName + " could not load it by class name");
295        return null;
296      }
297    }
298    return fact;
299  }
300}