001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.chaos.monkies;
019
020import org.apache.hadoop.hbase.Stoppable;
021
022/**
 * A utility to inject faults in a running cluster.
024 * <p>
 * ChaosMonkey defines Actions and Policies. Actions are sequences of events, like:
 * <ul>
 * <li>Select a random server to kill</li>
 * <li>Sleep for 5 sec</li>
 * <li>Start the server on the same host</li>
 * </ul>
 * Actions can also be complex events, like a rolling restart of all of the servers.
028 * <p>
029 * Policies on the other hand are responsible for executing the actions based on a strategy. The
030 * default policy is to execute a random action every minute based on predefined action weights.
031 * ChaosMonkey executes predefined named policies until it is stopped. More than one policy can be
032 * active at any time.
033 * <p>
034 * Chaos monkey can be run from the command line, or can be invoked from integration tests. See
035 * {@link org.apache.hadoop.hbase.IntegrationTestIngest} or other integration tests that use chaos
036 * monkey for code examples.
037 * <p>
 * The ChaosMonkey class is inspired by Netflix's tool of the same name:
039 * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html
040 */
041public abstract class ChaosMonkey implements Stoppable {
042  public abstract void start() throws Exception;
043
044  @Override
045  public abstract void stop(String why);
046
047  @Override
048  public abstract boolean isStopped();
049
050  public abstract void waitForStop() throws InterruptedException;
051
052  /**
053   * Returns whether the CM does destructive actions (killing servers) so that a cluster restore is
054   * needed after CM is stopped. Otherwise cluster will be left as it is
055   */
056  public abstract boolean isDestructive();
057}