001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.chaos.monkies;
020
021import org.apache.hadoop.hbase.Stoppable;
022
023/**
024 * A utility to injects faults in a running cluster.
025 * <p>
026 * ChaosMonkey defines Action's and Policy's. Actions are sequences of events, like
027 *  - Select a random server to kill
028 *  - Sleep for 5 sec
029 *  - Start the server on the same host
030 * Actions can also be complex events, like rolling restart of all of the servers.
031 * <p>
032 * Policies on the other hand are responsible for executing the actions based on a strategy.
033 * The default policy is to execute a random action every minute based on predefined action
034 * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one
035 * policy can be active at any time.
036 * <p>
037 * Chaos monkey can be run from the command line, or can be invoked from integration tests.
038 * See {@link org.apache.hadoop.hbase.IntegrationTestIngest} or other integration tests that use
039 * chaos monkey for code examples.
040 * <p>
041 * ChaosMonkey class is indeed inspired by the Netflix's same-named tool:
042 * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html
043 */
044public abstract class ChaosMonkey implements Stoppable {
045  public abstract void start() throws Exception;
046
047  @Override
048  public abstract void stop(String why);
049
050  @Override
051  public abstract boolean isStopped();
052
053  public abstract void waitForStop() throws InterruptedException;
054
055  /**
056   * Returns whether the CM does destructive actions (killing servers) so that a cluster restore
057   * is needed after CM is stopped. Otherwise cluster will be left as it is
058   */
059  public abstract boolean isDestructive();
060}