001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.chaos.monkies; 020 021import org.apache.hadoop.hbase.Stoppable; 022 023/** 024 * A utility to injects faults in a running cluster. 025 * <p> 026 * ChaosMonkey defines Action's and Policy's. Actions are sequences of events, like 027 * - Select a random server to kill 028 * - Sleep for 5 sec 029 * - Start the server on the same host 030 * Actions can also be complex events, like rolling restart of all of the servers. 031 * <p> 032 * Policies on the other hand are responsible for executing the actions based on a strategy. 033 * The default policy is to execute a random action every minute based on predefined action 034 * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one 035 * policy can be active at any time. 036 * <p> 037 * Chaos monkey can be run from the command line, or can be invoked from integration tests. 038 * See {@link org.apache.hadoop.hbase.IntegrationTestIngest} or other integration tests that use 039 * chaos monkey for code examples. 040 * <p> 041 * ChaosMonkey class is indeed inspired by the Netflix's same-named tool: 042 * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html 043 */ 044public abstract class ChaosMonkey implements Stoppable { 045 public abstract void start() throws Exception; 046 047 @Override 048 public abstract void stop(String why); 049 050 @Override 051 public abstract boolean isStopped(); 052 053 public abstract void waitForStop() throws InterruptedException; 054 055 /** 056 * Returns whether the CM does destructive actions (killing servers) so that a cluster restore 057 * is needed after CM is stopped. Otherwise cluster will be left as it is 058 */ 059 public abstract boolean isDestructive(); 060}