001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.chaos.monkies; 019 020import org.apache.hadoop.hbase.Stoppable; 021 022/** 023 * A utility to injects faults in a running cluster. 024 * <p> 025 * ChaosMonkey defines Action's and Policy's. Actions are sequences of events, like - Select a 026 * random server to kill - Sleep for 5 sec - Start the server on the same host Actions can also be 027 * complex events, like rolling restart of all of the servers. 028 * <p> 029 * Policies on the other hand are responsible for executing the actions based on a strategy. The 030 * default policy is to execute a random action every minute based on predefined action weights. 031 * ChaosMonkey executes predefined named policies until it is stopped. More than one policy can be 032 * active at any time. 033 * <p> 034 * Chaos monkey can be run from the command line, or can be invoked from integration tests. See 035 * {@link org.apache.hadoop.hbase.IntegrationTestIngest} or other integration tests that use chaos 036 * monkey for code examples. 037 * <p> 038 * ChaosMonkey class is indeed inspired by the Netflix's same-named tool: 039 * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html 040 */ 041public abstract class ChaosMonkey implements Stoppable { 042 public abstract void start() throws Exception; 043 044 @Override 045 public abstract void stop(String why); 046 047 @Override 048 public abstract boolean isStopped(); 049 050 public abstract void waitForStop() throws InterruptedException; 051 052 /** 053 * Returns whether the CM does destructive actions (killing servers) so that a cluster restore is 054 * needed after CM is stopped. Otherwise cluster will be left as it is 055 */ 056 public abstract boolean isDestructive(); 057}