/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.chaos.actions;

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;

/**
 * Chaos action that kills a randomly selected batch of region servers together and afterwards
 * starts the same servers again.
 */
public class BatchRestartRsAction extends RestartActionBaseAction {
  /** Ratio of region servers to restart. */
  float ratio;

  /**
   * @param sleepTime forwarded to the superclass constructor
   * @param ratio ratio of region servers to restart; handed to
   *          {@link PolicyBasedChaosMonkey#selectRandomItems} when the action runs
   */
  public BatchRestartRsAction(long sleepTime, float ratio) {
    super(sleepTime);
    this.ratio = ratio;
  }

  /**
   * Runs one batch restart: select servers, kill them and wait for them to stop, sleep for
   * {@code sleepTime}, then start the killed servers and wait for them to come up. Progress is
   * logged before the selection and after each of the two phases.
   */
  @Override
  public void perform() throws Exception {
    int percentage = (int) (ratio * 100);
    LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
      percentage));

    List<ServerName> candidates =
      PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), ratio);

    Set<ServerName> victims = killBatch(candidates);
    LOG.info("Killed " + victims.size() + " region servers. Reported num of rs:"
      + reportedRegionServerCount());

    sleep(sleepTime);

    startBatch(victims);
    LOG.info("Started " + victims.size() + " region servers. Reported num of rs:"
      + reportedRegionServerCount());
  }

  /**
   * Kills the given servers one after another, then waits for every killed server to stop.
   * @return the servers that were actually killed
   */
  private Set<ServerName> killBatch(List<ServerName> candidates) throws Exception {
    Set<ServerName> victims = new HashSet<>();
    for (ServerName candidate : candidates) {
      // Once the monkey is being stopped, no further servers are taken down; the ones already
      // killed are still waited for below.
      if (context.isStopping()) {
        break;
      }
      LOG.info("Killing region server:" + candidate);
      cluster.killRegionServer(candidate);
      victims.add(candidate);
    }

    // All kills are issued first; only then do we block on each server stopping.
    for (ServerName victim : victims) {
      cluster.waitForRegionServerToStop(victim, PolicyBasedChaosMonkey.TIMEOUT);
    }
    return victims;
  }

  /** Issues a start for every given server, then waits for each of them to start. */
  private void startBatch(Set<ServerName> victims) throws Exception {
    for (ServerName victim : victims) {
      String host = victim.getHostname();
      LOG.info("Starting region server:" + host);
      cluster.startRegionServer(host, victim.getPort());
    }

    for (ServerName victim : victims) {
      cluster.waitForRegionServerToStart(victim.getHostname(), victim.getPort(),
        PolicyBasedChaosMonkey.TIMEOUT);
    }
  }

  /** Returns the size of the live-server metrics reported by the cluster. */
  private int reportedRegionServerCount() throws Exception {
    return cluster.getClusterMetrics().getLiveServerMetrics().size();
  }
}