001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.rsgroup; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022 023import org.apache.hadoop.hbase.HBaseClassTestRule; 024import org.apache.hadoop.hbase.HBaseClusterInterface; 025import org.apache.hadoop.hbase.HBaseTestingUtil; 026import org.apache.hadoop.hbase.SingleProcessHBaseCluster; 027import org.apache.hadoop.hbase.StartTestingClusterOption; 028import org.apache.hadoop.hbase.TableName; 029import org.apache.hadoop.hbase.Waiter; 030import org.apache.hadoop.hbase.client.Admin; 031import org.apache.hadoop.hbase.client.RegionInfo; 032import org.apache.hadoop.hbase.master.HMaster; 033import org.apache.hadoop.hbase.master.ServerManager; 034import org.apache.hadoop.hbase.regionserver.HRegionServer; 035import org.apache.hadoop.hbase.testclassification.MediumTests; 036import org.apache.hadoop.hbase.testclassification.RSGroupTests; 037import org.apache.hadoop.hbase.util.Bytes; 038import org.junit.AfterClass; 039import org.junit.BeforeClass; 040import org.junit.ClassRule; 041import org.junit.Rule; 042import org.junit.Test; 043import org.junit.experimental.categories.Category; 044import org.junit.rules.TestName; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047 048import org.apache.hbase.thirdparty.com.google.common.collect.Sets; 049 050/** 051 * This tests that GroupBasedBalancer will use data in zk to do balancing during master startup. 052 * This does not test retain assignment. 053 * <p/> 054 * The tests brings up 3 RS, creates a new RS group 'my_group', moves 1 RS to 'my_group', assigns 055 * 'hbase:rsgroup' to 'my_group', and kill the only server in that group so that 'hbase:rsgroup' 056 * table isn't available. It then kills the active master and waits for backup master to come 057 * online. In new master, RSGroupInfoManagerImpl gets the data from zk and waits for the expected 058 * assignment with a timeout. 059 */ 060@Category({ RSGroupTests.class, MediumTests.class }) 061public class TestRSGroupsOfflineMode extends TestRSGroupsBase { 062 063 @ClassRule 064 public static final HBaseClassTestRule CLASS_RULE = 065 HBaseClassTestRule.forClass(TestRSGroupsOfflineMode.class); 066 067 private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsOfflineMode.class); 068 private static HMaster master; 069 private static Admin hbaseAdmin; 070 private static HBaseTestingUtil TEST_UTIL; 071 private static HBaseClusterInterface cluster; 072 private final static long WAIT_TIMEOUT = 60000 * 5; 073 074 @Rule 075 public TestName name = new TestName(); 076 077 @BeforeClass 078 public static void setUp() throws Exception { 079 TEST_UTIL = new HBaseTestingUtil(); 080 RSGroupUtil.enableRSGroup(TEST_UTIL.getConfiguration()); 081 TEST_UTIL.getConfiguration().set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "1"); 082 StartTestingClusterOption option = 083 StartTestingClusterOption.builder().numMasters(2).numRegionServers(3).numDataNodes(3).build(); 084 TEST_UTIL.startMiniCluster(option); 085 cluster = TEST_UTIL.getHBaseCluster(); 086 master = ((SingleProcessHBaseCluster) cluster).getMaster(); 087 master.balanceSwitch(false); 088 hbaseAdmin = TEST_UTIL.getAdmin(); 089 // wait till the balancer is in online mode 090 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 091 @Override 092 public boolean evaluate() throws Exception { 093 return master.isInitialized() 094 && ((RSGroupBasedLoadBalancer) master.getLoadBalancer()).isOnline() 095 && master.getServerManager().getOnlineServersList().size() >= 3; 096 } 097 }); 098 } 099 100 @AfterClass 101 public static void tearDown() throws Exception { 102 TEST_UTIL.shutdownMiniCluster(); 103 } 104 105 @Test 106 public void testOffline() throws Exception, InterruptedException { 107 // Table should be after group table name so it gets assigned later. 108 final TableName failoverTable = TableName.valueOf(getNameWithoutIndex(name.getMethodName())); 109 TEST_UTIL.createTable(failoverTable, Bytes.toBytes("f")); 110 final HRegionServer killRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(0); 111 final HRegionServer groupRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(1); 112 final HRegionServer failoverRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(2); 113 String newGroup = "my_group"; 114 Admin admin = TEST_UTIL.getAdmin(); 115 admin.addRSGroup(newGroup); 116 if ( 117 master.getAssignmentManager().getRegionStates().getRegionAssignments() 118 .containsValue(failoverRS.getServerName()) 119 ) { 120 for (RegionInfo regionInfo : hbaseAdmin.getRegions(failoverRS.getServerName())) { 121 hbaseAdmin.move(regionInfo.getEncodedNameAsBytes(), failoverRS.getServerName()); 122 } 123 LOG.info("Waiting for region unassignments on failover RS..."); 124 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 125 @Override 126 public boolean evaluate() throws Exception { 127 return !master.getServerManager().getLoad(failoverRS.getServerName()).getRegionMetrics() 128 .isEmpty(); 129 } 130 }); 131 } 132 133 // Move server to group and make sure all tables are assigned. 134 admin.moveServersToRSGroup(Sets.newHashSet(groupRS.getServerName().getAddress()), newGroup); 135 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 136 @Override 137 public boolean evaluate() throws Exception { 138 return groupRS.getNumberOfOnlineRegions() < 1 139 && master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() < 1; 140 } 141 }); 142 // Move table to group and wait. 143 admin.setRSGroup(Sets.newHashSet(RSGroupInfoManagerImpl.RSGROUP_TABLE_NAME), newGroup); 144 LOG.info("Waiting for move table..."); 145 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 146 @Override 147 public boolean evaluate() throws Exception { 148 return groupRS.getNumberOfOnlineRegions() == 1; 149 } 150 }); 151 152 groupRS.stop("die"); 153 // Race condition here. 154 TEST_UTIL.getHBaseCluster().getMaster().stopMaster(); 155 LOG.info("Waiting for offline mode..."); 156 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 157 @Override 158 public boolean evaluate() throws Exception { 159 return TEST_UTIL.getHBaseCluster().getMaster() != null 160 && TEST_UTIL.getHBaseCluster().getMaster().isActiveMaster() 161 && TEST_UTIL.getHBaseCluster().getMaster().isInitialized() 162 && TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size() 163 <= 3; 164 } 165 }); 166 167 // Get groupInfoManager from the new active master. 168 RSGroupInfoManager groupMgr = 169 ((SingleProcessHBaseCluster) cluster).getMaster().getRSGroupInfoManager(); 170 // Make sure balancer is in offline mode, since this is what we're testing. 171 assertFalse(groupMgr.isOnline()); 172 // Kill final regionserver to see the failover happens for all tables except GROUP table since 173 // it's group does not have any online RS. 174 killRS.stop("die"); 175 master = TEST_UTIL.getHBaseCluster().getMaster(); 176 LOG.info("Waiting for new table assignment..."); 177 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 178 @Override 179 public boolean evaluate() throws Exception { 180 return failoverRS.getRegions(failoverTable).size() >= 1; 181 } 182 }); 183 assertEquals(0, failoverRS.getRegions(RSGroupInfoManagerImpl.RSGROUP_TABLE_NAME).size()); 184 185 // Need this for minicluster to shutdown cleanly. 186 master.stopMaster(); 187 } 188}