001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.rsgroup; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022 023import org.apache.hadoop.hbase.HBaseClassTestRule; 024import org.apache.hadoop.hbase.HBaseCluster; 025import org.apache.hadoop.hbase.HBaseTestingUtility; 026import org.apache.hadoop.hbase.HConstants; 027import org.apache.hadoop.hbase.MiniHBaseCluster; 028import org.apache.hadoop.hbase.StartMiniClusterOption; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.Waiter; 031import org.apache.hadoop.hbase.client.Admin; 032import org.apache.hadoop.hbase.client.RegionInfo; 033import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; 034import org.apache.hadoop.hbase.master.HMaster; 035import org.apache.hadoop.hbase.master.ServerManager; 036import org.apache.hadoop.hbase.regionserver.HRegionServer; 037import org.apache.hadoop.hbase.testclassification.MediumTests; 038import org.apache.hadoop.hbase.util.Bytes; 039import org.junit.AfterClass; 040import org.junit.Assert; 041import org.junit.BeforeClass; 042import org.junit.ClassRule; 043import org.junit.Rule; 044import org.junit.Test; 045import org.junit.experimental.categories.Category; 046import org.junit.rules.TestName; 047import org.slf4j.Logger; 048import org.slf4j.LoggerFactory; 049 050import org.apache.hbase.thirdparty.com.google.common.collect.Sets; 051 052// This tests that GroupBasedBalancer will use data in zk to do balancing during master startup. 053// This does not test retain assignment. 054// The tests brings up 3 RS, creates a new RS group 'my_group', moves 1 RS to 'my_group', assigns 055// 'hbase:rsgroup' to 'my_group', and kill the only server in that group so that 'hbase:rsgroup' 056// table isn't available. It then kills the active master and waits for backup master to come 057// online. In new master, RSGroupInfoManagerImpl gets the data from zk and waits for the expected 058// assignment with a timeout. 059@Category(MediumTests.class) 060public class TestRSGroupsOfflineMode { 061 062 @ClassRule 063 public static final HBaseClassTestRule CLASS_RULE = 064 HBaseClassTestRule.forClass(TestRSGroupsOfflineMode.class); 065 066 private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsOfflineMode.class); 067 private static HMaster master; 068 private static Admin hbaseAdmin; 069 private static HBaseTestingUtility TEST_UTIL; 070 private static HBaseCluster cluster; 071 private final static long WAIT_TIMEOUT = 60000 * 5; 072 073 @Rule 074 public TestName name = new TestName(); 075 076 @BeforeClass 077 public static void setUp() throws Exception { 078 TEST_UTIL = new HBaseTestingUtility(); 079 TEST_UTIL.getConfiguration().set(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, 080 RSGroupBasedLoadBalancer.class.getName()); 081 TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, 082 RSGroupAdminEndpoint.class.getName()); 083 TEST_UTIL.getConfiguration().set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "1"); 084 StartMiniClusterOption option = 085 StartMiniClusterOption.builder().numMasters(2).numRegionServers(3).numDataNodes(3).build(); 086 TEST_UTIL.startMiniCluster(option); 087 cluster = TEST_UTIL.getHBaseCluster(); 088 master = ((MiniHBaseCluster) cluster).getMaster(); 089 master.balanceSwitch(false); 090 hbaseAdmin = TEST_UTIL.getAdmin(); 091 // wait till the balancer is in online mode 092 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 093 @Override 094 public boolean evaluate() throws Exception { 095 return master.isInitialized() 096 && ((RSGroupBasedLoadBalancer) master.getLoadBalancer()).isOnline() 097 && master.getServerManager().getOnlineServersList().size() >= 3; 098 } 099 }); 100 } 101 102 @AfterClass 103 public static void tearDown() throws Exception { 104 TEST_UTIL.shutdownMiniCluster(); 105 } 106 107 @Test 108 public void testOffline() throws Exception, InterruptedException { 109 // Table should be after group table name so it gets assigned later. 110 final TableName failoverTable = TableName.valueOf(name.getMethodName()); 111 TEST_UTIL.createTable(failoverTable, Bytes.toBytes("f")); 112 final HRegionServer killRS = ((MiniHBaseCluster) cluster).getRegionServer(0); 113 final HRegionServer groupRS = ((MiniHBaseCluster) cluster).getRegionServer(1); 114 final HRegionServer failoverRS = ((MiniHBaseCluster) cluster).getRegionServer(2); 115 String newGroup = "my_group"; 116 RSGroupAdmin groupAdmin = new RSGroupAdminClient(TEST_UTIL.getConnection()); 117 groupAdmin.addRSGroup(newGroup); 118 if ( 119 master.getAssignmentManager().getRegionStates().getRegionAssignments() 120 .containsValue(failoverRS.getServerName()) 121 ) { 122 for (RegionInfo regionInfo : hbaseAdmin.getRegions(failoverRS.getServerName())) { 123 hbaseAdmin.move(regionInfo.getEncodedNameAsBytes(), failoverRS.getServerName()); 124 } 125 LOG.info("Waiting for region unassignments on failover RS..."); 126 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 127 @Override 128 public boolean evaluate() throws Exception { 129 return !master.getServerManager().getLoad(failoverRS.getServerName()).getRegionMetrics() 130 .isEmpty(); 131 } 132 }); 133 } 134 135 // Move server to group and make sure all tables are assigned. 136 groupAdmin.moveServers(Sets.newHashSet(groupRS.getServerName().getAddress()), newGroup); 137 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 138 @Override 139 public boolean evaluate() throws Exception { 140 return groupRS.getNumberOfOnlineRegions() < 1 141 && master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() < 1; 142 } 143 }); 144 // Move table to group and wait. 145 groupAdmin.moveTables(Sets.newHashSet(RSGroupInfoManager.RSGROUP_TABLE_NAME), newGroup); 146 LOG.info("Waiting for move table..."); 147 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 148 @Override 149 public boolean evaluate() throws Exception { 150 return groupRS.getNumberOfOnlineRegions() == 1; 151 } 152 }); 153 154 groupRS.stop("die"); 155 // Race condition here. 156 TEST_UTIL.getHBaseCluster().getMaster().stopMaster(); 157 LOG.info("Waiting for offline mode..."); 158 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 159 @Override 160 public boolean evaluate() throws Exception { 161 return TEST_UTIL.getHBaseCluster().getMaster() != null 162 && TEST_UTIL.getHBaseCluster().getMaster().isActiveMaster() 163 && TEST_UTIL.getHBaseCluster().getMaster().isInitialized() 164 && TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size() 165 <= 3; 166 } 167 }); 168 169 // Get groupInfoManager from the new active master. 170 RSGroupInfoManager groupMgr = ((MiniHBaseCluster) cluster).getMaster() 171 .getMasterCoprocessorHost().findCoprocessor(RSGroupAdminEndpoint.class).getGroupInfoManager(); 172 // Make sure balancer is in offline mode, since this is what we're testing. 173 assertFalse(groupMgr.isOnline()); 174 // Verify the group affiliation that's loaded from ZK instead of tables. 175 assertEquals(newGroup, groupMgr.getRSGroupOfTable(RSGroupInfoManager.RSGROUP_TABLE_NAME)); 176 assertEquals(RSGroupInfo.DEFAULT_GROUP, groupMgr.getRSGroupOfTable(failoverTable)); 177 // Kill final regionserver to see the failover happens for all tables except GROUP table since 178 // it's group does not have any online RS. 179 killRS.stop("die"); 180 master = TEST_UTIL.getHBaseCluster().getMaster(); 181 LOG.info("Waiting for new table assignment..."); 182 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 183 @Override 184 public boolean evaluate() throws Exception { 185 return failoverRS.getRegions(failoverTable).size() >= 1; 186 } 187 }); 188 Assert.assertEquals(0, failoverRS.getRegions(RSGroupInfoManager.RSGROUP_TABLE_NAME).size()); 189 190 // Need this for minicluster to shutdown cleanly. 191 master.stopMaster(); 192 } 193}