001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.rsgroup; 019 020import static org.apache.hadoop.hbase.util.Threads.sleep; 021import static org.junit.jupiter.api.Assertions.assertEquals; 022import static org.junit.jupiter.api.Assertions.assertTrue; 023 024import java.lang.reflect.Field; 025import java.util.ArrayList; 026import java.util.HashSet; 027import java.util.List; 028import java.util.Map; 029import java.util.Set; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.NamespaceDescriptor; 032import org.apache.hadoop.hbase.ServerName; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.Version; 035import org.apache.hadoop.hbase.Waiter; 036import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 037import org.apache.hadoop.hbase.client.RegionInfo; 038import org.apache.hadoop.hbase.client.Table; 039import org.apache.hadoop.hbase.client.TableDescriptor; 040import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 041import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor; 042import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; 043import org.apache.hadoop.hbase.net.Address; 044import org.apache.hadoop.hbase.testclassification.LargeTests; 045import org.apache.hadoop.hbase.testclassification.RSGroupTests; 046import org.apache.hadoop.hbase.util.Bytes; 047import org.apache.hadoop.hbase.util.JVMClusterUtil; 048import org.apache.hadoop.hbase.util.VersionInfo; 049import org.junit.jupiter.api.AfterAll; 050import org.junit.jupiter.api.AfterEach; 051import org.junit.jupiter.api.BeforeAll; 052import org.junit.jupiter.api.BeforeEach; 053import org.junit.jupiter.api.Tag; 054import org.junit.jupiter.api.Test; 055import org.junit.jupiter.api.TestInfo; 056import org.slf4j.Logger; 057import org.slf4j.LoggerFactory; 058 059import org.apache.hbase.thirdparty.com.google.common.collect.Sets; 060 061@Tag(RSGroupTests.TAG) 062@Tag(LargeTests.TAG) 063public class TestRSGroupsKillRS extends TestRSGroupsBase { 064 065 private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class); 066 067 @BeforeAll 068 public static void setUp() throws Exception { 069 // avoid all the handlers blocked when meta is offline, and regionServerReport can not be 070 // processed which causes dead lock. 071 TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10); 072 TEST_UTIL.getConfiguration() 073 .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f); 074 setUpTestBeforeClass(); 075 } 076 077 @AfterAll 078 public static void tearDown() throws Exception { 079 tearDownAfterClass(); 080 } 081 082 @BeforeEach 083 public void beforeMethod(TestInfo testInfo) throws Exception { 084 setUpBeforeMethod(testInfo); 085 } 086 087 @AfterEach 088 public void afterMethod() throws Exception { 089 tearDownAfterMethod(); 090 } 091 092 @Test 093 public void testKillRS() throws Exception { 094 RSGroupInfo appInfo = addGroup("appInfo", 1); 095 final TableName tableName = 096 TableName.valueOf(TABLE_PREFIX + "_ns", getNameWithoutIndex(name.getMethodName())); 097 ADMIN.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString()) 098 .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build()); 099 final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName) 100 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build(); 101 ADMIN.createTable(desc); 102 // wait for created table to be assigned 103 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 104 @Override 105 public boolean evaluate() throws Exception { 106 return getTableRegionMap().get(desc.getTableName()) != null; 107 } 108 }); 109 110 ServerName targetServer = getServerName(appInfo.getServers().iterator().next()); 111 assertEquals(1, ADMIN.getRegions(targetServer).size()); 112 113 try { 114 // stopping may cause an exception 115 // due to the connection loss 116 ADMIN.stopRegionServer(targetServer.getAddress().toString()); 117 } catch (Exception e) { 118 } 119 // wait until the server is actually down 120 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 121 @Override 122 public boolean evaluate() throws Exception { 123 return !CLUSTER.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer); 124 } 125 }); 126 // there is only one rs in the group and we killed it, so the region can not be online, until 127 // later we add new servers to it. 128 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 129 @Override 130 public boolean evaluate() throws Exception { 131 return !CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty(); 132 } 133 }); 134 Set<Address> newServers = Sets.newHashSet(); 135 newServers.add(ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next()); 136 ADMIN.moveServersToRSGroup(newServers, appInfo.getName()); 137 138 // Make sure all the table's regions get reassigned 139 // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens 140 ADMIN.disableTable(tableName); 141 ADMIN.enableTable(tableName); 142 143 // wait for region to be assigned 144 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() { 145 @Override 146 public boolean evaluate() throws Exception { 147 return CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty(); 148 } 149 }); 150 151 ServerName targetServer1 = getServerName(newServers.iterator().next()); 152 assertEquals(1, ADMIN.getRegions(targetServer1).size()); 153 assertEquals(tableName, ADMIN.getRegions(targetServer1).get(0).getTable()); 154 } 155 156 @Test 157 public void testKillAllRSInGroup() throws Exception { 158 // create a rsgroup and move two regionservers to it 159 String groupName = "my_group"; 160 int groupRSCount = 2; 161 addGroup(groupName, groupRSCount); 162 163 // create a table, and move it to my_group 164 Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5); 165 TEST_UTIL.loadTable(t, Bytes.toBytes("f")); 166 Set<TableName> toAddTables = new HashSet<>(); 167 toAddTables.add(tableName); 168 ADMIN.setRSGroup(toAddTables, groupName); 169 assertTrue( 170 ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond().contains(tableName)); 171 TEST_UTIL.waitTableAvailable(tableName, 30000); 172 173 // check my_group servers and table regions 174 Set<Address> servers = ADMIN.getRSGroup(groupName).getServers(); 175 assertEquals(2, servers.size()); 176 LOG.debug("group servers {}", servers); 177 for (RegionInfo tr : MASTER.getAssignmentManager().getRegionStates() 178 .getRegionsOfTable(tableName)) { 179 assertTrue(servers.contains(MASTER.getAssignmentManager().getRegionStates() 180 .getRegionAssignments().get(tr).getAddress())); 181 } 182 183 // Move a region, to ensure there exists a region whose 'lastHost' is in my_group 184 // ('lastHost' of other regions are in 'default' group) 185 // and check if all table regions are online 186 List<ServerName> gsn = new ArrayList<>(); 187 for (Address addr : servers) { 188 gsn.add(getServerName(addr)); 189 } 190 assertEquals(2, gsn.size()); 191 for (Map.Entry<RegionInfo, ServerName> entry : MASTER.getAssignmentManager().getRegionStates() 192 .getRegionAssignments().entrySet()) { 193 if (entry.getKey().getTable().equals(tableName)) { 194 LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(), 195 entry.getValue(), gsn.get(1 - gsn.indexOf(entry.getValue()))); 196 TEST_UTIL.moveRegionAndWait(entry.getKey(), gsn.get(1 - gsn.indexOf(entry.getValue()))); 197 break; 198 } 199 } 200 TEST_UTIL.waitTableAvailable(tableName, 30000); 201 202 // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group, 203 // and then check if all table regions are online 204 for (Address addr : ADMIN.getRSGroup(groupName).getServers()) { 205 TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr)); 206 } 207 // better wait for a while for region reassign 208 sleep(10000); 209 assertEquals(NUM_SLAVES_BASE - gsn.size(), 210 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size()); 211 TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(), 212 gsn.get(0).getPort()); 213 assertEquals(NUM_SLAVES_BASE - gsn.size() + 1, 214 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size()); 215 TEST_UTIL.waitTableAvailable(tableName, 30000); 216 217 // case 2: stop all the regionservers in my_group, and move another 218 // regionserver(from the 'default' group) to my_group, 219 // and then check if all table regions are online 220 for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster() 221 .getLiveRegionServerThreads()) { 222 if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) { 223 TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName()); 224 break; 225 } 226 } 227 sleep(10000); 228 assertEquals(NUM_SLAVES_BASE - gsn.size(), 229 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size()); 230 ServerName newServer = MASTER.getServerManager().getOnlineServersList().get(0); 231 ADMIN.moveServersToRSGroup(Sets.newHashSet(newServer.getAddress()), groupName); 232 // wait and check if table regions are online 233 TEST_UTIL.waitTableAvailable(tableName, 30000); 234 } 235 236 @Test 237 public void testLowerMetaGroupVersion() throws Exception { 238 // create a rsgroup and move one regionserver to it 239 String groupName = "meta_group"; 240 int groupRSCount = 1; 241 addGroup(groupName, groupRSCount); 242 243 // move hbase:meta to meta_group 244 Set<TableName> toAddTables = new HashSet<>(); 245 toAddTables.add(TableName.META_TABLE_NAME); 246 ADMIN.setRSGroup(toAddTables, groupName); 247 assertTrue(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond() 248 .contains(TableName.META_TABLE_NAME)); 249 250 // restart the regionserver in meta_group, and lower its version 251 String originVersion = ""; 252 Set<Address> servers = new HashSet<>(); 253 for (Address addr : ADMIN.getRSGroup(groupName).getServers()) { 254 servers.add(addr); 255 TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr)); 256 originVersion = MASTER.getRegionServerVersion(getServerName(addr)); 257 } 258 // better wait for a while for region reassign 259 sleep(10000); 260 assertEquals(NUM_SLAVES_BASE - groupRSCount, 261 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size()); 262 Address address = servers.iterator().next(); 263 int majorVersion = VersionInfo.getMajorVersion(originVersion); 264 assertTrue(majorVersion >= 1); 265 String lowerVersion = 266 String.valueOf(majorVersion - 1) + originVersion.substring(originVersion.indexOf(".")); 267 try { 268 setVersionInfoVersion(lowerVersion); 269 TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort()); 270 assertEquals(NUM_SLAVES_BASE, 271 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size()); 272 assertTrue(VersionInfo.compareVersion(originVersion, 273 MASTER.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0); 274 LOG.debug("wait for META assigned..."); 275 // SCP finished, which means all regions assigned too. 276 TEST_UTIL.waitFor(60000, () -> !TEST_UTIL.getHBaseCluster().getMaster().getProcedures() 277 .stream().filter(p -> (p instanceof ServerCrashProcedure)).findAny().isPresent()); 278 } finally { 279 setVersionInfoVersion(Version.version); 280 } 281 } 282 283 private static void setVersionInfoVersion(String newValue) throws Exception { 284 Field f = VersionInfo.class.getDeclaredField("version"); 285 f.setAccessible(true); 286 f.set(null, newValue); 287 } 288}