/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.rsgroup;

import static org.apache.hadoop.hbase.util.Threads.sleep;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Version;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RSGroupTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.VersionInfo;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * Tests that stop region servers belonging to an rsgroup and then check that the affected regions
 * come back online once a server is available to the group again.
 */
@Category({ RSGroupTests.class, MediumTests.class })
public class TestRSGroupsKillRS extends TestRSGroupsBase {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);

  /**
   * Adjusts the handler configuration and then delegates to the shared class-level setup.
   */
  @BeforeClass
  public static void setUp() throws Exception {
    // Avoid having every handler blocked while meta is offline: in that state regionServerReport
    // cannot be processed, which causes a deadlock.
    TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
    TEST_UTIL.getConfiguration()
      .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f);
    setUpTestBeforeClass();
  }

  @AfterClass
  public static void tearDown() throws Exception {
    tearDownAfterClass();
  }

  @Before
  public void beforeMethod() throws Exception {
    setUpBeforeMethod();
  }

  @After
  public void afterMethod() throws Exception {
    tearDownAfterMethod();
  }

  /**
   * Stops the only region server of a one-server group, moves a server from the default group
   * into it, and verifies the table's single region ends up on the newly added server.
   */
  @Test
  public void testKillRS() throws Exception {
    RSGroupInfo appInfo = addGroup("appInfo", 1);
    final TableName tableName =
      TableName.valueOf(TABLE_PREFIX + "_ns", getNameWithoutIndex(name.getMethodName()));
    ADMIN.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
    ADMIN.createTable(desc);
    // wait for created table to be assigned
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return getTableRegionMap().get(desc.getTableName()) != null;
      }
    });

    final ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
    assertEquals(1, ADMIN.getRegions(targetServer).size());

    try {
      // stopping may cause an exception due to the connection loss
      ADMIN.stopRegionServer(targetServer.getAddress().toString());
    } catch (Exception e) {
      // Best effort: the stop request is expected to fail sometimes, so record it and carry on.
      LOG.debug("Ignoring exception while stopping region server {}", targetServer, e);
    }
    // wait until the server is actually down
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return !CLUSTER.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
      }
    });
    // there is only one rs in the group and we killed it, so the region can not be online, until
    // later we add new servers to it.
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return !CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
      }
    });
    Set<Address> newServers = Sets.newHashSet();
    newServers.add(ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
    ADMIN.moveServersToRSGroup(newServers, appInfo.getName());

    // Make sure all the table's regions get reassigned
    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
    ADMIN.disableTable(tableName);
    ADMIN.enableTable(tableName);

    // wait for region to be assigned
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
      }
    });

    ServerName targetServer1 = getServerName(newServers.iterator().next());
    assertEquals(1, ADMIN.getRegions(targetServer1).size());
    assertEquals(tableName, ADMIN.getRegions(targetServer1).get(0).getTable());
  }

  /**
   * Stops every region server of a two-server group and checks that the table becomes available
   * again, first after restarting one of the group's servers and then after moving another online
   * server into the group.
   */
  @Test
  public void testKillAllRSInGroup() throws Exception {
    // create a rsgroup and move two regionservers to it
    String groupName = "my_group";
    int groupRSCount = 2;
    addGroup(groupName, groupRSCount);

    // create a table, and move it to my_group; the Table handle is only needed for loading, so
    // close it as soon as the load is done
    try (Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5)) {
      TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
    }
    Set<TableName> toAddTables = new HashSet<>();
    toAddTables.add(tableName);
    ADMIN.setRSGroup(toAddTables, groupName);
    assertTrue(
      ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond().contains(tableName));
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // check my_group servers and table regions
    Set<Address> servers = ADMIN.getRSGroup(groupName).getServers();
    assertEquals(2, servers.size());
    LOG.debug("group servers {}", servers);
    for (RegionInfo tr : MASTER.getAssignmentManager().getRegionStates()
      .getRegionsOfTable(tableName)) {
      assertTrue(servers.contains(MASTER.getAssignmentManager().getRegionStates()
        .getRegionAssignments().get(tr).getAddress()));
    }

    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
    // ('lastHost' of other regions are in 'default' group)
    // and check if all table regions are online
    List<ServerName> gsn = new ArrayList<>();
    for (Address addr : servers) {
      gsn.add(getServerName(addr));
    }
    assertEquals(2, gsn.size());
    for (Map.Entry<RegionInfo, ServerName> entry : MASTER.getAssignmentManager().getRegionStates()
      .getRegionAssignments().entrySet()) {
      if (entry.getKey().getTable().equals(tableName)) {
        // the group has exactly two servers, so "1 - index" selects the other one
        ServerName destination = gsn.get(1 - gsn.indexOf(entry.getValue()));
        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
          entry.getValue(), destination);
        TEST_UTIL.moveRegionAndWait(entry.getKey(), destination);
        break;
      }
    }
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
    // and then check if all table regions are online
    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
    }
    // better wait for a while for region reassign
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - gsn.size(),
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
      gsn.get(0).getPort());
    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // case 2: stop all the regionservers in my_group, and move another
    // regionserver(from the 'default' group) to my_group,
    // and then check if all table regions are online
    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
      .getLiveRegionServerThreads()) {
      if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
        break;
      }
    }
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - gsn.size(),
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    ServerName newServer = MASTER.getServerManager().getOnlineServersList().get(0);
    ADMIN.moveServersToRSGroup(Sets.newHashSet(newServer.getAddress()), groupName);
    // wait and check if table regions are online
    TEST_UTIL.waitTableAvailable(tableName, 30000);
  }

  /**
   * Moves hbase:meta to a one-server group, restarts that server while {@link VersionInfo} reports
   * a lower major version, and waits until no {@link ServerCrashProcedure} is left on the master.
   */
  @Test
  public void testLowerMetaGroupVersion() throws Exception {
    // create a rsgroup and move one regionserver to it
    String groupName = "meta_group";
    int groupRSCount = 1;
    addGroup(groupName, groupRSCount);

    // move hbase:meta to meta_group
    Set<TableName> toAddTables = new HashSet<>();
    toAddTables.add(TableName.META_TABLE_NAME);
    ADMIN.setRSGroup(toAddTables, groupName);
    assertTrue(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond()
      .contains(TableName.META_TABLE_NAME));

    // restart the regionserver in meta_group, and lower its version
    String originVersion = "";
    Set<Address> servers = new HashSet<>();
    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
      servers.add(addr);
      // Resolve the server name and read its version before asking it to stop, so that neither
      // lookup depends on how quickly the shutdown is noticed.
      ServerName serverName = getServerName(addr);
      originVersion = MASTER.getRegionServerVersion(serverName);
      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(serverName);
    }
    // better wait for a while for region reassign
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - groupRSCount,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    Address address = servers.iterator().next();
    int majorVersion = VersionInfo.getMajorVersion(originVersion);
    assertTrue(majorVersion >= 1);
    // same version string with the major component decremented by one
    String lowerVersion =
      String.valueOf(majorVersion - 1) + originVersion.substring(originVersion.indexOf("."));
    try {
      setVersionInfoVersion(lowerVersion);
      TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort());
      assertEquals(NUM_SLAVES_BASE,
        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
      assertTrue(VersionInfo.compareVersion(originVersion,
        MASTER.getRegionServerVersion(getServerName(address))) > 0);
      LOG.debug("wait for META assigned...");
      // SCP finished, which means all regions assigned too.
      TEST_UTIL.waitFor(60000, () -> TEST_UTIL.getHBaseCluster().getMaster().getProcedures()
        .stream().noneMatch(p -> p instanceof ServerCrashProcedure));
    } finally {
      // always restore the real version so the override does not outlive this test
      setVersionInfoVersion(Version.version);
    }
  }

  /**
   * Overwrites the static {@code version} field of {@link VersionInfo} through reflection.
   * @param newValue the value to store in the field
   * @throws Exception if the field cannot be found or written
   */
  private static void setVersionInfoVersion(String newValue) throws Exception {
    Field f = VersionInfo.class.getDeclaredField("version");
    f.setAccessible(true);
    f.set(null, newValue);
  }
}