/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.rsgroup;

import static org.apache.hadoop.hbase.util.Threads.sleep;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Version;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.VersionInfo;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * Tests how region server groups behave when the region servers that back a group are stopped,
 * restarted, replaced by servers from another group, or restarted reporting a lower version.
 */
@Category({ LargeTests.class })
public class TestRSGroupsKillRS extends TestRSGroupsBase {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);

  protected static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);

  /**
   * Tunes the handler configuration and then delegates to the shared class-level setup.
   */
  @BeforeClass
  public static void setUp() throws Exception {
    // avoid all the handlers blocked when meta is offline, and regionServerReport can not be
    // processed which causes dead lock.
    TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
    TEST_UTIL.getConfiguration()
      .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f);
    setUpTestBeforeClass();
  }

  @AfterClass
  public static void tearDown() throws Exception {
    tearDownAfterClass();
  }

  @Before
  public void beforeMethod() throws Exception {
    setUpBeforeMethod();
  }

  @After
  public void afterMethod() throws Exception {
    tearDownAfterMethod();
  }

  /**
   * Blocks until {@code condition} evaluates to {@code true}, using {@code WAIT_TIMEOUT} as the
   * timeout passed to {@code TEST_UTIL.waitFor}.
   * @param condition the predicate to poll
   * @throws Exception whatever {@code TEST_UTIL.waitFor} or the predicate throws
   */
  private void awaitCondition(Waiter.Predicate<Exception> condition) throws Exception {
    TEST_UTIL.waitFor(WAIT_TIMEOUT, condition);
  }

  /**
   * Stops the only region server of a one-server group, verifies regions go into transition,
   * then moves a server from the default group in and checks that the table's region ends up on
   * that new server.
   */
  @Test
  public void testKillRS() throws Exception {
    RSGroupInfo appInfo = addGroup("appInfo", 1);
    final TableName tableName = TableName.valueOf(tablePrefix + "_ns", name.getMethodName());
    admin.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
    admin.createTable(desc);
    // wait for created table to be assigned
    awaitCondition(() -> getTableRegionMap().get(desc.getTableName()) != null);

    final ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
    assertEquals(1, admin.getRegions(targetServer).size());

    try {
      // stopping may cause an exception
      // due to the connection loss
      admin.stopRegionServer(targetServer.getAddress().toString());
    } catch (Exception e) {
      // Best effort: the waits below verify that the server actually went down.
      LOG.debug("Ignoring exception while stopping {}", targetServer, e);
    }
    // wait until the server is actually down
    awaitCondition(
      () -> !cluster.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer));
    // there is only one rs in the group and we killed it, so the region can not be online, until
    // later we add new servers to it.
    awaitCondition(() -> !cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty());
    Set<Address> newServers = Sets.newHashSet();
    newServers
      .add(rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
    rsGroupAdmin.moveServers(newServers, appInfo.getName());

    // Make sure all the table's regions get reassigned
    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
    admin.disableTable(tableName);
    admin.enableTable(tableName);

    // wait for region to be assigned
    awaitCondition(() -> cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty());

    ServerName targetServer1 = getServerName(newServers.iterator().next());
    assertEquals(1, admin.getRegions(targetServer1).size());
    assertEquals(tableName, admin.getRegions(targetServer1).get(0).getTable());
  }

  /**
   * Stops every region server of a two-server group and checks that the table becomes available
   * again, first after restarting one of the group's servers and then after moving a server from
   * another group into it.
   */
  @Test
  public void testKillAllRSInGroup() throws Exception {
    // create a rsgroup and move two regionservers to it
    String groupName = "my_group";
    int groupRSCount = 2;
    addGroup(groupName, groupRSCount);

    // create a table, and move it to my_group
    byte[] family = Bytes.toBytes("f");
    // The Table handle is only needed for loading, so release it right afterwards.
    try (Table t = TEST_UTIL.createMultiRegionTable(tableName, family, 5)) {
      TEST_UTIL.loadTable(t, family);
    }
    Set<TableName> toAddTables = new HashSet<>();
    toAddTables.add(tableName);
    rsGroupAdmin.moveTables(toAddTables, groupName);
    assertTrue(rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(tableName));
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // check my_group servers and table regions
    Set<Address> servers = rsGroupAdmin.getRSGroupInfo(groupName).getServers();
    assertEquals(2, servers.size());
    LOG.debug("group servers {}", servers);
    for (RegionInfo tr : master.getAssignmentManager().getRegionStates()
      .getRegionsOfTable(tableName)) {
      assertTrue(servers.contains(master.getAssignmentManager().getRegionStates()
        .getRegionAssignments().get(tr).getAddress()));
    }

    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
    // ('lastHost' of other regions are in 'default' group)
    // and check if all table regions are online
    List<ServerName> gsn = new ArrayList<>();
    for (Address addr : servers) {
      gsn.add(getServerName(addr));
    }
    assertEquals(2, gsn.size());
    for (Map.Entry<RegionInfo, ServerName> entry : master.getAssignmentManager().getRegionStates()
      .getRegionAssignments().entrySet()) {
      if (entry.getKey().getTable().equals(tableName)) {
        // gsn holds exactly two servers, so (1 - index) selects the other one.
        ServerName destination = gsn.get(1 - gsn.indexOf(entry.getValue()));
        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
          entry.getValue(), destination);
        TEST_UTIL.moveRegionAndWait(entry.getKey(), destination);
        break;
      }
    }
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
    // and then check if all table regions are online
    for (Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
    }
    // better wait for a while for region reassign
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - gsn.size(),
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
      gsn.get(0).getPort());
    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // case 2: stop all the regionservers in my_group, and move another
    // regionserver(from the 'default' group) to my_group,
    // and then check if all table regions are online
    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
      .getLiveRegionServerThreads()) {
      if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
        break;
      }
    }
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - gsn.size(),
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    ServerName newServer = master.getServerManager().getOnlineServersList().get(0);
    rsGroupAdmin.moveServers(Sets.newHashSet(newServer.getAddress()), groupName);
    // wait and check if table regions are online
    TEST_UTIL.waitTableAvailable(tableName, 30000);
  }

  /**
   * Moves hbase:meta into a one-server group, restarts that server so that it reports a version
   * whose major component is one lower than before, and waits until no
   * {@link ServerCrashProcedure} is listed by the master any more.
   */
  @Test
  public void testLowerMetaGroupVersion() throws Exception {
    // create a rsgroup and move one regionserver to it
    String groupName = "meta_group";
    int groupRSCount = 1;
    addGroup(groupName, groupRSCount);

    // move hbase:meta to meta_group
    Set<TableName> toAddTables = new HashSet<>();
    toAddTables.add(TableName.META_TABLE_NAME);
    rsGroupAdmin.moveTables(toAddTables, groupName);
    assertTrue(
      rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(TableName.META_TABLE_NAME));

    // restart the regionserver in meta_group, and lower its version
    String originVersion = "";
    Set<Address> servers = new HashSet<>();
    for (Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
      servers.add(addr);
      ServerName serverName = getServerName(addr);
      // Record the reported version before asking the server to stop, so the lookup does not
      // depend on how quickly the master notices the shutdown.
      originVersion = master.getRegionServerVersion(serverName);
      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(serverName);
    }
    // better wait for a while for region reassign
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - groupRSCount,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    Address address = servers.iterator().next();
    int majorVersion = VersionInfo.getMajorVersion(originVersion);
    assertTrue(majorVersion >= 1);
    int firstDot = originVersion.indexOf('.');
    assertTrue("Unexpected version format: " + originVersion, firstDot > 0);
    // Decrement only the major component and keep the remainder (including the leading dot),
    // e.g. 3.0.0 -> 2.0.0. Dropping the dot would produce "20", which is not a lower version.
    String lowerVersion = (majorVersion - 1) + originVersion.substring(firstDot);
    setFinalStatic(Version.class.getField("version"), lowerVersion);
    TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort());
    assertEquals(NUM_SLAVES_BASE,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    assertTrue(VersionInfo.compareVersion(originVersion,
      master.getRegionServerVersion(getServerName(address))) > 0);
    LOG.debug("wait for META assigned...");
    // SCP finished, which means all regions assigned too.
    TEST_UTIL.waitFor(60000, () -> TEST_UTIL.getHBaseCluster().getMaster().getProcedures().stream()
      .noneMatch(p -> p instanceof ServerCrashProcedure));
  }

  /**
   * Overwrites a {@code static final} field through reflection by clearing the
   * {@link Modifier#FINAL} bit on the field before setting it.
   * <p>
   * NOTE(review): this relies on reflective access to the private {@code modifiers} field of
   * {@link Field}; newer JDKs are understood to hide that field from reflection, so confirm this
   * still works on the JDK versions the build targets.
   * @param field    the static field to overwrite
   * @param newValue the value to store in the field
   * @throws Exception if any of the reflective lookups or writes fail
   */
  private static void setFinalStatic(Field field, Object newValue) throws Exception {
    field.setAccessible(true);
    Field modifiersField = Field.class.getDeclaredField("modifiers");
    modifiersField.setAccessible(true);
    modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
    field.set(null, newValue);
  }
}