/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.rsgroup;

import static org.apache.hadoop.hbase.util.Threads.sleep;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Version;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.VersionInfo;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * Tests that regions of tables bound to a region server group come back online after the region
 * servers of that group are stopped and servers are restarted in, or moved into, the group.
 */
@Category({ LargeTests.class })
public class TestRSGroupsKillRS extends TestRSGroupsBase {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);

  protected static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);

  @BeforeClass
  public static void setUp() throws Exception {
    setUpTestBeforeClass();
  }

  @AfterClass
  public static void tearDown() throws Exception {
    tearDownAfterClass();
  }

  @Before
  public void beforeMethod() throws Exception {
    setUpBeforeMethod();
  }

  @After
  public void afterMethod() throws Exception {
    tearDownAfterMethod();
  }

  /**
   * Stops the only region server of a group that hosts a namespace-bound table, then moves a
   * server from the default group into it and checks that the table's region ends up on the
   * newly added server.
   */
  @Test
  public void testKillRS() throws Exception {
    RSGroupInfo appInfo = addGroup("appInfo", 1);
    final TableName tableName = TableName.valueOf(tablePrefix + "_ns", name.getMethodName());
    admin.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
    admin.createTable(desc);

    // wait for created table to be assigned
    // (predicates are typed explicitly so that the checked-exception parameter stays Exception)
    Waiter.Predicate<Exception> tableAssigned =
      () -> getTableRegionMap().get(desc.getTableName()) != null;
    TEST_UTIL.waitFor(WAIT_TIMEOUT, tableAssigned);

    ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
    assertEquals(1, admin.getRegions(targetServer).size());

    try {
      // stopping may cause an exception
      // due to the connection loss
      admin.stopRegionServer(targetServer.getAddress().toString());
    } catch (Exception e) {
      // Best effort: the stop request itself may fail, the wait below verifies the outcome.
      LOG.debug("Ignoring exception while stopping region server {}", targetServer, e);
    }

    // wait until the server is actually down
    Waiter.Predicate<Exception> serverGone =
      () -> !cluster.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
    TEST_UTIL.waitFor(WAIT_TIMEOUT, serverGone);

    // there is only one rs in the group and we killed it, so the region can not be online, until
    // later we add new servers to it.
    Waiter.Predicate<Exception> regionInTransition =
      () -> !cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty();
    TEST_UTIL.waitFor(WAIT_TIMEOUT, regionInTransition);

    Set<Address> newServers = Sets.newHashSet();
    newServers
      .add(rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
    rsGroupAdmin.moveServers(newServers, appInfo.getName());

    // Make sure all the table's regions get reassigned
    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
    admin.disableTable(tableName);
    admin.enableTable(tableName);

    // wait for region to be assigned
    Waiter.Predicate<Exception> noRegionInTransition =
      () -> cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty();
    TEST_UTIL.waitFor(WAIT_TIMEOUT, noRegionInTransition);

    ServerName targetServer1 = getServerName(newServers.iterator().next());
    assertEquals(1, admin.getRegions(targetServer1).size());
    assertEquals(tableName, admin.getRegions(targetServer1).get(0).getTable());
  }

  /**
   * Stops every region server of a two-server group and checks that the group's table becomes
   * available again, first after restarting one of the stopped servers and then after moving
   * another online server into the group.
   */
  @Test
  public void testKillAllRSInGroup() throws Exception {
    // create a rsgroup and move two regionservers to it
    String groupName = "my_group";
    int groupRSCount = 2;
    addGroup(groupName, groupRSCount);

    // create a table, and move it to my_group
    // (the Table handle is only needed for loading, so close it right away)
    try (Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5)) {
      TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
    }
    Set<TableName> toAddTables = new HashSet<>();
    toAddTables.add(tableName);
    rsGroupAdmin.moveTables(toAddTables, groupName);
    assertTrue(rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(tableName));
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // check my_group servers and table regions
    Set<Address> servers = rsGroupAdmin.getRSGroupInfo(groupName).getServers();
    assertEquals(groupRSCount, servers.size());
    LOG.debug("group servers {}", servers);
    for (RegionInfo tr : master.getAssignmentManager().getRegionStates()
      .getRegionsOfTable(tableName)) {
      assertTrue(servers.contains(master.getAssignmentManager().getRegionStates()
        .getRegionAssignments().get(tr).getAddress()));
    }

    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
    // ('lastHost' of other regions are in 'default' group)
    // and check if all table regions are online
    List<ServerName> gsn = new ArrayList<>();
    for (Address addr : servers) {
      gsn.add(getServerName(addr));
    }
    assertEquals(groupRSCount, gsn.size());
    for (Map.Entry<RegionInfo, ServerName> entry : master.getAssignmentManager().getRegionStates()
      .getRegionAssignments().entrySet()) {
      if (entry.getKey().getTable().equals(tableName)) {
        // With exactly two group servers, "1 - index" selects the one not hosting the region.
        ServerName destination = gsn.get(1 - gsn.indexOf(entry.getValue()));
        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
          entry.getValue(), destination);
        TEST_UTIL.moveRegionAndWait(entry.getKey(), destination);
        break;
      }
    }
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
    // and then check if all table regions are online
    for (Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
    }
    // better wait for a while for region reassign
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - gsn.size(),
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
      gsn.get(0).getPort());
    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // case 2: stop all the regionservers in my_group, and move another
    // regionserver(from the 'default' group) to my_group,
    // and then check if all table regions are online
    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
      .getLiveRegionServerThreads()) {
      ServerName liveServer = rst.getRegionServer().getServerName();
      if (liveServer.getAddress().equals(gsn.get(0).getAddress())) {
        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(liveServer);
        break;
      }
    }
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - gsn.size(),
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    ServerName newServer = master.getServerManager().getOnlineServersList().get(0);
    rsGroupAdmin.moveServers(Sets.newHashSet(newServer.getAddress()), groupName);
    // wait and check if table regions are online
    TEST_UTIL.waitTableAvailable(tableName, 30000);
  }

  /**
   * Moves hbase:meta to a single-server group, restarts that server reporting a version whose
   * major number is one lower than before, and waits until no {@link ServerCrashProcedure} is
   * listed by the master any more.
   */
  @Test
  public void testLowerMetaGroupVersion() throws Exception {
    // create a rsgroup and move one regionserver to it
    String groupName = "meta_group";
    int groupRSCount = 1;
    addGroup(groupName, groupRSCount);

    // move hbase:meta to meta_group
    Set<TableName> toAddTables = new HashSet<>();
    toAddTables.add(TableName.META_TABLE_NAME);
    rsGroupAdmin.moveTables(toAddTables, groupName);
    assertTrue(
      rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(TableName.META_TABLE_NAME));

    // restart the regionserver in meta_group, and lower its version
    String originVersion = "";
    Set<Address> servers = new HashSet<>();
    for (Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
      servers.add(addr);
      ServerName serverName = getServerName(addr);
      // Read the version before requesting the stop, so the lookup does not race with the
      // server shutting down.
      originVersion = master.getRegionServerVersion(serverName);
      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(serverName);
    }
    // better wait for a while for region reassign
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - groupRSCount,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    Address address = servers.iterator().next();
    int majorVersion = VersionInfo.getMajorVersion(originVersion);
    assertTrue(majorVersion >= 1);
    // Decrement only the major component and keep the rest of the version string (including the
    // separating dot), e.g. "3.0.0-SNAPSHOT" becomes "2.0.0-SNAPSHOT".
    int firstDot = originVersion.indexOf('.');
    assertTrue("Unexpected version format: " + originVersion, firstDot > 0);
    String lowerVersion = String.valueOf(majorVersion - 1) + originVersion.substring(firstDot);
    setFinalStatic(Version.class.getField("version"), lowerVersion);
    TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostname(), address.getPort());
    assertEquals(NUM_SLAVES_BASE,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    assertTrue(VersionInfo.compareVersion(originVersion,
      master.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0);
    LOG.debug("wait for META assigned...");
    // SCP finished, which means all regions assigned too.
    TEST_UTIL.waitFor(60000, () -> TEST_UTIL.getHBaseCluster().getMaster().getProcedures().stream()
      .noneMatch(p -> p instanceof ServerCrashProcedure));
  }

  /**
   * Overwrites a static field via reflection after clearing its {@code final} modifier bit.
   * <p>
   * NOTE(review): this depends on the private {@code modifiers} field of
   * {@link java.lang.reflect.Field}, which newer JDK releases hide from reflection - confirm the
   * JDK versions this test is expected to run on.
   * @param field the static field to overwrite
   * @param newValue the value to store into the field
   * @throws Exception if any of the reflective lookups or writes fails
   */
  private static void setFinalStatic(Field field, Object newValue) throws Exception {
    field.setAccessible(true);
    Field modifiersField = Field.class.getDeclaredField("modifiers");
    modifiersField.setAccessible(true);
    modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
    field.set(null, newValue);
  }
}