/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.balancer;

import static org.apache.hadoop.hbase.ServerName.NON_STARTCODE;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * Mini-cluster tests for {@link LoadOnlyFavoredStochasticBalancer}. Every test runs against a
 * freshly started cluster of {@link #SLAVES} region servers with the balancer initially switched
 * off, and checks how regions and their favored nodes (FN) are placed.
 * <p>
 * The class as a whole is currently {@link Disabled}.
 */
@Disabled
@Tag(MediumTests.TAG)
public class TestFavoredStochasticLoadBalancer extends BalancerTestBase {

  private static final Logger LOG =
    LoggerFactory.getLogger(TestFavoredStochasticLoadBalancer.class);

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  /** Number of region servers started for every test. */
  private static final int SLAVES = 8;
  /** Number of regions in the pre-split test tables. */
  private static final int REGION_NUM = SLAVES * 3;

  private Admin admin;
  private HMaster master;
  private SingleProcessHBaseCluster cluster;

  /** Configures the shared testing utility to use the favored nodes based load balancer. */
  @BeforeAll
  public static void setupBeforeClass() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();
    // Enable the favored nodes based load balancer
    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
      LoadOnlyFavoredStochasticBalancer.class, LoadBalancer.class);
  }

  /** Starts a mini cluster with {@link #SLAVES} region servers and turns the balancer off. */
  @BeforeEach
  public void startCluster() throws Exception {
    TEST_UTIL.startMiniCluster(SLAVES);
    TEST_UTIL.getDFSCluster().waitClusterUp();
    cluster = TEST_UTIL.getMiniHBaseCluster();
    master = cluster.getMaster();
    admin = TEST_UTIL.getAdmin();
    // Tests switch the balancer back on explicitly at the point they want it to act.
    admin.balancerSwitch(false, true);
  }

  /** Shuts the mini cluster down and removes the test directory. */
  @AfterEach
  public void stopCluster() throws Exception {
    // Stop the cluster before deleting its working directory so that nothing is removed from
    // under live daemons; the directory is still cleaned up when the shutdown fails.
    try {
      TEST_UTIL.shutdownMiniCluster();
    } finally {
      TEST_UTIL.cleanupTestDir();
    }
  }

  /**
   * Runs the balancer after adding two region servers and verifies that none of the table's
   * regions end up on the newly started servers.
   */
  @Test
  public void testBasicBalance() throws Exception {
    TableName tableName = TableName.valueOf("testBasicBalance");
    createPreSplitTable(tableName);
    loadTestData(tableName);
    admin.flush(tableName);
    compactTable(tableName);

    JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServerAndWait(10000);
    JVMClusterUtil.RegionServerThread rs2 = cluster.startRegionServerAndWait(10000);

    // Now try to run balance, and verify no regions are moved to the 2 region servers recently
    // started.
    admin.balancerSwitch(true, true);
    assertTrue(admin.balance(), "Balancer did not run");
    TEST_UTIL.waitUntilNoRegionsInTransition(120000);

    assertHostsNoRegionsOf(tableName, rs1);
    assertHostsNoRegionsOf(tableName, rs2);
  }

  /**
   * Verifies that the balancer's round robin assignment returns a plan covering every region of
   * the test table and of the meta table.
   */
  @Test
  public void testRoundRobinAssignment() throws Exception {
    TableName tableName = TableName.valueOf("testRoundRobinAssignment");
    createPreSplitTable(tableName);
    loadTestData(tableName);
    admin.flush(tableName);

    LoadBalancer balancer = master.getLoadBalancer();
    List<RegionInfo> regions = admin.getRegions(tableName);
    regions.addAll(admin.getRegions(TableName.META_TABLE_NAME));
    Map<ServerName, List<RegionInfo>> assignments =
      balancer.roundRobinAssignment(regions, liveServers());
    // Whatever is left after removing every planned region was skipped by the balancer.
    for (List<RegionInfo> assigned : assignments.values()) {
      regions.removeAll(assigned);
    }
    assertEquals(0, regions.size(), "No region should be missed by balancer");
  }

  /**
   * Verifies that every region of a new table gets the expected number of favored nodes, and that
   * the replica load is reported for every live server both before and after the table is
   * deleted.
   */
  @Test
  public void testBasicRegionPlacementAndReplicaLoad() throws Exception {
    TableName tableName = TableName.valueOf("testBasicRegionPlacement");
    createPreSplitTable(tableName);

    FavoredNodesManager fnm = master.getFavoredNodesManager();
    for (RegionInfo rInfo : admin.getRegions(tableName)) {
      // Check for null on the returned list itself; a set built from it is never null.
      List<ServerName> favoredNodes = fnm.getFavoredNodes(rInfo);
      assertNotNull(favoredNodes);
      Set<ServerName> distinctFavoredNodes = Sets.newHashSet(favoredNodes);
      assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, distinctFavoredNodes.size());
    }

    List<ServerName> liveServers = liveServers();
    Map<ServerName, List<Integer>> replicaLoadMap = fnm.getReplicaLoad(liveServers);
    assertEquals(liveServers.size(), replicaLoadMap.size(), "Not all replica load collected.");
    for (Entry<ServerName, List<Integer>> entry : replicaLoadMap.entrySet()) {
      assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, entry.getValue().size());
      for (Integer load : entry.getValue()) {
        assertTrue(load >= 0);
      }
    }

    admin.disableTable(tableName);
    admin.deleteTable(tableName);
    liveServers = liveServers();
    replicaLoadMap = fnm.getReplicaLoad(liveServers);
    assertEquals(liveServers.size(), replicaLoadMap.size(),
      "Replica load should still be reported for every live server after the table is deleted");
  }

  /**
   * Deletes the favored nodes of a region and verifies that a random assignment generates new
   * ones and picks a destination among them.
   */
  @Test
  public void testRandomAssignmentWithNoFavNodes() throws Exception {
    TableName tableName = TableName.valueOf("testRandomAssignmentWithNoFavNodes");
    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
    admin.createTable(tableDescriptor);
    TEST_UTIL.waitTableAvailable(tableName);

    RegionInfo hri = admin.getRegions(tableName).get(0);

    FavoredNodesManager fnm = master.getFavoredNodesManager();
    fnm.deleteFavoredNodesForRegions(Lists.newArrayList(hri));
    assertNull(fnm.getFavoredNodes(hri), "Favored nodes not found null after delete");

    LoadBalancer balancer = master.getLoadBalancer();
    ServerName destination = balancer.randomAssignment(hri, liveServers());
    assertNotNull(destination);
    List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
    assertNotNull(favoredNodes);
    boolean containsFN = false;
    for (ServerName sn : favoredNodes) {
      if (ServerName.isSameAddress(destination, sn)) {
        containsFN = true;
      }
    }
    assertTrue(containsFN, "Destination server does not belong to favored nodes.");
  }

  /**
   * Deletes the favored nodes of one region, expects the balancer to leave exactly one region in
   * transition, then re-assigns it and verifies all regions end up on one of their favored nodes.
   */
  @Test
  public void testBalancerWithoutFavoredNodes() throws Exception {
    TableName tableName = TableName.valueOf("testBalancerWithoutFavoredNodes");
    createPreSplitTable(tableName);

    final RegionInfo region = admin.getRegions(tableName).get(0);
    LOG.info("Region thats supposed to be in transition: {}", region);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);

    fnm.deleteFavoredNodesForRegions(Lists.newArrayList(region));

    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    admin.balancerSwitch(true, true);

    // Balancer should unassign the region
    assertTrue(admin.balance(), "Balancer did not run");
    TEST_UTIL.waitUntilNoRegionTransitScheduled();
    assertEquals(1, master.getAssignmentManager().getRegionsInTransitionCount(),
      "One region should be unassigned");

    admin.assign(region.getEncodedNameAsBytes());
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);
    assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, currentFN.size(),
      "Expected number of FN not present");

    assertTrue(admin.balance(), "Balancer did not run");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Gives one region favored nodes that exclude its previous ones, so that its current host is no
   * longer a favored node, and verifies the balancer moves it onto a favored node.
   */
  @Disabled
  @Test
  public void testMisplacedRegions() throws Exception {
    TableName tableName = TableName.valueOf("testMisplacedRegions");
    createPreSplitTable(tableName);

    final RegionInfo misplacedRegion = admin.getRegions(tableName).get(0);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    final List<ServerName> currentFN = fnm.getFavoredNodes(misplacedRegion);
    assertNotNull(currentFN);

    // Candidates for the new favored nodes: every live server that is not a current one.
    List<ServerName> serversForNewFN = liveServersWithoutStartCode().stream()
      .filter(sn -> !currentFN.contains(sn)).collect(Collectors.toList());
    // "conf" is not declared in this class; presumably inherited from BalancerTestBase.
    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(serversForNewFN, conf);
    helper.initialize();
    List<ServerName> newFavoredNodes = helper.generateFavoredNodes(misplacedRegion);
    assertNotNull(newFavoredNodes);
    assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, newFavoredNodes.size());
    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
    regionFNMap.put(misplacedRegion, newFavoredNodes);
    fnm.updateFavoredNodes(regionFNMap);

    final RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    final ServerName current = regionStates.getRegionServerOfRegion(misplacedRegion);
    assertNull(
      FavoredNodesPlan.getFavoredServerPosition(fnm.getFavoredNodes(misplacedRegion), current),
      "Misplaced region is still hosted on favored node, not expected.");
    admin.balancerSwitch(true, true);
    assertTrue(admin.balance(), "Balancer did not run");
    TEST_UTIL.waitFor(120000, 30000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        ServerName host = regionStates.getRegionServerOfRegion(misplacedRegion);
        return !ServerName.isSameAddress(host, current);
      }
    });
    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Stops all but the first favored node of one region and verifies that all regions stay online
   * and are hosted on a favored node after balancing.
   */
  @Test
  public void test2FavoredNodesDead() throws Exception {
    TableName tableName = TableName.valueOf("test2FavoredNodesDead");
    createPreSplitTable(tableName);

    final RegionInfo region = admin.getRegions(tableName).get(0);
    LOG.info("Region that's supposed to be in transition: {}", region);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);

    List<ServerName> serversToStop = Lists.newArrayList(currentFN);
    serversToStop.remove(currentFN.get(0));

    // Lets kill 2 FN for the region. All regions should still be assigned
    stopServersAndWaitUntilProcessed(serversToStop);

    TEST_UTIL.waitUntilNoRegionsInTransition();
    final RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return regionStates.getRegionState(region).isOpened();
      }
    });

    assertEquals(REGION_NUM, admin.getRegions(tableName).size(), "Not all regions are online");
    admin.balancerSwitch(true, true);
    assertTrue(admin.balance(), "Balancer did not run");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Stops every favored node of one region, expects the region to go to FAILED_OPEN, then
   * regenerates favored nodes for all regions in transition and verifies they come back online on
   * a favored node.
   */
  @Disabled
  @Test
  public void testAllFavoredNodesDead() throws Exception {
    TableName tableName = TableName.valueOf("testAllFavoredNodesDead");
    createPreSplitTable(tableName);

    final RegionInfo region = admin.getRegions(tableName).get(0);
    LOG.info("Region that's supposed to be in transition: {}", region);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);

    // Lets kill all the RS that are favored nodes for this region.
    stopServersAndWaitUntilProcessed(currentFN);

    final AssignmentManager am = master.getAssignmentManager();
    final RegionStates regionStates = am.getRegionStates();
    TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return regionStates.getRegionState(region).isFailedOpen();
      }
    });
    assertTrue(regionStates.getRegionState(region).isFailedOpen(),
      "Region: " + region + " should be RIT");

    // Regenerate FN and assign, everything else should be fine
    // "conf" is not declared in this class; presumably inherited from BalancerTestBase.
    FavoredNodeAssignmentHelper helper =
      new FavoredNodeAssignmentHelper(liveServersWithoutStartCode(), conf);
    helper.initialize();

    for (RegionStateNode regionState : am.getRegionsInTransition()) {
      regenerateFavoredNodesAndAssign(helper, fnm, regionState.getRegionInfo());
    }
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    assertEquals(REGION_NUM, admin.getRegions(tableName).size(), "Not all regions are online");

    admin.balancerSwitch(true, true);
    assertTrue(admin.balance(), "Balancer did not run");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Same scenario as {@link #testAllFavoredNodesDead()}, but the master is restarted while the
   * regions are in FAILED_OPEN; they are expected to still be in that state afterwards and to
   * recover once new favored nodes are generated.
   */
  @Disabled
  @Test
  public void testAllFavoredNodesDeadMasterRestarted() throws Exception {
    TableName tableName = TableName.valueOf("testAllFavoredNodesDeadMasterRestarted");
    createPreSplitTable(tableName);

    final RegionInfo region = admin.getRegions(tableName).get(0);
    LOG.info("Region that's supposed to be in transition: {}", region);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);

    // Lets kill all the RS that are favored nodes for this region.
    stopServersAndWaitUntilProcessed(currentFN);

    final AssignmentManager am = master.getAssignmentManager();
    final RegionStates regionStatesBeforeMaster = am.getRegionStates();
    TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return regionStatesBeforeMaster.getRegionState(region).isFailedOpen();
      }
    });

    assertTrue(regionStatesBeforeMaster.getRegionState(region).isFailedOpen(),
      "Region: " + region + " should be RIT");

    List<RegionInfo> rit = Lists.newArrayList();
    for (RegionStateNode regionState : am.getRegionsInTransition()) {
      RegionInfo regionInfo = regionState.getRegionInfo();
      LOG.debug("Region in transition after stopping FN's: {}", regionInfo);
      rit.add(regionInfo);
      assertTrue(regionStatesBeforeMaster.getRegionState(regionInfo).isFailedOpen(),
        "Region: " + regionInfo + " should be RIT");
      assertEquals(tableName, regionInfo.getTable(),
        "Region: " + regionInfo + " does not belong to table: " + tableName);
    }

    // Named clusterConf so it does not shadow the "conf" field used by the other tests.
    Configuration clusterConf = cluster.getConf();
    // Lower the minimum region server count to match the servers left after stopping the FN.
    clusterConf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART,
      SLAVES - FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);

    cluster.stopMaster(master.getServerName());
    cluster.waitForMasterToStop(master.getServerName(), 60000);

    cluster.startMaster();
    cluster.waitForActiveAndReadyMaster();
    master = cluster.getMaster();
    fnm = master.getFavoredNodesManager();

    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    assertTrue(regionStates.getRegionState(region).isFailedOpen(),
      "Region: " + region + " should be RIT");

    for (RegionInfo regionInfo : rit) {
      assertTrue(regionStates.getRegionState(regionInfo).isFailedOpen(),
        "Region: " + regionInfo + " should be RIT");
    }

    // Regenerate FN and assign, everything else should be fine
    FavoredNodeAssignmentHelper helper =
      new FavoredNodeAssignmentHelper(liveServersWithoutStartCode(), clusterConf);
    helper.initialize();

    for (RegionInfo regionInfo : rit) {
      regenerateFavoredNodesAndAssign(helper, fnm, regionInfo);
    }
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    assertEquals(REGION_NUM, admin.getRegions(tableName).size(), "Not all regions are online");

    admin.balancerSwitch(true, true);
    assertTrue(admin.balance(), "Balancer did not run");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Creates a table with the catalog family, pre-split into {@link #REGION_NUM} regions between
   * the keys "aaa" and "zzz", and waits until it is available.
   */
  private void createPreSplitTable(TableName tableName) throws Exception {
    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
    TEST_UTIL.waitTableAvailable(tableName);
  }

  /** Loads test rows into the catalog family of the table, closing the table handle afterwards. */
  private void loadTestData(TableName tableName) throws IOException {
    try (Table table = admin.getConnection().getTable(tableName)) {
      TEST_UTIL.loadTable(table, HConstants.CATALOG_FAMILY);
    }
  }

  /** Returns a new mutable list of the servers the cluster metrics currently report as live. */
  private List<ServerName> liveServers() throws IOException {
    return Lists.newArrayList(
      admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet());
  }

  /**
   * Returns the live servers as host/port names carrying {@code NON_STARTCODE}, the form handed
   * to {@link FavoredNodeAssignmentHelper} by these tests.
   */
  private List<ServerName> liveServersWithoutStartCode() throws IOException {
    List<ServerName> servers = Lists.newArrayList();
    for (ServerName sn : liveServers()) {
      servers.add(ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE));
    }
    return servers;
  }

  /**
   * Generates favored nodes for the region, checks their number, stores them in the favored nodes
   * manager and asks the master to assign the region.
   */
  private void regenerateFavoredNodesAndAssign(FavoredNodeAssignmentHelper helper,
    FavoredNodesManager fnm, RegionInfo regionInfo) throws IOException {
    List<ServerName> newFavoredNodes = helper.generateFavoredNodes(regionInfo);
    assertNotNull(newFavoredNodes);
    assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, newFavoredNodes.size());
    LOG.info("Region: {} FN: {}", regionInfo.getEncodedName(), newFavoredNodes);

    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
    regionFNMap.put(regionInfo, newFavoredNodes);
    fnm.updateFavoredNodes(regionFNMap);
    LOG.info("Assigning region: {}", regionInfo.getEncodedName());
    admin.assign(regionInfo.getEncodedNameAsBytes());
  }

  /** Asserts that the given region server hosts no region of the table. */
  private void assertHostsNoRegionsOf(TableName tableName, JVMClusterUtil.RegionServerThread rs)
    throws IOException {
    for (RegionInfo hri : admin.getRegions(rs.getRegionServer().getServerName())) {
      assertFalse(hri.getTable().equals(tableName),
        "New RS contains regions belonging to table: " + tableName);
    }
  }

  /** Asserts that every region of the table is hosted on one of its favored nodes. */
  private void checkFavoredNodeAssignments(TableName tableName, FavoredNodesManager fnm,
    RegionStates regionStates) throws IOException {
    for (RegionInfo hri : admin.getRegions(tableName)) {
      ServerName host = regionStates.getRegionServerOfRegion(hri);
      assertNotNull(FavoredNodesPlan.getFavoredServerPosition(fnm.getFavoredNodes(hri), host),
        "Region: " + hri.getEncodedName() + " not on FN, current: " + host + " FN list: "
          + fnm.getFavoredNodes(hri));
    }
  }

  /**
   * Stops every live region server whose address matches one of the given servers, waits until
   * the master has no dead servers in progress, and asserts that exactly that many servers are
   * gone.
   */
  private void stopServersAndWaitUntilProcessed(List<ServerName> currentFN) throws Exception {
    for (ServerName sn : currentFN) {
      for (JVMClusterUtil.RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
        ServerName liveServer = rst.getRegionServer().getServerName();
        // Only the address is compared, so the start code of sn does not have to match.
        if (ServerName.isSameAddress(sn, liveServer)) {
          LOG.info("Shutting down server: {}", sn);
          cluster.stopRegionServer(liveServer);
          cluster.waitForRegionServerToStop(liveServer, 60000);
        }
      }
    }

    // Wait until dead servers are processed.
    TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return !master.getServerManager().areDeadServersInProgress();
      }
    });

    assertEquals(SLAVES - currentFN.size(), cluster.getLiveRegionServerThreads().size(),
      "Not all servers killed");
  }

  /** Runs a major compaction on every region of the table on every region server. */
  private void compactTable(TableName tableName) throws IOException {
    for (JVMClusterUtil.RegionServerThread t : cluster.getRegionServerThreads()) {
      for (HRegion region : t.getRegionServer().getRegions(tableName)) {
        region.compact(true);
      }
    }
  }
}