/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.balancer;

import static org.apache.hadoop.hbase.ServerName.NON_STARTCODE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * Mini-cluster tests for favored-node aware balancing. {@link #setupBeforeClass()} installs
 * {@link LoadOnlyFavoredStochasticBalancer} as the master's load balancer, and every test runs
 * against its own freshly started cluster of {@link #SLAVES} region servers with the balancer
 * initially switched off.
 * <p>
 * The whole class is currently disabled through the class-level {@link Ignore}.
 */
@Ignore // Disabled
@Category(MediumTests.class)
public class TestFavoredStochasticLoadBalancer extends BalancerTestBase {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFavoredStochasticLoadBalancer.class);

  private static final Logger LOG =
    LoggerFactory.getLogger(TestFavoredStochasticLoadBalancer.class);

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  /** Number of region servers started for every test. */
  private static final int SLAVES = 8;
  /** Number of regions each pre-split test table is created with. */
  private static final int REGION_NUM = SLAVES * 3;

  private Admin admin;
  private HMaster master;
  private SingleProcessHBaseCluster cluster;

  /** Configures the favored-node based balancer before any cluster is started. */
  @BeforeClass
  public static void setupBeforeClass() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();
    // Enable the favored nodes based load balancer
    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
      LoadOnlyFavoredStochasticBalancer.class, LoadBalancer.class);
  }

  /** Starts a fresh mini cluster and switches the balancer off so tests trigger it explicitly. */
  @Before
  public void startCluster() throws Exception {
    TEST_UTIL.startMiniCluster(SLAVES);
    TEST_UTIL.getDFSCluster().waitClusterUp();
    cluster = TEST_UTIL.getMiniHBaseCluster();
    master = cluster.getMaster();
    admin = TEST_UTIL.getAdmin();
    admin.balancerSwitch(false, true);
  }

  /** Stops the mini cluster and removes the test directory. */
  @After
  public void stopCluster() throws Exception {
    // Shut the cluster down first so the test directory is not removed underneath running
    // daemons; the cleanup still runs if the shutdown fails.
    try {
      TEST_UTIL.shutdownMiniCluster();
    } finally {
      TEST_UTIL.cleanupTestDir();
    }
  }

  /**
   * Adds two region servers after the table exists and verifies a balancer run moves none of the
   * table's regions onto them.
   */
  @Test
  public void testBasicBalance() throws Exception {
    TableName tableName = TableName.valueOf("testBasicBalance");
    createPresplitTable(tableName);
    loadCatalogFamily(tableName);
    admin.flush(tableName);
    compactTable(tableName);

    JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServerAndWait(10000);
    JVMClusterUtil.RegionServerThread rs2 = cluster.startRegionServerAndWait(10000);

    // Now try to run balance, and verify no regions are moved to the 2 region servers recently
    // started.
    admin.balancerSwitch(true, true);
    assertTrue("Balancer did not run", admin.balance());
    TEST_UTIL.waitUntilNoRegionsInTransition(120000);

    assertHostsNoRegionsOf(tableName, rs1);
    assertHostsNoRegionsOf(tableName, rs2);
  }

  /** Verifies round-robin assignment returns a plan that covers every region handed to it. */
  @Test
  public void testRoundRobinAssignment() throws Exception {
    TableName tableName = TableName.valueOf("testRoundRobinAssignment");
    createPresplitTable(tableName);
    loadCatalogFamily(tableName);
    admin.flush(tableName);

    LoadBalancer balancer = master.getLoadBalancer();
    List<RegionInfo> regions = admin.getRegions(tableName);
    regions.addAll(admin.getRegions(TableName.META_TABLE_NAME));
    Map<ServerName, List<RegionInfo>> map =
      balancer.roundRobinAssignment(regions, liveServerNames());
    // Whatever is left after removing all planned regions was missed by the balancer.
    for (List<RegionInfo> regionInfos : map.values()) {
      regions.removeAll(regionInfos);
    }
    assertEquals("No region should be missed by balancer", 0, regions.size());
  }

  /**
   * Verifies every region of a new table gets a full set of distinct favored nodes and that the
   * replica load is reported for every live server, both before and after the table is deleted.
   */
  @Test
  public void testBasicRegionPlacementAndReplicaLoad() throws Exception {
    TableName tableName = TableName.valueOf("testBasicRegionPlacement");
    createPresplitTable(tableName);

    FavoredNodesManager fnm = master.getFavoredNodesManager();
    for (RegionInfo rInfo : admin.getRegions(tableName)) {
      // Check for null on the list itself; building the set from a null list would throw first.
      List<ServerName> favoredNodes = fnm.getFavoredNodes(rInfo);
      assertNotNull(favoredNodes);
      Set<ServerName> distinctFavoredNodes = Sets.newHashSet(favoredNodes);
      assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, distinctFavoredNodes.size());
    }

    List<ServerName> liveServers = liveServerNames();
    Map<ServerName, List<Integer>> replicaLoadMap = fnm.getReplicaLoad(liveServers);
    assertEquals("Not all replica load collected.", liveServers.size(), replicaLoadMap.size());
    for (Entry<ServerName, List<Integer>> entry : replicaLoadMap.entrySet()) {
      assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, entry.getValue().size());
      for (Integer load : entry.getValue()) {
        assertTrue(load >= 0);
      }
    }

    admin.disableTable(tableName);
    admin.deleteTable(tableName);
    liveServers = liveServerNames();
    replicaLoadMap = fnm.getReplicaLoad(liveServers);
    assertEquals("Replica load should still have one entry per live server after table delete.",
      liveServers.size(), replicaLoadMap.size());
  }

  /**
   * Deletes the favored nodes of a region and verifies that a random assignment both picks a
   * destination and leaves the region with favored nodes that include that destination.
   */
  @Test
  public void testRandomAssignmentWithNoFavNodes() throws Exception {
    TableName tableName = TableName.valueOf("testRandomAssignmentWithNoFavNodes");
    admin.createTable(newTableDescriptor(tableName));
    TEST_UTIL.waitTableAvailable(tableName);

    RegionInfo hri = admin.getRegions(tableName).get(0);

    FavoredNodesManager fnm = master.getFavoredNodesManager();
    fnm.deleteFavoredNodesForRegions(Lists.newArrayList(hri));
    assertNull("Favored nodes not found null after delete", fnm.getFavoredNodes(hri));

    LoadBalancer balancer = master.getLoadBalancer();
    ServerName destination = balancer.randomAssignment(hri, liveServerNames());
    assertNotNull(destination);
    List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
    assertNotNull(favoredNodes);
    boolean containsFN =
      favoredNodes.stream().anyMatch(sn -> ServerName.isSameAddress(destination, sn));
    assertTrue("Destination server does not belong to favored nodes.", containsFN);
  }

  /**
   * Deletes the favored nodes of one region, expects the balancer to leave exactly one region in
   * transition, then re-assigns it and verifies favored nodes are present and honoured again.
   */
  @Test
  public void testBalancerWithoutFavoredNodes() throws Exception {
    TableName tableName = TableName.valueOf("testBalancerWithoutFavoredNodes");
    createPresplitTable(tableName);

    final RegionInfo region = admin.getRegions(tableName).get(0);
    LOG.info("Region that's supposed to be in transition: {}", region);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);

    fnm.deleteFavoredNodesForRegions(Lists.newArrayList(region));

    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    admin.balancerSwitch(true, true);

    // Balancer should unassign the region
    assertTrue("Balancer did not run", admin.balance());
    TEST_UTIL.waitUntilNoRegionTransitScheduled();
    assertEquals("One region should be unassigned", 1,
      master.getAssignmentManager().getRegionsInTransitionCount());

    admin.assign(region.getEncodedNameAsBytes());
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);
    assertEquals("Expected number of FN not present",
      FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, currentFN.size());

    assertTrue("Balancer did not run", admin.balance());
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Gives a region favored nodes that exclude its current ones, then verifies the balancer moves
   * it off its current host and that all regions end up on favored nodes.
   */
  @Ignore
  @Test
  public void testMisplacedRegions() throws Exception {
    TableName tableName = TableName.valueOf("testMisplacedRegions");
    createPresplitTable(tableName);

    final RegionInfo misplacedRegion = admin.getRegions(tableName).get(0);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(misplacedRegion);
    assertNotNull(currentFN);

    // Candidates for the new favored nodes: every live server except the current favored nodes.
    List<ServerName> serversForNewFN = liveServersWithoutStartCode();
    serversForNewFN.removeAll(currentFN);

    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(serversForNewFN, conf);
    helper.initialize();
    List<ServerName> newFavoredNodes = helper.generateFavoredNodes(misplacedRegion);
    assertNotNull(newFavoredNodes);
    assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, newFavoredNodes.size());
    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
    regionFNMap.put(misplacedRegion, newFavoredNodes);
    fnm.updateFavoredNodes(regionFNMap);

    final RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    final ServerName current = regionStates.getRegionServerOfRegion(misplacedRegion);
    assertNull("Misplaced region is still hosted on favored node, not expected.",
      FavoredNodesPlan.getFavoredServerPosition(fnm.getFavoredNodes(misplacedRegion), current));
    admin.balancerSwitch(true, true);
    assertTrue("Balancer did not run", admin.balance());
    Waiter.Predicate<Exception> movedOffCurrentHost = () -> !ServerName
      .isSameAddress(regionStates.getRegionServerOfRegion(misplacedRegion), current);
    TEST_UTIL.waitFor(120000, 30000, movedOffCurrentHost);
    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Stops all but the first favored node of a region and verifies the region is still opened,
   * all regions stay online, and a balancer run leaves every region on a favored node.
   */
  @Test
  public void test2FavoredNodesDead() throws Exception {
    TableName tableName = TableName.valueOf("test2FavoredNodesDead");
    createPresplitTable(tableName);

    final RegionInfo region = admin.getRegions(tableName).get(0);
    LOG.info("Region that's supposed to be in transition: {}", region);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);

    List<ServerName> serversToStop = Lists.newArrayList(currentFN);
    serversToStop.remove(currentFN.get(0));

    // Lets kill 2 FN for the region. All regions should still be assigned
    stopServersAndWaitUntilProcessed(serversToStop);

    TEST_UTIL.waitUntilNoRegionsInTransition();
    final RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    Waiter.Predicate<Exception> regionOpened =
      () -> regionStates.getRegionState(region).isOpened();
    TEST_UTIL.waitFor(10000, regionOpened);

    assertEquals("Not all regions are online", REGION_NUM, admin.getRegions(tableName).size());
    admin.balancerSwitch(true, true);
    assertTrue("Balancer did not run", admin.balance());
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Stops every favored node of a region, expects the region to end up FAILED_OPEN, then
   * regenerates favored nodes for all regions in transition and verifies the table recovers.
   */
  @Ignore
  @Test
  public void testAllFavoredNodesDead() throws Exception {
    TableName tableName = TableName.valueOf("testAllFavoredNodesDead");
    createPresplitTable(tableName);

    final RegionInfo region = admin.getRegions(tableName).get(0);
    LOG.info("Region that's supposed to be in transition: {}", region);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);

    // Lets kill all the RS that are favored nodes for this region.
    stopServersAndWaitUntilProcessed(currentFN);

    final AssignmentManager am = master.getAssignmentManager();
    final RegionStates regionStates = am.getRegionStates();
    waitUntilFailedOpen(regionStates, region);

    // Regenerate FN and assign, everything else should be fine
    FavoredNodeAssignmentHelper helper =
      new FavoredNodeAssignmentHelper(liveServersWithoutStartCode(), conf);
    helper.initialize();

    // Snapshot the regions first so the assignments below cannot disturb the iteration.
    List<RegionInfo> regionsInTransition = am.getRegionsInTransition().stream()
      .map(RegionStateNode::getRegionInfo).collect(Collectors.toList());
    regenerateFavoredNodesAndAssign(helper, fnm, regionsInTransition);

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    assertEquals("Not all regions are online", REGION_NUM, admin.getRegions(tableName).size());

    admin.balancerSwitch(true, true);
    assertTrue("Balancer did not run", admin.balance());
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /**
   * Same scenario as {@link #testAllFavoredNodesDead()}, but the master is restarted while the
   * regions are FAILED_OPEN; they must still be FAILED_OPEN afterwards and recover once new
   * favored nodes are generated.
   */
  @Ignore
  @Test
  public void testAllFavoredNodesDeadMasterRestarted() throws Exception {
    TableName tableName = TableName.valueOf("testAllFavoredNodesDeadMasterRestarted");
    createPresplitTable(tableName);

    final RegionInfo region = admin.getRegions(tableName).get(0);
    LOG.info("Region that's supposed to be in transition: {}", region);
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    List<ServerName> currentFN = fnm.getFavoredNodes(region);
    assertNotNull(currentFN);

    // Lets kill all the RS that are favored nodes for this region.
    stopServersAndWaitUntilProcessed(currentFN);

    final AssignmentManager am = master.getAssignmentManager();
    final RegionStates regionStatesBeforeMaster = am.getRegionStates();
    waitUntilFailedOpen(regionStatesBeforeMaster, region);

    List<RegionInfo> rit = Lists.newArrayList();
    for (RegionStateNode regionState : am.getRegionsInTransition()) {
      RegionInfo regionInfo = regionState.getRegionInfo();
      LOG.debug("Region in transition after stopping FN's: {}", regionInfo);
      rit.add(regionInfo);
      assertTrue("Region: " + regionInfo + " should be RIT",
        regionStatesBeforeMaster.getRegionState(regionInfo).isFailedOpen());
      assertEquals("Region: " + regionInfo + " does not belong to table: " + tableName,
        tableName, regionInfo.getTable());
    }

    // Lower the minimum region server count to the number of servers that are still running.
    Configuration conf = cluster.getConf();
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART,
      SLAVES - FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);

    cluster.stopMaster(master.getServerName());
    cluster.waitForMasterToStop(master.getServerName(), 60000);

    cluster.startMaster();
    cluster.waitForActiveAndReadyMaster();
    master = cluster.getMaster();
    fnm = master.getFavoredNodesManager();

    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    assertTrue("Region: " + region + " should be RIT",
      regionStates.getRegionState(region).isFailedOpen());

    for (RegionInfo regionInfo : rit) {
      assertTrue("Region: " + regionInfo + " should be RIT",
        regionStates.getRegionState(regionInfo).isFailedOpen());
    }

    // Regenerate FN and assign, everything else should be fine
    FavoredNodeAssignmentHelper helper =
      new FavoredNodeAssignmentHelper(liveServersWithoutStartCode(), conf);
    helper.initialize();
    regenerateFavoredNodesAndAssign(helper, fnm, rit);

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    assertEquals("Not all regions are online", REGION_NUM, admin.getRegions(tableName).size());

    admin.balancerSwitch(true, true);
    assertTrue("Balancer did not run", admin.balance());
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

    checkFavoredNodeAssignments(tableName, fnm, regionStates);
  }

  /** Builds a descriptor for {@code tableName} with the catalog family as its only family. */
  private static TableDescriptor newTableDescriptor(TableName tableName) {
    return TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
  }

  /** Creates {@code tableName} pre-split into {@link #REGION_NUM} regions and waits for it. */
  private void createPresplitTable(TableName tableName) throws Exception {
    admin.createTable(newTableDescriptor(tableName), Bytes.toBytes("aaa"), Bytes.toBytes("zzz"),
      REGION_NUM);
    TEST_UTIL.waitTableAvailable(tableName);
  }

  /** Loads test data into the catalog family of {@code tableName}, closing the table handle. */
  private void loadCatalogFamily(TableName tableName) throws IOException {
    try (Table table = admin.getConnection().getTable(tableName)) {
      TEST_UTIL.loadTable(table, HConstants.CATALOG_FAMILY);
    }
  }

  /** Returns a new mutable list holding the names of the currently live region servers. */
  private List<ServerName> liveServerNames() throws IOException {
    return Lists.newArrayList(
      admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet());
  }

  /**
   * Returns a new mutable list with one entry per live region server, rebuilt from host name and
   * port with {@link ServerName#NON_STARTCODE} as the start code.
   */
  private List<ServerName> liveServersWithoutStartCode() throws IOException {
    List<ServerName> servers = Lists.newArrayList();
    for (ServerName sn : liveServerNames()) {
      servers.add(ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE));
    }
    return servers;
  }

  /** Fails if the given region server hosts any region of {@code tableName}. */
  private void assertHostsNoRegionsOf(TableName tableName,
    JVMClusterUtil.RegionServerThread rsThread) throws IOException {
    for (RegionInfo hri : admin.getRegions(rsThread.getRegionServer().getServerName())) {
      assertFalse("New RS contains regions belonging to table: " + tableName,
        hri.getTable().equals(tableName));
    }
  }

  /** Waits up to ten seconds for {@code region} to be FAILED_OPEN, then asserts that it is. */
  private void waitUntilFailedOpen(RegionStates regionStates, RegionInfo region)
    throws Exception {
    Waiter.Predicate<Exception> failedOpen =
      () -> regionStates.getRegionState(region).isFailedOpen();
    TEST_UTIL.waitFor(10000, failedOpen);
    assertTrue("Region: " + region + " should be RIT",
      regionStates.getRegionState(region).isFailedOpen());
  }

  /**
   * For each region: generates new favored nodes with {@code helper}, records them in
   * {@code fnm} and asks the master to assign the region.
   */
  private void regenerateFavoredNodesAndAssign(FavoredNodeAssignmentHelper helper,
    FavoredNodesManager fnm, List<RegionInfo> regions) throws Exception {
    for (RegionInfo regionInfo : regions) {
      List<ServerName> newFavoredNodes = helper.generateFavoredNodes(regionInfo);
      assertNotNull(newFavoredNodes);
      assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, newFavoredNodes.size());
      LOG.info("Region: {} FN: {}", regionInfo.getEncodedName(), newFavoredNodes);

      Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
      regionFNMap.put(regionInfo, newFavoredNodes);
      fnm.updateFavoredNodes(regionFNMap);
      LOG.info("Assigning region: {}", regionInfo.getEncodedName());
      admin.assign(regionInfo.getEncodedNameAsBytes());
    }
  }

  /** Asserts that every region of {@code tableName} is hosted on one of its favored nodes. */
  private void checkFavoredNodeAssignments(TableName tableName, FavoredNodesManager fnm,
    RegionStates regionStates) throws IOException {
    for (RegionInfo hri : admin.getRegions(tableName)) {
      ServerName host = regionStates.getRegionServerOfRegion(hri);
      List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
      assertNotNull("Region: " + hri.getEncodedName() + " not on FN, current: " + host
        + " FN list: " + favoredNodes,
        FavoredNodesPlan.getFavoredServerPosition(favoredNodes, host));
    }
  }

  /**
   * Stops every live region server whose address matches one of {@code currentFN}, waits until
   * the master has no dead servers in progress, and asserts the expected number of servers
   * (relative to {@link #SLAVES}) is still alive.
   */
  private void stopServersAndWaitUntilProcessed(List<ServerName> currentFN) throws Exception {
    for (ServerName sn : currentFN) {
      for (JVMClusterUtil.RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
        ServerName rsName = rst.getRegionServer().getServerName();
        if (ServerName.isSameAddress(sn, rsName)) {
          LOG.info("Shutting down server: {}", sn);
          cluster.stopRegionServer(rsName);
          cluster.waitForRegionServerToStop(rsName, 60000);
        }
      }
    }

    // Wait until dead servers are processed.
    Waiter.Predicate<Exception> deadServersProcessed =
      () -> !master.getServerManager().areDeadServersInProgress();
    TEST_UTIL.waitFor(60000, deadServersProcessed);

    assertEquals("Not all servers killed", SLAVES - currentFN.size(),
      cluster.getLiveRegionServerThreads().size());
  }

  /** Runs {@code compact(true)} on every region of {@code tableName} on every region server. */
  private void compactTable(TableName tableName) throws IOException {
    for (JVMClusterUtil.RegionServerThread t : cluster.getRegionServerThreads()) {
      for (HRegion region : t.getRegionServer().getRegions(tableName)) {
        region.compact(true);
      }
    }
  }
}