001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
import static org.apache.hadoop.hbase.ServerName.NON_STARTCODE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
074
075@Ignore // Disabled
076@Category(MediumTests.class)
077public class TestFavoredStochasticLoadBalancer extends BalancerTestBase {
078
079  @ClassRule
080  public static final HBaseClassTestRule CLASS_RULE =
081    HBaseClassTestRule.forClass(TestFavoredStochasticLoadBalancer.class);
082
083  private static final Logger LOG =
084    LoggerFactory.getLogger(TestFavoredStochasticLoadBalancer.class);
085
086  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
087  private static final int SLAVES = 8;
088  private static final int REGION_NUM = SLAVES * 3;
089
090  private Admin admin;
091  private HMaster master;
092  private SingleProcessHBaseCluster cluster;
093
094  @BeforeClass
095  public static void setupBeforeClass() throws Exception {
096    Configuration conf = TEST_UTIL.getConfiguration();
097    // Enable the favored nodes based load balancer
098    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
099      LoadOnlyFavoredStochasticBalancer.class, LoadBalancer.class);
100  }
101
102  @Before
103  public void startCluster() throws Exception {
104    TEST_UTIL.startMiniCluster(SLAVES);
105    TEST_UTIL.getDFSCluster().waitClusterUp();
106    cluster = TEST_UTIL.getMiniHBaseCluster();
107    master = TEST_UTIL.getMiniHBaseCluster().getMaster();
108    admin = TEST_UTIL.getAdmin();
109    admin.balancerSwitch(false, true);
110  }
111
112  @After
113  public void stopCluster() throws Exception {
114    TEST_UTIL.cleanupTestDir();
115    TEST_UTIL.shutdownMiniCluster();
116  }
117
118  @Test
119  public void testBasicBalance() throws Exception {
120
121    TableName tableName = TableName.valueOf("testBasicBalance");
122    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
123      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
124    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
125    TEST_UTIL.waitTableAvailable(tableName);
126    TEST_UTIL.loadTable(admin.getConnection().getTable(tableName), HConstants.CATALOG_FAMILY);
127    admin.flush(tableName);
128    compactTable(tableName);
129
130    JVMClusterUtil.RegionServerThread rs1 = cluster.startRegionServerAndWait(10000);
131    JVMClusterUtil.RegionServerThread rs2 = cluster.startRegionServerAndWait(10000);
132
133    // Now try to run balance, and verify no regions are moved to the 2 region servers recently
134    // started.
135    admin.balancerSwitch(true, true);
136    assertTrue("Balancer did not run", admin.balance());
137    TEST_UTIL.waitUntilNoRegionsInTransition(120000);
138
139    List<RegionInfo> hris = admin.getRegions(rs1.getRegionServer().getServerName());
140    for (RegionInfo hri : hris) {
141      assertFalse("New RS contains regions belonging to table: " + tableName,
142        hri.getTable().equals(tableName));
143    }
144    hris = admin.getRegions(rs2.getRegionServer().getServerName());
145    for (RegionInfo hri : hris) {
146      assertFalse("New RS contains regions belonging to table: " + tableName,
147        hri.getTable().equals(tableName));
148    }
149  }
150
151  @Test
152  public void testRoundRobinAssignment() throws Exception {
153
154    TableName tableName = TableName.valueOf("testRoundRobinAssignment");
155    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
156      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
157    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
158    TEST_UTIL.waitTableAvailable(tableName);
159    TEST_UTIL.loadTable(admin.getConnection().getTable(tableName), HConstants.CATALOG_FAMILY);
160    admin.flush(tableName);
161
162    LoadBalancer balancer = master.getLoadBalancer();
163    List<RegionInfo> regions = admin.getRegions(tableName);
164    regions.addAll(admin.getRegions(TableName.META_TABLE_NAME));
165    List<ServerName> servers = Lists.newArrayList(
166      admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet());
167    Map<ServerName, List<RegionInfo>> map = balancer.roundRobinAssignment(regions, servers);
168    for (List<RegionInfo> regionInfos : map.values()) {
169      regions.removeAll(regionInfos);
170    }
171    assertEquals("No region should be missed by balancer", 0, regions.size());
172  }
173
174  @Test
175  public void testBasicRegionPlacementAndReplicaLoad() throws Exception {
176    String tableName = "testBasicRegionPlacement";
177    TableDescriptor tableDescriptor =
178      TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName))
179        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
180    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
181    TEST_UTIL.waitTableAvailable(tableDescriptor.getTableName());
182
183    FavoredNodesManager fnm = master.getFavoredNodesManager();
184    List<RegionInfo> regionsOfTable = admin.getRegions(TableName.valueOf(tableName));
185    for (RegionInfo rInfo : regionsOfTable) {
186      Set<ServerName> favNodes = Sets.newHashSet(fnm.getFavoredNodes(rInfo));
187      assertNotNull(favNodes);
188      assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, favNodes.size());
189    }
190
191    Map<ServerName, List<Integer>> replicaLoadMap = fnm.getReplicaLoad(Lists.newArrayList(
192      admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet()));
193    assertTrue("Not all replica load collected.",
194      admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().size()
195          == replicaLoadMap.size());
196    for (Entry<ServerName, List<Integer>> entry : replicaLoadMap.entrySet()) {
197      assertTrue(entry.getValue().size() == FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);
198      assertTrue(entry.getValue().get(0) >= 0);
199      assertTrue(entry.getValue().get(1) >= 0);
200      assertTrue(entry.getValue().get(2) >= 0);
201    }
202
203    admin.disableTable(TableName.valueOf(tableName));
204    admin.deleteTable(TableName.valueOf(tableName));
205    replicaLoadMap = fnm.getReplicaLoad(Lists.newArrayList(
206      admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet()));
207    assertTrue("replica load found " + replicaLoadMap.size() + " instead of 0.",
208      replicaLoadMap.size() == admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
209        .getLiveServerMetrics().size());
210  }
211
212  @Test
213  public void testRandomAssignmentWithNoFavNodes() throws Exception {
214
215    final String tableName = "testRandomAssignmentWithNoFavNodes";
216    TableDescriptor tableDescriptor =
217      TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName))
218        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
219    admin.createTable(tableDescriptor);
220    TEST_UTIL.waitTableAvailable(tableDescriptor.getTableName());
221
222    RegionInfo hri = admin.getRegions(TableName.valueOf(tableName)).get(0);
223
224    FavoredNodesManager fnm = master.getFavoredNodesManager();
225    fnm.deleteFavoredNodesForRegions(Lists.newArrayList(hri));
226    assertNull("Favored nodes not found null after delete", fnm.getFavoredNodes(hri));
227
228    LoadBalancer balancer = master.getLoadBalancer();
229    ServerName destination = balancer.randomAssignment(hri,
230      Lists.newArrayList(admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
231        .getLiveServerMetrics().keySet().stream().collect(Collectors.toList())));
232    assertNotNull(destination);
233    List<ServerName> favoredNodes = fnm.getFavoredNodes(hri);
234    assertNotNull(favoredNodes);
235    boolean containsFN = false;
236    for (ServerName sn : favoredNodes) {
237      if (ServerName.isSameAddress(destination, sn)) {
238        containsFN = true;
239      }
240    }
241    assertTrue("Destination server does not belong to favored nodes.", containsFN);
242  }
243
244  @Test
245  public void testBalancerWithoutFavoredNodes() throws Exception {
246
247    TableName tableName = TableName.valueOf("testBalancerWithoutFavoredNodes");
248    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
249      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
250    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
251    TEST_UTIL.waitTableAvailable(tableName);
252
253    final RegionInfo region = admin.getRegions(tableName).get(0);
254    LOG.info("Region thats supposed to be in transition: " + region);
255    FavoredNodesManager fnm = master.getFavoredNodesManager();
256    List<ServerName> currentFN = fnm.getFavoredNodes(region);
257    assertNotNull(currentFN);
258
259    fnm.deleteFavoredNodesForRegions(Lists.newArrayList(region));
260
261    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
262    admin.balancerSwitch(true, true);
263
264    // Balancer should unassign the region
265    assertTrue("Balancer did not run", admin.balance());
266    TEST_UTIL.waitUntilNoRegionTransitScheduled();
267    assertEquals("One region should be unassigned", 1,
268      master.getAssignmentManager().getRegionsInTransitionCount());
269
270    admin.assign(region.getEncodedNameAsBytes());
271    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
272
273    currentFN = fnm.getFavoredNodes(region);
274    assertNotNull(currentFN);
275    assertEquals("Expected number of FN not present", FavoredNodeAssignmentHelper.FAVORED_NODES_NUM,
276      currentFN.size());
277
278    assertTrue("Balancer did not run", admin.balance());
279    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
280
281    checkFavoredNodeAssignments(tableName, fnm, regionStates);
282  }
283
284  @Ignore
285  @Test
286  public void testMisplacedRegions() throws Exception {
287    TableName tableName = TableName.valueOf("testMisplacedRegions");
288    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
289      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
290    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
291    TEST_UTIL.waitTableAvailable(tableName);
292
293    final RegionInfo misplacedRegion = admin.getRegions(tableName).get(0);
294    FavoredNodesManager fnm = master.getFavoredNodesManager();
295    List<ServerName> currentFN = fnm.getFavoredNodes(misplacedRegion);
296    assertNotNull(currentFN);
297
298    List<ServerName> serversForNewFN = Lists.newArrayList();
299    for (ServerName sn : admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
300      .getLiveServerMetrics().keySet()) {
301      serversForNewFN.add(ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE));
302    }
303    for (ServerName sn : currentFN) {
304      serversForNewFN.remove(sn);
305    }
306    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(serversForNewFN, conf);
307    helper.initialize();
308    List<ServerName> newFavoredNodes = helper.generateFavoredNodes(misplacedRegion);
309    assertNotNull(newFavoredNodes);
310    assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, newFavoredNodes.size());
311    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
312    regionFNMap.put(misplacedRegion, newFavoredNodes);
313    fnm.updateFavoredNodes(regionFNMap);
314
315    final RegionStates regionStates = master.getAssignmentManager().getRegionStates();
316    final ServerName current = regionStates.getRegionServerOfRegion(misplacedRegion);
317    assertNull("Misplaced region is still hosted on favored node, not expected.",
318      FavoredNodesPlan.getFavoredServerPosition(fnm.getFavoredNodes(misplacedRegion), current));
319    admin.balancerSwitch(true, true);
320    assertTrue("Balancer did not run", admin.balance());
321    TEST_UTIL.waitFor(120000, 30000, new Waiter.Predicate<Exception>() {
322      @Override
323      public boolean evaluate() throws Exception {
324        ServerName host = regionStates.getRegionServerOfRegion(misplacedRegion);
325        return !ServerName.isSameAddress(host, current);
326      }
327    });
328    checkFavoredNodeAssignments(tableName, fnm, regionStates);
329  }
330
331  @Test
332  public void test2FavoredNodesDead() throws Exception {
333    TableName tableName = TableName.valueOf("testAllFavoredNodesDead");
334    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
335      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
336    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
337    TEST_UTIL.waitTableAvailable(tableName);
338
339    final RegionInfo region = admin.getRegions(tableName).get(0);
340    LOG.info("Region that's supposed to be in transition: " + region);
341    FavoredNodesManager fnm = master.getFavoredNodesManager();
342    List<ServerName> currentFN = fnm.getFavoredNodes(region);
343    assertNotNull(currentFN);
344
345    List<ServerName> serversToStop = Lists.newArrayList(currentFN);
346    serversToStop.remove(currentFN.get(0));
347
348    // Lets kill 2 FN for the region. All regions should still be assigned
349    stopServersAndWaitUntilProcessed(serversToStop);
350
351    TEST_UTIL.waitUntilNoRegionsInTransition();
352    final RegionStates regionStates = master.getAssignmentManager().getRegionStates();
353    TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
354      @Override
355      public boolean evaluate() throws Exception {
356        return regionStates.getRegionState(region).isOpened();
357      }
358    });
359
360    assertEquals("Not all regions are online", REGION_NUM, admin.getRegions(tableName).size());
361    admin.balancerSwitch(true, true);
362    assertTrue("Balancer did not run", admin.balance());
363    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
364
365    checkFavoredNodeAssignments(tableName, fnm, regionStates);
366  }
367
368  @Ignore
369  @Test
370  public void testAllFavoredNodesDead() throws Exception {
371    TableName tableName = TableName.valueOf("testAllFavoredNodesDead");
372    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
373      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
374    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
375    TEST_UTIL.waitTableAvailable(tableName);
376
377    final RegionInfo region = admin.getRegions(tableName).get(0);
378    LOG.info("Region that's supposed to be in transition: " + region);
379    FavoredNodesManager fnm = master.getFavoredNodesManager();
380    List<ServerName> currentFN = fnm.getFavoredNodes(region);
381    assertNotNull(currentFN);
382
383    // Lets kill all the RS that are favored nodes for this region.
384    stopServersAndWaitUntilProcessed(currentFN);
385
386    final AssignmentManager am = master.getAssignmentManager();
387    final RegionStates regionStates = am.getRegionStates();
388    TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
389      @Override
390      public boolean evaluate() throws Exception {
391        return regionStates.getRegionState(region).isFailedOpen();
392      }
393    });
394
395    assertTrue("Region: " + region + " should be RIT",
396      regionStates.getRegionState(region).isFailedOpen());
397
398    // Regenerate FN and assign, everything else should be fine
399    List<ServerName> serversForNewFN = Lists.newArrayList();
400    for (ServerName sn : admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
401      .getLiveServerMetrics().keySet()) {
402      serversForNewFN.add(ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE));
403    }
404
405    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(serversForNewFN, conf);
406    helper.initialize();
407
408    for (RegionStateNode regionState : am.getRegionsInTransition()) {
409      RegionInfo regionInfo = regionState.getRegionInfo();
410      List<ServerName> newFavoredNodes = helper.generateFavoredNodes(regionInfo);
411      assertNotNull(newFavoredNodes);
412      assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, newFavoredNodes.size());
413      LOG.info("Region: " + regionInfo.getEncodedName() + " FN: " + newFavoredNodes);
414
415      Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
416      regionFNMap.put(regionInfo, newFavoredNodes);
417      fnm.updateFavoredNodes(regionFNMap);
418      LOG.info("Assigning region: " + regionInfo.getEncodedName());
419      admin.assign(regionInfo.getEncodedNameAsBytes());
420    }
421    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
422    assertEquals("Not all regions are online", REGION_NUM, admin.getRegions(tableName).size());
423
424    admin.balancerSwitch(true, true);
425    assertTrue("Balancer did not run", admin.balance());
426    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
427
428    checkFavoredNodeAssignments(tableName, fnm, regionStates);
429  }
430
431  @Ignore
432  @Test
433  public void testAllFavoredNodesDeadMasterRestarted() throws Exception {
434    TableName tableName = TableName.valueOf("testAllFavoredNodesDeadMasterRestarted");
435    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
436      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
437    admin.createTable(tableDescriptor, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGION_NUM);
438    TEST_UTIL.waitTableAvailable(tableName);
439
440    final RegionInfo region = admin.getRegions(tableName).get(0);
441    LOG.info("Region that's supposed to be in transition: " + region);
442    FavoredNodesManager fnm = master.getFavoredNodesManager();
443    List<ServerName> currentFN = fnm.getFavoredNodes(region);
444    assertNotNull(currentFN);
445
446    // Lets kill all the RS that are favored nodes for this region.
447    stopServersAndWaitUntilProcessed(currentFN);
448
449    final AssignmentManager am = master.getAssignmentManager();
450    final RegionStates regionStatesBeforeMaster = am.getRegionStates();
451    TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
452      @Override
453      public boolean evaluate() throws Exception {
454        return regionStatesBeforeMaster.getRegionState(region).isFailedOpen();
455      }
456    });
457
458    assertTrue("Region: " + region + " should be RIT",
459      regionStatesBeforeMaster.getRegionState(region).isFailedOpen());
460
461    List<RegionInfo> rit = Lists.newArrayList();
462    for (RegionStateNode regionState : am.getRegionsInTransition()) {
463      RegionInfo regionInfo = regionState.getRegionInfo();
464      LOG.debug("Region in transition after stopping FN's: " + regionInfo);
465      rit.add(regionInfo);
466      assertTrue("Region: " + regionInfo + " should be RIT",
467        regionStatesBeforeMaster.getRegionState(regionInfo).isFailedOpen());
468      assertEquals("Region: " + regionInfo + " does not belong to table: " + tableName, tableName,
469        regionInfo.getTable());
470    }
471
472    Configuration conf = cluster.getConf();
473    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART,
474      SLAVES - FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);
475
476    cluster.stopMaster(master.getServerName());
477    cluster.waitForMasterToStop(master.getServerName(), 60000);
478
479    cluster.startMaster();
480    cluster.waitForActiveAndReadyMaster();
481    master = cluster.getMaster();
482    fnm = master.getFavoredNodesManager();
483
484    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
485    assertTrue("Region: " + region + " should be RIT",
486      regionStates.getRegionState(region).isFailedOpen());
487
488    for (RegionInfo regionInfo : rit) {
489      assertTrue("Region: " + regionInfo + " should be RIT",
490        regionStates.getRegionState(regionInfo).isFailedOpen());
491    }
492
493    // Regenerate FN and assign, everything else should be fine
494    List<ServerName> serversForNewFN = Lists.newArrayList();
495    for (ServerName sn : admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
496      .getLiveServerMetrics().keySet()) {
497      serversForNewFN.add(ServerName.valueOf(sn.getHostname(), sn.getPort(), NON_STARTCODE));
498    }
499
500    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(serversForNewFN, conf);
501    helper.initialize();
502
503    for (RegionInfo regionInfo : rit) {
504      List<ServerName> newFavoredNodes = helper.generateFavoredNodes(regionInfo);
505      assertNotNull(newFavoredNodes);
506      assertEquals(FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, newFavoredNodes.size());
507      LOG.info("Region: " + regionInfo.getEncodedName() + " FN: " + newFavoredNodes);
508
509      Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
510      regionFNMap.put(regionInfo, newFavoredNodes);
511      fnm.updateFavoredNodes(regionFNMap);
512      LOG.info("Assigning region: " + regionInfo.getEncodedName());
513      admin.assign(regionInfo.getEncodedNameAsBytes());
514    }
515    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
516    assertEquals("Not all regions are online", REGION_NUM, admin.getRegions(tableName).size());
517
518    admin.balancerSwitch(true, true);
519    assertTrue("Balancer did not run", admin.balance());
520    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
521
522    checkFavoredNodeAssignments(tableName, fnm, regionStates);
523  }
524
525  private void checkFavoredNodeAssignments(TableName tableName, FavoredNodesManager fnm,
526    RegionStates regionStates) throws IOException {
527    for (RegionInfo hri : admin.getRegions(tableName)) {
528      ServerName host = regionStates.getRegionServerOfRegion(hri);
529      assertNotNull(
530        "Region: " + hri.getEncodedName() + " not on FN, current: " + host + " FN list: "
531          + fnm.getFavoredNodes(hri),
532        FavoredNodesPlan.getFavoredServerPosition(fnm.getFavoredNodes(hri), host));
533    }
534  }
535
536  private void stopServersAndWaitUntilProcessed(List<ServerName> currentFN) throws Exception {
537    for (ServerName sn : currentFN) {
538      for (JVMClusterUtil.RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
539        if (ServerName.isSameAddress(sn, rst.getRegionServer().getServerName())) {
540          LOG.info("Shutting down server: " + sn);
541          cluster.stopRegionServer(rst.getRegionServer().getServerName());
542          cluster.waitForRegionServerToStop(rst.getRegionServer().getServerName(), 60000);
543        }
544      }
545    }
546
547    // Wait until dead servers are processed.
548    TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
549      @Override
550      public boolean evaluate() throws Exception {
551        return !master.getServerManager().areDeadServersInProgress();
552      }
553    });
554
555    assertEquals("Not all servers killed", SLAVES - currentFN.size(),
556      cluster.getLiveRegionServerThreads().size());
557  }
558
559  private void compactTable(TableName tableName) throws IOException {
560    for (JVMClusterUtil.RegionServerThread t : cluster.getRegionServerThreads()) {
561      for (HRegion region : t.getRegionServer().getRegions(tableName)) {
562        region.compact(true);
563      }
564    }
565  }
566}