001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
070
071@Category(LargeTests.class)
072public class TestFavoredStochasticBalancerPickers extends BalancerTestBase {
073
074  @ClassRule
075  public static final HBaseClassTestRule CLASS_RULE =
076    HBaseClassTestRule.forClass(TestFavoredStochasticBalancerPickers.class);
077
078  private static final Logger LOG =
079    LoggerFactory.getLogger(TestFavoredStochasticBalancerPickers.class);
080
081  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
082  private static final int SLAVES = 6;
083  private static final int REGIONS = SLAVES * 3;
084  private static Configuration conf;
085
086  private Admin admin;
087  private SingleProcessHBaseCluster cluster;
088
089  @Rule
090  public TestName name = new TestName();
091
092  @BeforeClass
093  public static void setupBeforeClass() throws Exception {
094    conf = TEST_UTIL.getConfiguration();
095    // Enable favored nodes based load balancer
096    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
097      LoadOnlyFavoredStochasticBalancer.class, LoadBalancer.class);
098    conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 30000);
099    conf.setInt("hbase.master.balancer.stochastic.moveCost", 0);
100    conf.setBoolean("hbase.master.balancer.stochastic.execute.maxSteps", true);
101  }
102
103  @Before
104  public void startCluster() throws Exception {
105    TEST_UTIL.startMiniCluster(SLAVES);
106    TEST_UTIL.getDFSCluster().waitClusterUp();
107    TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(120 * 1000);
108    cluster = TEST_UTIL.getMiniHBaseCluster();
109    admin = TEST_UTIL.getAdmin();
110    admin.balancerSwitch(false, true);
111  }
112
113  @After
114  public void stopCluster() throws Exception {
115    TEST_UTIL.cleanupTestDir();
116    TEST_UTIL.shutdownMiniCluster();
117  }
118
119  @Test
120  public void testPickers() throws Exception {
121    TableName tableName = TableName.valueOf(name.getMethodName());
122    ColumnFamilyDescriptor columnFamilyDescriptor =
123      ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY).build();
124    TableDescriptor desc =
125      TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(columnFamilyDescriptor).build();
126    admin.createTable(desc, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGIONS);
127    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
128    TEST_UTIL.loadTable(admin.getConnection().getTable(tableName), HConstants.CATALOG_FAMILY);
129    admin.flush(tableName);
130
131    HMaster master = cluster.getMaster();
132    FavoredNodesManager fnm = master.getFavoredNodesManager();
133    ServerName masterServerName = master.getServerName();
134    List<ServerName> excludedServers = Lists.newArrayList(masterServerName);
135    final ServerName mostLoadedServer = getRSWithMaxRegions(tableName, excludedServers);
136    assertNotNull(mostLoadedServer);
137    int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
138    excludedServers.add(mostLoadedServer);
139    // Lets find another server with more regions to calculate number of regions to move
140    ServerName source = getRSWithMaxRegions(tableName, excludedServers);
141    assertNotNull(source);
142    int regionsToMove = getTableRegionsFromServer(tableName, source).size() / 2;
143
144    // Since move only works if the target is part of favored nodes of the region, lets get all
145    // regions that are movable to mostLoadedServer
146    List<RegionInfo> hris = getRegionsThatCanBeMoved(tableName, mostLoadedServer);
147    RegionStates rst = master.getAssignmentManager().getRegionStates();
148    for (int i = 0; i < regionsToMove; i++) {
149      final RegionInfo regionInfo = hris.get(i);
150      admin.move(regionInfo.getEncodedNameAsBytes(), mostLoadedServer);
151      LOG.info("Moving region: " + hris.get(i).getRegionNameAsString() + " to " + mostLoadedServer);
152      TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
153        @Override
154        public boolean evaluate() throws Exception {
155          return ServerName.isSameAddress(rst.getRegionServerOfRegion(regionInfo),
156            mostLoadedServer);
157        }
158      });
159    }
160    final int finalRegions = numRegions + regionsToMove;
161    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
162    TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
163      @Override
164      public boolean evaluate() throws Exception {
165        int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
166        return (numRegions == finalRegions);
167      }
168    });
169    TEST_UTIL.getHBaseCluster().startRegionServerAndWait(60000);
170
171    Map<ServerName, List<RegionInfo>> serverAssignments = Maps.newHashMap();
172    ClusterMetrics status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
173    for (ServerName sn : status.getLiveServerMetrics().keySet()) {
174      if (!ServerName.isSameAddress(sn, masterServerName)) {
175        serverAssignments.put(sn, getTableRegionsFromServer(tableName, sn));
176      }
177    }
178    RegionHDFSBlockLocationFinder regionFinder = new RegionHDFSBlockLocationFinder();
179    regionFinder.setClusterMetrics(admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)));
180    regionFinder.setConf(conf);
181    regionFinder.setClusterInfoProvider(
182      new MasterClusterInfoProvider(TEST_UTIL.getMiniHBaseCluster().getMaster()));
183    BalancerClusterState cluster =
184      new BalancerClusterState(serverAssignments, null, regionFinder, new RackManager(conf));
185    LoadOnlyFavoredStochasticBalancer balancer = (LoadOnlyFavoredStochasticBalancer) TEST_UTIL
186      .getMiniHBaseCluster().getMaster().getLoadBalancer().getInternalBalancer();
187
188    cluster.sortServersByRegionCount();
189    Integer[] servers = cluster.serverIndicesSortedByRegionCount;
190    LOG.info("Servers sorted by region count:" + Arrays.toString(servers));
191    LOG.info("Cluster dump: " + cluster);
192    if (!mostLoadedServer.equals(cluster.servers[servers[servers.length - 1]])) {
193      LOG.error("Most loaded server: " + mostLoadedServer + " does not match: "
194        + cluster.servers[servers[servers.length - 1]]);
195    }
196    assertEquals(mostLoadedServer, cluster.servers[servers[servers.length - 1]]);
197    FavoredStochasticBalancer.FavoredNodeLoadPicker loadPicker =
198      balancer.new FavoredNodeLoadPicker();
199    boolean userRegionPicked = false;
200    for (int i = 0; i < 100; i++) {
201      if (userRegionPicked) {
202        break;
203      } else {
204        BalanceAction action = loadPicker.generate(cluster);
205        if (action.getType() == BalanceAction.Type.MOVE_REGION) {
206          MoveRegionAction moveRegionAction = (MoveRegionAction) action;
207          RegionInfo region = cluster.regions[moveRegionAction.getRegion()];
208          assertNotEquals(-1, moveRegionAction.getToServer());
209          ServerName destinationServer = cluster.servers[moveRegionAction.getToServer()];
210          assertEquals(cluster.servers[moveRegionAction.getFromServer()], mostLoadedServer);
211          if (!region.getTable().isSystemTable()) {
212            List<ServerName> favNodes = fnm.getFavoredNodes(region);
213            assertTrue(favNodes.contains(ServerName.valueOf(destinationServer.getAddress(), -1)));
214            userRegionPicked = true;
215          }
216        }
217      }
218    }
219    assertTrue("load picker did not pick expected regions in 100 iterations.", userRegionPicked);
220  }
221
222  /*
223   * A region can only be moved to one of its favored node. Hence this method helps us to get that
224   * list which makes it easy to write non-flaky tests.
225   */
226  private List<RegionInfo> getRegionsThatCanBeMoved(TableName tableName, ServerName serverName) {
227    List<RegionInfo> regions = Lists.newArrayList();
228    RegionStates rst = cluster.getMaster().getAssignmentManager().getRegionStates();
229    FavoredNodesManager fnm = cluster.getMaster().getFavoredNodesManager();
230    for (RegionInfo regionInfo : fnm.getRegionsOfFavoredNode(serverName)) {
231      if (
232        regionInfo.getTable().equals(tableName)
233          && !ServerName.isSameAddress(rst.getRegionServerOfRegion(regionInfo), serverName)
234      ) {
235        regions.add(regionInfo);
236      }
237    }
238    return regions;
239  }
240
241  private List<RegionInfo> getTableRegionsFromServer(TableName tableName, ServerName source)
242    throws IOException {
243    List<RegionInfo> regionInfos = Lists.newArrayList();
244    HRegionServer regionServer = cluster.getRegionServer(source);
245    for (Region region : regionServer.getRegions(tableName)) {
246      regionInfos.add(region.getRegionInfo());
247    }
248    return regionInfos;
249  }
250
251  private ServerName getRSWithMaxRegions(TableName tableName, List<ServerName> excludeNodes)
252    throws IOException {
253
254    int maxRegions = 0;
255    ServerName maxLoadedServer = null;
256
257    for (JVMClusterUtil.RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
258      List<HRegion> regions = rst.getRegionServer().getRegions(tableName);
259      LOG.debug("Server: " + rst.getRegionServer().getServerName() + " regions: " + regions.size());
260      if (regions.size() > maxRegions) {
261        if (
262          excludeNodes == null
263            || !doesMatchExcludeNodes(excludeNodes, rst.getRegionServer().getServerName())
264        ) {
265          maxRegions = regions.size();
266          maxLoadedServer = rst.getRegionServer().getServerName();
267        }
268      }
269    }
270    return maxLoadedServer;
271  }
272
273  private boolean doesMatchExcludeNodes(List<ServerName> excludeNodes, ServerName sn) {
274    for (ServerName excludeSN : excludeNodes) {
275      if (ServerName.isSameAddress(sn, excludeSN)) {
276        return true;
277      }
278    }
279    return false;
280  }
281}