001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
067
068@Tag(LargeTests.TAG)
069public class TestFavoredStochasticBalancerPickers extends BalancerTestBase {
070
071  private static final Logger LOG =
072    LoggerFactory.getLogger(TestFavoredStochasticBalancerPickers.class);
073
074  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
075  private static final int SLAVES = 6;
076  private static final int REGIONS = SLAVES * 3;
077  private static Configuration conf;
078
079  private Admin admin;
080  private SingleProcessHBaseCluster cluster;
081
082  @BeforeAll
083  public static void setupBeforeClass() throws Exception {
084    conf = TEST_UTIL.getConfiguration();
085    // Enable favored nodes based load balancer
086    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
087      LoadOnlyFavoredStochasticBalancer.class, LoadBalancer.class);
088    conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 30000);
089    conf.setInt("hbase.master.balancer.stochastic.moveCost", 0);
090    conf.setBoolean("hbase.master.balancer.stochastic.execute.maxSteps", true);
091  }
092
093  @BeforeEach
094  public void startCluster() throws Exception {
095    TEST_UTIL.startMiniCluster(SLAVES);
096    TEST_UTIL.getDFSCluster().waitClusterUp();
097    TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(120 * 1000);
098    cluster = TEST_UTIL.getMiniHBaseCluster();
099    admin = TEST_UTIL.getAdmin();
100    admin.balancerSwitch(false, true);
101  }
102
103  @AfterEach
104  public void stopCluster() throws Exception {
105    TEST_UTIL.cleanupTestDir();
106    TEST_UTIL.shutdownMiniCluster();
107  }
108
109  @Test
110  public void testPickers(TestInfo testInfo) throws Exception {
111    TableName tableName = TableName.valueOf(testInfo.getTestMethod().get().getName());
112    ColumnFamilyDescriptor columnFamilyDescriptor =
113      ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY).build();
114    TableDescriptor desc =
115      TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(columnFamilyDescriptor).build();
116    admin.createTable(desc, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGIONS);
117    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
118    TEST_UTIL.loadTable(admin.getConnection().getTable(tableName), HConstants.CATALOG_FAMILY);
119    admin.flush(tableName);
120
121    HMaster master = cluster.getMaster();
122    FavoredNodesManager fnm = master.getFavoredNodesManager();
123    ServerName masterServerName = master.getServerName();
124    List<ServerName> excludedServers = Lists.newArrayList(masterServerName);
125    final ServerName mostLoadedServer = getRSWithMaxRegions(tableName, excludedServers);
126    assertNotNull(mostLoadedServer);
127    int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
128    excludedServers.add(mostLoadedServer);
129    // Lets find another server with more regions to calculate number of regions to move
130    ServerName source = getRSWithMaxRegions(tableName, excludedServers);
131    assertNotNull(source);
132    int regionsToMove = getTableRegionsFromServer(tableName, source).size() / 2;
133
134    // Since move only works if the target is part of favored nodes of the region, lets get all
135    // regions that are movable to mostLoadedServer
136    List<RegionInfo> hris = getRegionsThatCanBeMoved(tableName, mostLoadedServer);
137    RegionStates rst = master.getAssignmentManager().getRegionStates();
138    for (int i = 0; i < regionsToMove; i++) {
139      final RegionInfo regionInfo = hris.get(i);
140      admin.move(regionInfo.getEncodedNameAsBytes(), mostLoadedServer);
141      LOG.info("Moving region: " + hris.get(i).getRegionNameAsString() + " to " + mostLoadedServer);
142      TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
143        @Override
144        public boolean evaluate() throws Exception {
145          return ServerName.isSameAddress(rst.getRegionServerOfRegion(regionInfo),
146            mostLoadedServer);
147        }
148      });
149    }
150    final int finalRegions = numRegions + regionsToMove;
151    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
152    TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
153      @Override
154      public boolean evaluate() throws Exception {
155        int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
156        return (numRegions == finalRegions);
157      }
158    });
159    TEST_UTIL.getHBaseCluster().startRegionServerAndWait(60000);
160
161    Map<ServerName, List<RegionInfo>> serverAssignments = Maps.newHashMap();
162    ClusterMetrics status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
163    for (ServerName sn : status.getLiveServerMetrics().keySet()) {
164      if (!ServerName.isSameAddress(sn, masterServerName)) {
165        serverAssignments.put(sn, getTableRegionsFromServer(tableName, sn));
166      }
167    }
168    RegionHDFSBlockLocationFinder regionFinder = new RegionHDFSBlockLocationFinder();
169    regionFinder.setClusterMetrics(admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)));
170    regionFinder.setConf(conf);
171    regionFinder.setClusterInfoProvider(
172      new MasterClusterInfoProvider(TEST_UTIL.getMiniHBaseCluster().getMaster()));
173    BalancerClusterState cluster =
174      new BalancerClusterState(serverAssignments, null, regionFinder, new RackManager(conf));
175    LoadOnlyFavoredStochasticBalancer balancer = (LoadOnlyFavoredStochasticBalancer) TEST_UTIL
176      .getMiniHBaseCluster().getMaster().getLoadBalancer().getInternalBalancer();
177
178    cluster.sortServersByRegionCount();
179    Integer[] servers = cluster.serverIndicesSortedByRegionCount;
180    LOG.info("Servers sorted by region count:" + Arrays.toString(servers));
181    LOG.info("Cluster dump: " + cluster);
182    if (!mostLoadedServer.equals(cluster.servers[servers[servers.length - 1]])) {
183      LOG.error("Most loaded server: " + mostLoadedServer + " does not match: "
184        + cluster.servers[servers[servers.length - 1]]);
185    }
186    assertEquals(mostLoadedServer, cluster.servers[servers[servers.length - 1]]);
187    FavoredStochasticBalancer.FavoredNodeLoadPicker loadPicker =
188      balancer.new FavoredNodeLoadPicker();
189    boolean userRegionPicked = false;
190    for (int i = 0; i < 100; i++) {
191      if (userRegionPicked) {
192        break;
193      } else {
194        BalanceAction action = loadPicker.generate(cluster);
195        if (action.getType() == BalanceAction.Type.MOVE_REGION) {
196          MoveRegionAction moveRegionAction = (MoveRegionAction) action;
197          RegionInfo region = cluster.regions[moveRegionAction.getRegion()];
198          assertNotEquals(-1, moveRegionAction.getToServer());
199          ServerName destinationServer = cluster.servers[moveRegionAction.getToServer()];
200          assertEquals(cluster.servers[moveRegionAction.getFromServer()], mostLoadedServer);
201          if (!region.getTable().isSystemTable()) {
202            List<ServerName> favNodes = fnm.getFavoredNodes(region);
203            assertTrue(favNodes.contains(ServerName.valueOf(destinationServer.getAddress(), -1)));
204            userRegionPicked = true;
205          }
206        }
207      }
208    }
209    assertTrue(userRegionPicked, "load picker did not pick expected regions in 100 iterations.");
210  }
211
212  /*
213   * A region can only be moved to one of its favored node. Hence this method helps us to get that
214   * list which makes it easy to write non-flaky tests.
215   */
216  private List<RegionInfo> getRegionsThatCanBeMoved(TableName tableName, ServerName serverName) {
217    List<RegionInfo> regions = Lists.newArrayList();
218    RegionStates rst = cluster.getMaster().getAssignmentManager().getRegionStates();
219    FavoredNodesManager fnm = cluster.getMaster().getFavoredNodesManager();
220    for (RegionInfo regionInfo : fnm.getRegionsOfFavoredNode(serverName)) {
221      if (
222        regionInfo.getTable().equals(tableName)
223          && !ServerName.isSameAddress(rst.getRegionServerOfRegion(regionInfo), serverName)
224      ) {
225        regions.add(regionInfo);
226      }
227    }
228    return regions;
229  }
230
231  private List<RegionInfo> getTableRegionsFromServer(TableName tableName, ServerName source)
232    throws IOException {
233    List<RegionInfo> regionInfos = Lists.newArrayList();
234    HRegionServer regionServer = cluster.getRegionServer(source);
235    for (Region region : regionServer.getRegions(tableName)) {
236      regionInfos.add(region.getRegionInfo());
237    }
238    return regionInfos;
239  }
240
241  private ServerName getRSWithMaxRegions(TableName tableName, List<ServerName> excludeNodes)
242    throws IOException {
243
244    int maxRegions = 0;
245    ServerName maxLoadedServer = null;
246
247    for (JVMClusterUtil.RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
248      List<HRegion> regions = rst.getRegionServer().getRegions(tableName);
249      LOG.debug("Server: " + rst.getRegionServer().getServerName() + " regions: " + regions.size());
250      if (regions.size() > maxRegions) {
251        if (
252          excludeNodes == null
253            || !doesMatchExcludeNodes(excludeNodes, rst.getRegionServer().getServerName())
254        ) {
255          maxRegions = regions.size();
256          maxLoadedServer = rst.getRegionServer().getServerName();
257        }
258      }
259    }
260    return maxLoadedServer;
261  }
262
263  private boolean doesMatchExcludeNodes(List<ServerName> excludeNodes, ServerName sn) {
264    for (ServerName excludeSN : excludeNodes) {
265      if (ServerName.isSameAddress(sn, excludeSN)) {
266        return true;
267      }
268    }
269    return false;
270  }
271}