001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNotEquals;
022import static org.junit.Assert.assertNotNull;
023import static org.junit.Assert.assertTrue;
024
025import java.io.IOException;
026import java.util.Arrays;
027import java.util.EnumSet;
028import java.util.List;
029import java.util.Map;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.hbase.ClusterMetrics;
032import org.apache.hadoop.hbase.ClusterMetrics.Option;
033import org.apache.hadoop.hbase.HBaseClassTestRule;
034import org.apache.hadoop.hbase.HBaseTestingUtility;
035import org.apache.hadoop.hbase.HConstants;
036import org.apache.hadoop.hbase.MiniHBaseCluster;
037import org.apache.hadoop.hbase.ServerName;
038import org.apache.hadoop.hbase.TableName;
039import org.apache.hadoop.hbase.Waiter;
040import org.apache.hadoop.hbase.client.Admin;
041import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
042import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.client.TableDescriptor;
045import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
046import org.apache.hadoop.hbase.favored.FavoredNodesManager;
047import org.apache.hadoop.hbase.master.HMaster;
048import org.apache.hadoop.hbase.master.LoadBalancer;
049import org.apache.hadoop.hbase.master.RackManager;
050import org.apache.hadoop.hbase.master.assignment.RegionStates;
051import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
052import org.apache.hadoop.hbase.regionserver.HRegion;
053import org.apache.hadoop.hbase.regionserver.HRegionServer;
054import org.apache.hadoop.hbase.regionserver.Region;
055import org.apache.hadoop.hbase.testclassification.LargeTests;
056import org.apache.hadoop.hbase.util.Bytes;
057import org.apache.hadoop.hbase.util.JVMClusterUtil;
058import org.junit.After;
059import org.junit.Before;
060import org.junit.BeforeClass;
061import org.junit.ClassRule;
062import org.junit.Rule;
063import org.junit.Test;
064import org.junit.experimental.categories.Category;
065import org.junit.rules.TestName;
066import org.slf4j.Logger;
067import org.slf4j.LoggerFactory;
068
069import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
070import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
071
072@Category(LargeTests.class)
073public class TestFavoredStochasticBalancerPickers extends BalancerTestBase {
074
075  @ClassRule
076  public static final HBaseClassTestRule CLASS_RULE =
077      HBaseClassTestRule.forClass(TestFavoredStochasticBalancerPickers.class);
078
079  private static final Logger LOG =
080      LoggerFactory.getLogger(TestFavoredStochasticBalancerPickers.class);
081
082  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
083  private static final int SLAVES = 6;
084  private static final int REGIONS = SLAVES * 3;
085  private static Configuration conf;
086
087  private Admin admin;
088  private MiniHBaseCluster cluster;
089
090  @Rule
091  public TestName name = new TestName();
092
093  @BeforeClass
094  public static void setupBeforeClass() throws Exception {
095    conf = TEST_UTIL.getConfiguration();
096    // Enable favored nodes based load balancer
097    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
098        LoadOnlyFavoredStochasticBalancer.class, LoadBalancer.class);
099    conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 30000);
100    conf.setInt("hbase.master.balancer.stochastic.moveCost", 0);
101    conf.setBoolean("hbase.master.balancer.stochastic.execute.maxSteps", true);
102    conf.set(BaseLoadBalancer.TABLES_ON_MASTER, "none");
103  }
104
105  @Before
106  public void startCluster() throws Exception {
107    TEST_UTIL.startMiniCluster(SLAVES);
108    TEST_UTIL.getDFSCluster().waitClusterUp();
109    TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(120*1000);
110    cluster = TEST_UTIL.getMiniHBaseCluster();
111    admin = TEST_UTIL.getAdmin();
112    admin.setBalancerRunning(false, true);
113  }
114
115  @After
116  public void stopCluster() throws Exception {
117    TEST_UTIL.cleanupTestDir();
118    TEST_UTIL.shutdownMiniCluster();
119  }
120
121
122  @Test
123  public void testPickers() throws Exception {
124    TableName tableName = TableName.valueOf(name.getMethodName());
125    ColumnFamilyDescriptor columnFamilyDescriptor =
126        ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY).build();
127    TableDescriptor desc = TableDescriptorBuilder
128        .newBuilder(tableName)
129        .setColumnFamily(columnFamilyDescriptor)
130        .build();
131    admin.createTable(desc, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGIONS);
132    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
133    TEST_UTIL.loadTable(admin.getConnection().getTable(tableName), HConstants.CATALOG_FAMILY);
134    admin.flush(tableName);
135
136    HMaster master = cluster.getMaster();
137    FavoredNodesManager fnm = master.getFavoredNodesManager();
138    ServerName masterServerName = master.getServerName();
139    List<ServerName> excludedServers = Lists.newArrayList(masterServerName);
140    final ServerName mostLoadedServer = getRSWithMaxRegions(tableName, excludedServers);
141    assertNotNull(mostLoadedServer);
142    int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
143    excludedServers.add(mostLoadedServer);
144    // Lets find another server with more regions to calculate number of regions to move
145    ServerName source = getRSWithMaxRegions(tableName, excludedServers);
146    assertNotNull(source);
147    int regionsToMove = getTableRegionsFromServer(tableName, source).size()/2;
148
149    // Since move only works if the target is part of favored nodes of the region, lets get all
150    // regions that are movable to mostLoadedServer
151    List<RegionInfo> hris = getRegionsThatCanBeMoved(tableName, mostLoadedServer);
152    RegionStates rst = master.getAssignmentManager().getRegionStates();
153    for (int i = 0; i < regionsToMove; i++) {
154      final RegionInfo regionInfo = hris.get(i);
155      admin.move(regionInfo.getEncodedNameAsBytes(), mostLoadedServer);
156      LOG.info("Moving region: " + hris.get(i).getRegionNameAsString() + " to " + mostLoadedServer);
157      TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
158        @Override
159        public boolean evaluate() throws Exception {
160          return ServerName.isSameAddress(
161              rst.getRegionServerOfRegion(regionInfo), mostLoadedServer);
162        }
163      });
164    }
165    final int finalRegions = numRegions + regionsToMove;
166    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
167    TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
168      @Override
169      public boolean evaluate() throws Exception {
170        int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
171        return (numRegions == finalRegions);
172      }
173    });
174    TEST_UTIL.getHBaseCluster().startRegionServerAndWait(60000);
175
176    Map<ServerName, List<RegionInfo>> serverAssignments = Maps.newHashMap();
177    ClusterMetrics status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
178    for (ServerName sn : status.getLiveServerMetrics().keySet()) {
179      if (!ServerName.isSameAddress(sn, masterServerName)) {
180        serverAssignments.put(sn, getTableRegionsFromServer(tableName, sn));
181      }
182    }
183    RegionLocationFinder regionFinder = new RegionLocationFinder();
184    regionFinder.setClusterMetrics(admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)));
185    regionFinder.setConf(conf);
186    regionFinder.setServices(TEST_UTIL.getMiniHBaseCluster().getMaster());
187    Cluster cluster = new Cluster(serverAssignments, null, regionFinder, new RackManager(conf));
188    LoadOnlyFavoredStochasticBalancer balancer = (LoadOnlyFavoredStochasticBalancer) TEST_UTIL
189        .getMiniHBaseCluster().getMaster().getLoadBalancer();
190
191    cluster.sortServersByRegionCount();
192    Integer[] servers = cluster.serverIndicesSortedByRegionCount;
193    LOG.info("Servers sorted by region count:" + Arrays.toString(servers));
194    LOG.info("Cluster dump: " + cluster);
195    if (!mostLoadedServer.equals(cluster.servers[servers[servers.length -1]])) {
196      LOG.error("Most loaded server: " + mostLoadedServer + " does not match: "
197          + cluster.servers[servers[servers.length -1]]);
198    }
199    assertEquals(mostLoadedServer, cluster.servers[servers[servers.length -1]]);
200    FavoredStochasticBalancer.FavoredNodeLoadPicker loadPicker = balancer.new FavoredNodeLoadPicker();
201    boolean userRegionPicked = false;
202    for (int i = 0; i < 100; i++) {
203      if (userRegionPicked) {
204        break;
205      } else {
206        Cluster.Action action = loadPicker.generate(cluster);
207        if (action.type == Cluster.Action.Type.MOVE_REGION) {
208          Cluster.MoveRegionAction moveRegionAction = (Cluster.MoveRegionAction) action;
209          RegionInfo region = cluster.regions[moveRegionAction.region];
210          assertNotEquals(-1, moveRegionAction.toServer);
211          ServerName destinationServer = cluster.servers[moveRegionAction.toServer];
212          assertEquals(cluster.servers[moveRegionAction.fromServer], mostLoadedServer);
213          if (!region.getTable().isSystemTable()) {
214            List<ServerName> favNodes = fnm.getFavoredNodes(region);
215            assertTrue(favNodes.contains(ServerName.valueOf(destinationServer.getHostAndPort(), -1)));
216            userRegionPicked = true;
217          }
218        }
219      }
220    }
221    assertTrue("load picker did not pick expected regions in 100 iterations.", userRegionPicked);
222  }
223
224  /*
225   * A region can only be moved to one of its favored node. Hence this method helps us to
226   * get that list which makes it easy to write non-flaky tests.
227   */
228  private List<RegionInfo> getRegionsThatCanBeMoved(TableName tableName,
229      ServerName serverName) {
230    List<RegionInfo> regions = Lists.newArrayList();
231    RegionStates rst = cluster.getMaster().getAssignmentManager().getRegionStates();
232    FavoredNodesManager fnm = cluster.getMaster().getFavoredNodesManager();
233    for (RegionInfo regionInfo : fnm.getRegionsOfFavoredNode(serverName)) {
234      if (regionInfo.getTable().equals(tableName) &&
235          !ServerName.isSameAddress(rst.getRegionServerOfRegion(regionInfo), serverName)) {
236        regions.add(regionInfo);
237      }
238    }
239    return regions;
240  }
241
242  private List<RegionInfo> getTableRegionsFromServer(TableName tableName, ServerName source)
243      throws IOException {
244    List<RegionInfo> regionInfos = Lists.newArrayList();
245    HRegionServer regionServer = cluster.getRegionServer(source);
246    for (Region region : regionServer.getRegions(tableName)) {
247      regionInfos.add(region.getRegionInfo());
248    }
249    return regionInfos;
250  }
251
252  private ServerName getRSWithMaxRegions(TableName tableName, List<ServerName> excludeNodes)
253      throws IOException {
254
255    int maxRegions = 0;
256    ServerName maxLoadedServer = null;
257
258    for (JVMClusterUtil.RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
259      List<HRegion> regions = rst.getRegionServer().getRegions(tableName);
260      LOG.debug("Server: " + rst.getRegionServer().getServerName() + " regions: " + regions.size());
261      if (regions.size() > maxRegions) {
262        if (excludeNodes == null ||
263            !doesMatchExcludeNodes(excludeNodes, rst.getRegionServer().getServerName())) {
264          maxRegions = regions.size();
265          maxLoadedServer = rst.getRegionServer().getServerName();
266        }
267      }
268    }
269    return maxLoadedServer;
270  }
271
272  private boolean doesMatchExcludeNodes(List<ServerName> excludeNodes, ServerName sn) {
273    for (ServerName excludeSN : excludeNodes) {
274      if (ServerName.isSameAddress(sn, excludeSN)) {
275        return true;
276      }
277    }
278    return false;
279  }
280}