/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.balancer;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;

/**
 * Mini-cluster test for the {@code FavoredNodeLoadPicker} of
 * {@link LoadOnlyFavoredStochasticBalancer}. The test overloads one region server by hand and then
 * checks that the picker proposes moving a user region away from that server to a server that is
 * one of the region's favored nodes.
 */
@Tag(LargeTests.TAG)
public class TestFavoredStochasticBalancerPickers extends BalancerTestBase {

  private static final Logger LOG =
    LoggerFactory.getLogger(TestFavoredStochasticBalancerPickers.class);

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private static final int SLAVES = 6;
  private static final int REGIONS = SLAVES * 3;
  private static Configuration conf;

  private Admin admin;
  private SingleProcessHBaseCluster cluster;

  /**
   * Configures the shared test configuration once, before any mini cluster is started.
   */
  @BeforeAll
  public static void setupBeforeClass() throws Exception {
    conf = TEST_UTIL.getConfiguration();
    // Enable favored nodes based load balancer
    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
      LoadOnlyFavoredStochasticBalancer.class, LoadBalancer.class);
    conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 30000);
    conf.setInt("hbase.master.balancer.stochastic.moveCost", 0);
    conf.setBoolean("hbase.master.balancer.stochastic.execute.maxSteps", true);
  }

  /**
   * Starts a fresh mini cluster with {@link #SLAVES} region servers for each test and switches the
   * balancer off; the test places regions manually.
   */
  @BeforeEach
  public void startCluster() throws Exception {
    TEST_UTIL.startMiniCluster(SLAVES);
    TEST_UTIL.getDFSCluster().waitClusterUp();
    TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(120 * 1000);
    cluster = TEST_UTIL.getMiniHBaseCluster();
    admin = TEST_UTIL.getAdmin();
    admin.balancerSwitch(false, true);
  }

  /**
   * Cleans the test directory and shuts the mini cluster down after each test.
   */
  @AfterEach
  public void stopCluster() throws Exception {
    TEST_UTIL.cleanupTestDir();
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Overloads the region server that already hosts the most regions of the test table, adds an
   * extra region server, and verifies that the load picker generates a move of a user region from
   * the overloaded server to one of that region's favored nodes.
   */
  @Test
  public void testPickers(TestInfo testInfo) throws Exception {
    TableName tableName = TableName.valueOf(testInfo.getTestMethod().get().getName());
    ColumnFamilyDescriptor columnFamilyDescriptor =
      ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY).build();
    TableDescriptor desc =
      TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(columnFamilyDescriptor).build();
    admin.createTable(desc, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGIONS);
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    // Close the table handle once the data is loaded instead of leaking it.
    try (Table table = admin.getConnection().getTable(tableName)) {
      TEST_UTIL.loadTable(table, HConstants.CATALOG_FAMILY);
    }
    admin.flush(tableName);

    HMaster master = cluster.getMaster();
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    ServerName masterServerName = master.getServerName();
    List<ServerName> excludedServers = Lists.newArrayList(masterServerName);
    final ServerName mostLoadedServer = getRSWithMaxRegions(tableName, excludedServers);
    assertNotNull(mostLoadedServer);
    int initialRegionCount = getTableRegionsFromServer(tableName, mostLoadedServer).size();
    excludedServers.add(mostLoadedServer);
    // Find the next most loaded server; half of its region count is the number of regions that
    // will be moved onto mostLoadedServer.
    ServerName source = getRSWithMaxRegions(tableName, excludedServers);
    assertNotNull(source);
    int regionsToMove = getTableRegionsFromServer(tableName, source).size() / 2;

    // Since move only works if the target is part of favored nodes of the region, lets get all
    // regions that are movable to mostLoadedServer
    List<RegionInfo> hris = getRegionsThatCanBeMoved(tableName, mostLoadedServer);
    // Fail with a readable message rather than an IndexOutOfBoundsException in the loop below.
    assertTrue(hris.size() >= regionsToMove, "Only " + hris.size()
      + " regions can be moved to " + mostLoadedServer + " but " + regionsToMove + " are needed");
    final RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    for (int i = 0; i < regionsToMove; i++) {
      final RegionInfo regionInfo = hris.get(i);
      LOG.info("Moving region: {} to {}", regionInfo.getRegionNameAsString(), mostLoadedServer);
      admin.move(regionInfo.getEncodedNameAsBytes(), mostLoadedServer);
      // Wait until the master reports the region on the target server.
      TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return ServerName.isSameAddress(regionStates.getRegionServerOfRegion(regionInfo),
            mostLoadedServer);
        }
      });
    }
    final int expectedRegionCount = initialRegionCount + regionsToMove;
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    // Wait until the region server itself hosts all the regions that were moved to it.
    TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        int currentRegionCount = getTableRegionsFromServer(tableName, mostLoadedServer).size();
        return currentRegionCount == expectedRegionCount;
      }
    });
    TEST_UTIL.getHBaseCluster().startRegionServerAndWait(60000);

    // Build the balancer's view of the cluster from the live region servers, skipping the master.
    Map<ServerName, List<RegionInfo>> serverAssignments = Maps.newHashMap();
    ClusterMetrics status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
    for (ServerName sn : status.getLiveServerMetrics().keySet()) {
      if (!ServerName.isSameAddress(sn, masterServerName)) {
        serverAssignments.put(sn, getTableRegionsFromServer(tableName, sn));
      }
    }
    RegionHDFSBlockLocationFinder regionFinder = new RegionHDFSBlockLocationFinder();
    regionFinder.setClusterMetrics(admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)));
    regionFinder.setConf(conf);
    regionFinder.setClusterInfoProvider(
      new MasterClusterInfoProvider(TEST_UTIL.getMiniHBaseCluster().getMaster()));
    // Named clusterState so it does not shadow the mini-cluster field "cluster".
    BalancerClusterState clusterState =
      new BalancerClusterState(serverAssignments, null, regionFinder, new RackManager(conf));
    LoadOnlyFavoredStochasticBalancer balancer = (LoadOnlyFavoredStochasticBalancer) TEST_UTIL
      .getMiniHBaseCluster().getMaster().getLoadBalancer().getInternalBalancer();

    clusterState.sortServersByRegionCount();
    Integer[] servers = clusterState.serverIndicesSortedByRegionCount;
    LOG.info("Servers sorted by region count:{}", Arrays.toString(servers));
    LOG.info("Cluster dump: {}", clusterState);
    // The test expects the last entry of the sorted index array to be the overloaded server.
    ServerName lastSortedServer = clusterState.servers[servers[servers.length - 1]];
    if (!mostLoadedServer.equals(lastSortedServer)) {
      LOG.error("Most loaded server: {} does not match: {}", mostLoadedServer, lastSortedServer);
    }
    assertEquals(mostLoadedServer, lastSortedServer);

    FavoredStochasticBalancer.FavoredNodeLoadPicker loadPicker =
      balancer.new FavoredNodeLoadPicker();
    boolean userRegionPicked = false;
    // Give the picker up to 100 attempts to propose a move of a user region.
    for (int i = 0; i < 100 && !userRegionPicked; i++) {
      BalanceAction action = loadPicker.generate(clusterState);
      if (action.getType() != BalanceAction.Type.MOVE_REGION) {
        continue;
      }
      MoveRegionAction moveRegionAction = (MoveRegionAction) action;
      RegionInfo region = clusterState.regions[moveRegionAction.getRegion()];
      assertNotEquals(-1, moveRegionAction.getToServer());
      ServerName destinationServer = clusterState.servers[moveRegionAction.getToServer()];
      assertEquals(mostLoadedServer, clusterState.servers[moveRegionAction.getFromServer()]);
      if (!region.getTable().isSystemTable()) {
        List<ServerName> favNodes = fnm.getFavoredNodes(region);
        // The lookup key is built from the destination's address with -1 as the second argument.
        assertTrue(favNodes.contains(ServerName.valueOf(destinationServer.getAddress(), -1)));
        userRegionPicked = true;
      }
    }
    assertTrue(userRegionPicked, "load picker did not pick expected regions in 100 iterations.");
  }

  /**
   * A region can only be moved to one of its favored nodes. This helper collects the regions of
   * {@code tableName} that have {@code serverName} as a favored node and are not currently hosted
   * on it, which makes it easy to write non-flaky tests.
   * @param tableName  table whose regions are considered
   * @param serverName candidate move target
   * @return regions of the table that can be moved to {@code serverName}
   */
  private List<RegionInfo> getRegionsThatCanBeMoved(TableName tableName, ServerName serverName) {
    List<RegionInfo> movableRegions = Lists.newArrayList();
    RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
    FavoredNodesManager fnm = cluster.getMaster().getFavoredNodesManager();
    for (RegionInfo regionInfo : fnm.getRegionsOfFavoredNode(serverName)) {
      boolean belongsToTable = regionInfo.getTable().equals(tableName);
      if (
        belongsToTable
          && !ServerName.isSameAddress(regionStates.getRegionServerOfRegion(regionInfo), serverName)
      ) {
        movableRegions.add(regionInfo);
      }
    }
    return movableRegions;
  }

  /**
   * Returns the regions of {@code tableName} that the given region server reports as hosted.
   * @param tableName table whose regions are wanted
   * @param source    region server to query
   * @return region infos of the table's regions on {@code source}
   */
  private List<RegionInfo> getTableRegionsFromServer(TableName tableName, ServerName source)
    throws IOException {
    List<RegionInfo> regionInfos = Lists.newArrayList();
    HRegionServer regionServer = cluster.getRegionServer(source);
    for (Region region : regionServer.getRegions(tableName)) {
      regionInfos.add(region.getRegionInfo());
    }
    return regionInfos;
  }

  /**
   * Finds the live region server hosting the most regions of {@code tableName}, ignoring any
   * server whose address matches an entry of {@code excludeNodes}.
   * @param tableName    table whose regions are counted
   * @param excludeNodes servers to skip; may be {@code null} to skip none
   * @return the most loaded server, or {@code null} if no eligible server hosts a region of the
   *         table
   */
  private ServerName getRSWithMaxRegions(TableName tableName, List<ServerName> excludeNodes)
    throws IOException {
    int maxRegions = 0;
    ServerName maxLoadedServer = null;

    for (JVMClusterUtil.RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
      ServerName serverName = rst.getRegionServer().getServerName();
      List<HRegion> regions = rst.getRegionServer().getRegions(tableName);
      LOG.debug("Server: {} regions: {}", serverName, regions.size());
      boolean excluded = excludeNodes != null && doesMatchExcludeNodes(excludeNodes, serverName);
      if (regions.size() > maxRegions && !excluded) {
        maxRegions = regions.size();
        maxLoadedServer = serverName;
      }
    }
    return maxLoadedServer;
  }

  /**
   * Tells whether {@code sn} has the same address as any server in {@code excludeNodes}.
   */
  private boolean doesMatchExcludeNodes(List<ServerName> excludeNodes, ServerName sn) {
    for (ServerName excludeSN : excludeNodes) {
      if (ServerName.isSameAddress(sn, excludeSN)) {
        return true;
      }
    }
    return false;
  }
}