001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MAX_RUNNING_TIME_KEY; 021import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MIN_COST_NEED_BALANCE_KEY; 022 023import java.time.Duration; 024import java.util.ArrayList; 025import java.util.Base64; 026import java.util.HashMap; 027import java.util.HashSet; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031import java.util.function.Function; 032import org.apache.hadoop.conf.Configuration; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.ServerName; 035import org.apache.hadoop.hbase.TableName; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.master.RegionPlan; 038import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKey; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042public final class CandidateGeneratorTestUtil { 043 044 private static final Logger LOG = LoggerFactory.getLogger(CandidateGeneratorTestUtil.class); 045 046 private CandidateGeneratorTestUtil() { 047 } 048 049 static void runBalancerToExhaustion(Configuration conf, 050 Map<ServerName, List<RegionInfo>> serverToRegions, 051 Set<Function<BalancerClusterState, Boolean>> expectations, float targetMaxBalancerCost) { 052 // Do the full plan. We're testing with a lot of regions 053 conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true); 054 conf.setLong(MAX_RUNNING_TIME_KEY, 15000); 055 056 conf.setFloat(MIN_COST_NEED_BALANCE_KEY, targetMaxBalancerCost); 057 058 BalancerClusterState cluster = createMockBalancerClusterState(serverToRegions); 059 StochasticLoadBalancer stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf); 060 printClusterDistribution(cluster, 0); 061 int balancerRuns = 0; 062 int actionsTaken = 0; 063 long balancingMillis = 0; 064 boolean isBalanced = false; 065 while (!isBalanced) { 066 balancerRuns++; 067 if (balancerRuns > 1000) { 068 throw new RuntimeException("Balancer failed to find balance & meet expectations"); 069 } 070 long start = System.currentTimeMillis(); 071 List<RegionPlan> regionPlans = 072 stochasticLoadBalancer.balanceCluster(partitionRegionsByTable(serverToRegions)); 073 balancingMillis += System.currentTimeMillis() - start; 074 actionsTaken++; 075 if (regionPlans != null) { 076 // Apply all plans to serverToRegions 077 for (RegionPlan rp : regionPlans) { 078 ServerName source = rp.getSource(); 079 ServerName dest = rp.getDestination(); 080 RegionInfo region = rp.getRegionInfo(); 081 082 // Update serverToRegions 083 serverToRegions.get(source).remove(region); 084 serverToRegions.get(dest).add(region); 085 actionsTaken++; 086 } 087 088 // Now rebuild cluster and balancer from updated serverToRegions 089 cluster = createMockBalancerClusterState(serverToRegions); 090 stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf); 091 } 092 printClusterDistribution(cluster, actionsTaken); 093 isBalanced = true; 094 for (Function<BalancerClusterState, Boolean> condition : expectations) { 095 // Check if we've met all expectations for the candidate generator 096 if (!condition.apply(cluster)) { 097 isBalanced = false; 098 break; 099 } 100 } 101 if (isBalanced) { // Check if the balancer thinks we're done too 102 LOG.info("All balancer conditions passed. Checking if balancer thinks it's done."); 103 if (stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) { 104 LOG.info("Balancer would still like to run"); 105 isBalanced = false; 106 } else { 107 LOG.info("Balancer is done"); 108 } 109 } 110 } 111 LOG.info("Balancing took {}sec", Duration.ofMillis(balancingMillis).toMinutes()); 112 } 113 114 /** 115 * Prints the current cluster distribution of regions per table per server 116 */ 117 static void printClusterDistribution(BalancerClusterState cluster, long actionsTaken) { 118 LOG.info("=== Cluster Distribution after {} balancer actions taken ===", actionsTaken); 119 120 for (int i = 0; i < cluster.numServers; i++) { 121 int[] regions = cluster.regionsPerServer[i]; 122 int regionCount = (regions == null) ? 0 : regions.length; 123 124 LOG.info("Server {}: {} regions", cluster.servers[i].getServerName(), regionCount); 125 126 if (regionCount > 0) { 127 Map<TableName, Integer> tableRegionCounts = new HashMap<>(); 128 129 for (int regionIndex : regions) { 130 RegionInfo regionInfo = cluster.regions[regionIndex]; 131 TableName tableName = regionInfo.getTable(); 132 tableRegionCounts.put(tableName, tableRegionCounts.getOrDefault(tableName, 0) + 1); 133 } 134 135 tableRegionCounts 136 .forEach((table, count) -> LOG.info(" - Table {}: {} regions", table, count)); 137 } 138 } 139 140 LOG.info("==========================================="); 141 } 142 143 /** 144 * Partitions the given serverToRegions map by table The tables are derived from the RegionInfo 145 * objects found in serverToRegions. 146 * @param serverToRegions The map of servers to their assigned regions. 147 * @return A map of tables to their server-to-region assignments. 148 */ 149 public static Map<TableName, Map<ServerName, List<RegionInfo>>> 150 partitionRegionsByTable(Map<ServerName, List<RegionInfo>> serverToRegions) { 151 152 // First, gather all tables from the regions 153 Set<TableName> allTables = new HashSet<>(); 154 for (List<RegionInfo> regions : serverToRegions.values()) { 155 for (RegionInfo region : regions) { 156 allTables.add(region.getTable()); 157 } 158 } 159 160 Map<TableName, Map<ServerName, List<RegionInfo>>> tablesToServersToRegions = new HashMap<>(); 161 162 // Initialize each table with all servers mapped to empty lists 163 for (TableName table : allTables) { 164 Map<ServerName, List<RegionInfo>> serverMap = new HashMap<>(); 165 for (ServerName server : serverToRegions.keySet()) { 166 serverMap.put(server, new ArrayList<>()); 167 } 168 tablesToServersToRegions.put(table, serverMap); 169 } 170 171 // Distribute regions to their respective tables 172 for (Map.Entry<ServerName, List<RegionInfo>> serverAndRegions : serverToRegions.entrySet()) { 173 ServerName server = serverAndRegions.getKey(); 174 List<RegionInfo> regions = serverAndRegions.getValue(); 175 176 for (RegionInfo region : regions) { 177 TableName regionTable = region.getTable(); 178 // Now we know for sure regionTable is in allTables 179 Map<ServerName, List<RegionInfo>> tableServerMap = 180 tablesToServersToRegions.get(regionTable); 181 tableServerMap.get(server).add(region); 182 } 183 } 184 185 return tablesToServersToRegions; 186 } 187 188 static StochasticLoadBalancer buildStochasticLoadBalancer(BalancerClusterState cluster, 189 Configuration conf) { 190 StochasticLoadBalancer stochasticLoadBalancer = 191 new StochasticLoadBalancer(new DummyMetricsStochasticBalancer()); 192 stochasticLoadBalancer.setClusterInfoProvider(new DummyClusterInfoProvider(conf)); 193 stochasticLoadBalancer.loadConf(conf); 194 stochasticLoadBalancer.initCosts(cluster); 195 return stochasticLoadBalancer; 196 } 197 198 static BalancerClusterState 199 createMockBalancerClusterState(Map<ServerName, List<RegionInfo>> serverToRegions) { 200 return new BalancerClusterState(serverToRegions, null, null, null, null); 201 } 202 203 /** 204 * Validates that each replica is isolated from its others. Ensures that no server hosts more than 205 * one replica of the same region (i.e., regions with identical start and end keys). 206 * @param cluster The current state of the cluster. 207 * @return true if all replicas are properly isolated, false otherwise. 208 */ 209 static boolean areAllReplicasDistributed(BalancerClusterState cluster) { 210 // Iterate over each server 211 for (int[] regionsPerServer : cluster.regionsPerServer) { 212 if (regionsPerServer == null || regionsPerServer.length == 0) { 213 continue; // Skip empty servers 214 } 215 216 Set<ReplicaKey> foundKeys = new HashSet<>(); 217 for (int regionIndex : regionsPerServer) { 218 RegionInfo regionInfo = cluster.regions[regionIndex]; 219 ReplicaKey replicaKey = new ReplicaKey(regionInfo); 220 if (foundKeys.contains(replicaKey)) { 221 // Violation: Multiple replicas of the same region on the same server 222 LOG.warn("Replica isolation violated: one server hosts multiple replicas of key [{}].", 223 generateRegionKey(regionInfo)); 224 return false; 225 } 226 227 foundKeys.add(replicaKey); 228 } 229 } 230 231 LOG.info( 232 "Replica isolation validation passed: No server hosts multiple replicas of the same region."); 233 return true; 234 } 235 236 /** 237 * Generic method to validate table isolation. 238 */ 239 static boolean isTableIsolated(BalancerClusterState cluster, TableName tableName, 240 String tableType) { 241 for (int i = 0; i < cluster.numServers; i++) { 242 int[] regionsOnServer = cluster.regionsPerServer[i]; 243 if (regionsOnServer == null || regionsOnServer.length == 0) { 244 continue; // Skip empty servers 245 } 246 247 boolean hasTargetTableRegion = false; 248 boolean hasOtherTableRegion = false; 249 250 for (int regionIndex : regionsOnServer) { 251 RegionInfo regionInfo = cluster.regions[regionIndex]; 252 if (regionInfo.getTable().equals(tableName)) { 253 hasTargetTableRegion = true; 254 } else { 255 hasOtherTableRegion = true; 256 } 257 258 // If the target table and any other table are on the same server, isolation is violated 259 if (hasTargetTableRegion && hasOtherTableRegion) { 260 LOG.debug( 261 "Server {} has both {} table regions and other table regions, violating isolation.", 262 cluster.servers[i].getServerName(), tableType); 263 return false; 264 } 265 } 266 } 267 LOG.debug("{} table isolation validation passed.", tableType); 268 return true; 269 } 270 271 /** 272 * Generates a unique key for a region based on its start and end keys. This method ensures that 273 * regions with identical start and end keys have the same key. 274 * @param regionInfo The RegionInfo object. 275 * @return A string representing the unique key of the region. 276 */ 277 private static String generateRegionKey(RegionInfo regionInfo) { 278 // Using Base64 encoding for byte arrays to ensure uniqueness and readability 279 String startKey = Base64.getEncoder().encodeToString(regionInfo.getStartKey()); 280 String endKey = Base64.getEncoder().encodeToString(regionInfo.getEndKey()); 281 282 return regionInfo.getTable().getNameAsString() + ":" + startKey + ":" + endKey; 283 } 284 285}