/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.balancer;

import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MAX_RUNNING_TIME_KEY;
import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MIN_COST_NEED_BALANCE_KEY;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Base64;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKey;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class CandidateGeneratorTestUtil {

  private static final Logger LOG = LoggerFactory.getLogger(CandidateGeneratorTestUtil.class);

  private CandidateGeneratorTestUtil() {
  }

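  /**
   * Repeatedly runs the stochastic balancer against the given region distribution, applying each
   * round of {@link RegionPlan}s back to {@code serverToRegions}, until every expectation in
   * {@code expectations} is satisfied and the balancer itself no longer wants to balance. Throws a
   * {@link RuntimeException} if this does not happen within 1000 runs.
   * <p>
   * Illustrative usage (a sketch only; the {@code serverToRegions} contents and the 0.05f target
   * cost are placeholders that an individual test would supply):
   *
   * <pre>{@code
   * Configuration conf = new Configuration(false);
   * Map<ServerName, List<RegionInfo>> serverToRegions = new HashMap<>(); // populated by the test
   * Set<Function<BalancerClusterState, Boolean>> expectations =
   *   Set.of(CandidateGeneratorTestUtil::areAllReplicasDistributed);
   * runBalancerToExhaustion(conf, serverToRegions, expectations, 0.05f);
   * }</pre>
   */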
  static void runBalancerToExhaustion(Configuration conf,
    Map<ServerName, List<RegionInfo>> serverToRegions,
    Set<Function<BalancerClusterState, Boolean>> expectations, float targetMaxBalancerCost) {
    // Do the full plan. We're testing with a lot of regions
    conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
    conf.setLong(MAX_RUNNING_TIME_KEY, 15000);

    conf.setFloat(MIN_COST_NEED_BALANCE_KEY, targetMaxBalancerCost);

    BalancerClusterState cluster = createMockBalancerClusterState(serverToRegions);
    StochasticLoadBalancer stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf);
    printClusterDistribution(cluster, 0);
    int balancerRuns = 0;
    int actionsTaken = 0;
    long balancingMillis = 0;
    boolean isBalanced = false;
    while (!isBalanced) {
      balancerRuns++;
      if (balancerRuns > 1000) {
        throw new RuntimeException("Balancer failed to find balance & meet expectations");
      }
      long start = System.currentTimeMillis();
      List<RegionPlan> regionPlans =
        stochasticLoadBalancer.balanceCluster(partitionRegionsByTable(serverToRegions));
      balancingMillis += System.currentTimeMillis() - start;
      actionsTaken++;
      if (regionPlans != null) {
        // Apply all plans to serverToRegions
        for (RegionPlan rp : regionPlans) {
          ServerName source = rp.getSource();
          ServerName dest = rp.getDestination();
          RegionInfo region = rp.getRegionInfo();

          // Update serverToRegions
          serverToRegions.get(source).remove(region);
          serverToRegions.get(dest).add(region);
          actionsTaken++;
        }

        // Now rebuild cluster and balancer from updated serverToRegions
        cluster = createMockBalancerClusterState(serverToRegions);
        stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf);
      }
      printClusterDistribution(cluster, actionsTaken);
      isBalanced = true;
      for (Function<BalancerClusterState, Boolean> condition : expectations) {
        // Check if we've met all expectations for the candidate generator
        if (!condition.apply(cluster)) {
          isBalanced = false;
          break;
        }
      }
      if (isBalanced) { // Check if the balancer thinks we're done too
        LOG.info("All balancer conditions passed. Checking if balancer thinks it's done.");
        if (stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
          LOG.info("Balancer would still like to run");
          isBalanced = false;
        } else {
          LOG.info("Balancer is done");
        }
      }
    }
    LOG.info("Balancing took {}sec", Duration.ofMillis(balancingMillis).toSeconds());
  }

  /**
   * Prints the current cluster distribution of regions per table per server.
   */
  static void printClusterDistribution(BalancerClusterState cluster, long actionsTaken) {
    LOG.info("=== Cluster Distribution after {} balancer actions taken ===", actionsTaken);

    for (int i = 0; i < cluster.numServers; i++) {
      int[] regions = cluster.regionsPerServer[i];
      int regionCount = (regions == null) ? 0 : regions.length;

      LOG.info("Server {}: {} regions", cluster.servers[i].getServerName(), regionCount);

      if (regionCount > 0) {
        Map<TableName, Integer> tableRegionCounts = new HashMap<>();

        for (int regionIndex : regions) {
          RegionInfo regionInfo = cluster.regions[regionIndex];
          TableName tableName = regionInfo.getTable();
          tableRegionCounts.put(tableName, tableRegionCounts.getOrDefault(tableName, 0) + 1);
        }

        tableRegionCounts
          .forEach((table, count) -> LOG.info("  - Table {}: {} regions", table, count));
      }
    }

    LOG.info("===========================================");
  }

  /**
   * Partitions the given serverToRegions map by table. The tables are derived from the RegionInfo
   * objects found in serverToRegions.
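   * <p>
   * For example (illustrative names only), an input of
   * {@code {server1 -> [t1r1, t2r1], server2 -> [t1r2]}} becomes
   * {@code {t1 -> {server1 -> [t1r1], server2 -> [t1r2]}, t2 -> {server1 -> [t2r1], server2 -> []}}}.
   * Every table's inner map has an entry for every server, even when that server hosts no regions
   * of the table.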
   * @param serverToRegions The map of servers to their assigned regions.
   * @return A map of tables to their server-to-region assignments.
   */
  public static Map<TableName, Map<ServerName, List<RegionInfo>>>
    partitionRegionsByTable(Map<ServerName, List<RegionInfo>> serverToRegions) {

    // First, gather all tables from the regions
    Set<TableName> allTables = new HashSet<>();
    for (List<RegionInfo> regions : serverToRegions.values()) {
      for (RegionInfo region : regions) {
        allTables.add(region.getTable());
      }
    }

    Map<TableName, Map<ServerName, List<RegionInfo>>> tablesToServersToRegions = new HashMap<>();

    // Initialize each table with all servers mapped to empty lists
    for (TableName table : allTables) {
      Map<ServerName, List<RegionInfo>> serverMap = new HashMap<>();
      for (ServerName server : serverToRegions.keySet()) {
        serverMap.put(server, new ArrayList<>());
      }
      tablesToServersToRegions.put(table, serverMap);
    }

    // Distribute regions to their respective tables
    for (Map.Entry<ServerName, List<RegionInfo>> serverAndRegions : serverToRegions.entrySet()) {
      ServerName server = serverAndRegions.getKey();
      List<RegionInfo> regions = serverAndRegions.getValue();

      for (RegionInfo region : regions) {
        TableName regionTable = region.getTable();
        // Now we know for sure regionTable is in allTables
        Map<ServerName, List<RegionInfo>> tableServerMap =
          tablesToServersToRegions.get(regionTable);
        tableServerMap.get(server).add(region);
      }
    }

    return tablesToServersToRegions;
  }

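  /**
   * Builds a {@link StochasticLoadBalancer} backed by dummy metrics and a dummy cluster info
   * provider, loads the given configuration, and initializes its cost functions from the provided
   * cluster state.
   */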
  static StochasticLoadBalancer buildStochasticLoadBalancer(BalancerClusterState cluster,
    Configuration conf) {
    StochasticLoadBalancer stochasticLoadBalancer =
      new StochasticLoadBalancer(new DummyMetricsStochasticBalancer());
    stochasticLoadBalancer.setClusterInfoProvider(new DummyClusterInfoProvider(conf));
    stochasticLoadBalancer.loadConf(conf);
    stochasticLoadBalancer.initCosts(cluster);
    return stochasticLoadBalancer;
  }

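  /**
   * Wraps the given server-to-region assignments in a {@link BalancerClusterState}, leaving the
   * remaining constructor inputs {@code null}.
   */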
  static BalancerClusterState
    createMockBalancerClusterState(Map<ServerName, List<RegionInfo>> serverToRegions) {
    return new BalancerClusterState(serverToRegions, null, null, null, null);
  }

  /**
   * Validates that each region's replicas are isolated from one another. Ensures that no server
   * hosts more than one replica of the same region (i.e., regions with identical start and end
   * keys).
   * @param cluster The current state of the cluster.
   * @return true if all replicas are properly isolated, false otherwise.
   */
  static boolean areAllReplicasDistributed(BalancerClusterState cluster) {
    // Iterate over each server
    for (int[] regionsPerServer : cluster.regionsPerServer) {
      if (regionsPerServer == null || regionsPerServer.length == 0) {
        continue; // Skip empty servers
      }

      Set<ReplicaKey> foundKeys = new HashSet<>();
      for (int regionIndex : regionsPerServer) {
        RegionInfo regionInfo = cluster.regions[regionIndex];
        ReplicaKey replicaKey = new ReplicaKey(regionInfo);
        if (foundKeys.contains(replicaKey)) {
          // Violation: Multiple replicas of the same region on the same server
          LOG.warn("Replica isolation violated: one server hosts multiple replicas of key [{}].",
            generateRegionKey(regionInfo));
          return false;
        }

        foundKeys.add(replicaKey);
      }
    }

    LOG.info(
      "Replica isolation validation passed: No server hosts multiple replicas of the same region.");
    return true;
  }

  /**
   * Validates that the given table is isolated: no server may host regions of {@code tableName}
   * alongside regions of any other table. The {@code tableType} label is only used in log
   * messages.
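   * <p>
   * For example, a test asserting meta isolation might call
   * {@code isTableIsolated(cluster, TableName.META_TABLE_NAME, "Meta")}.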
   */
  static boolean isTableIsolated(BalancerClusterState cluster, TableName tableName,
    String tableType) {
    for (int i = 0; i < cluster.numServers; i++) {
      int[] regionsOnServer = cluster.regionsPerServer[i];
      if (regionsOnServer == null || regionsOnServer.length == 0) {
        continue; // Skip empty servers
      }

      boolean hasTargetTableRegion = false;
      boolean hasOtherTableRegion = false;

      for (int regionIndex : regionsOnServer) {
        RegionInfo regionInfo = cluster.regions[regionIndex];
        if (regionInfo.getTable().equals(tableName)) {
          hasTargetTableRegion = true;
        } else {
          hasOtherTableRegion = true;
        }

        // If the target table and any other table are on the same server, isolation is violated
        if (hasTargetTableRegion && hasOtherTableRegion) {
          LOG.debug(
            "Server {} has both {} table regions and other table regions, violating isolation.",
            cluster.servers[i].getServerName(), tableType);
          return false;
        }
      }
    }
    LOG.debug("{} table isolation validation passed.", tableType);
    return true;
  }

  /**
   * Generates a unique key for a region based on its start and end keys. This method ensures that
   * regions with identical start and end keys have the same key.
   * @param regionInfo The RegionInfo object.
   * @return A string representing the unique key of the region.
   */
  private static String generateRegionKey(RegionInfo regionInfo) {
    // Using Base64 encoding for byte arrays to ensure uniqueness and readability
    String startKey = Base64.getEncoder().encodeToString(regionInfo.getStartKey());
    String endKey = Base64.getEncoder().encodeToString(regionInfo.getEndKey());

    return regionInfo.getTable().getNameAsString() + ":" + startKey + ":" + endKey;
  }

}