001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import edu.umd.cs.findbugs.annotations.NonNull; 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.Collection; 024import java.util.Collections; 025import java.util.HashMap; 026import java.util.Iterator; 027import java.util.List; 028import java.util.Map; 029import java.util.NavigableMap; 030import java.util.Random; 031import java.util.Set; 032import java.util.TreeMap; 033import java.util.concurrent.ThreadLocalRandom; 034import java.util.function.Predicate; 035import java.util.stream.Collectors; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.hbase.ClusterMetrics; 038import org.apache.hadoop.hbase.HBaseIOException; 039import org.apache.hadoop.hbase.HConstants; 040import org.apache.hadoop.hbase.ServerMetrics; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.client.RegionInfo; 044import org.apache.hadoop.hbase.client.TableDescriptor; 045import org.apache.hadoop.hbase.master.LoadBalancer; 046import org.apache.hadoop.hbase.master.MasterServices; 047import org.apache.hadoop.hbase.master.RackManager; 048import org.apache.hadoop.hbase.master.RegionPlan; 049import org.apache.yetus.audience.InterfaceAudience; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053import org.apache.hbase.thirdparty.com.google.common.base.Joiner; 054import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap; 055import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 056import org.apache.hbase.thirdparty.com.google.common.collect.Sets; 057 058/** 059 * The base class for load balancers. It provides the the functions used to by 060 * {@link org.apache.hadoop.hbase.master.assignment.AssignmentManager} to assign regions in the edge 061 * cases. It doesn't provide an implementation of the actual balancing algorithm. 062 */ 063@InterfaceAudience.Private 064@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "IS2_INCONSISTENT_SYNC", 065 justification = "All the unsynchronized access is before initialization") 066public abstract class BaseLoadBalancer implements LoadBalancer { 067 068 private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class); 069 070 public static final String BALANCER_DECISION_BUFFER_ENABLED = 071 "hbase.master.balancer.decision.buffer.enabled"; 072 public static final boolean DEFAULT_BALANCER_DECISION_BUFFER_ENABLED = false; 073 074 public static final String BALANCER_REJECTION_BUFFER_ENABLED = 075 "hbase.master.balancer.rejection.buffer.enabled"; 076 public static final boolean DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED = false; 077 078 public static final boolean DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE = false; 079 080 protected static final int MIN_SERVER_BALANCE = 2; 081 private volatile boolean stopped = false; 082 083 private static final Predicate<ServerMetrics> IDLE_SERVER_PREDICATOR = 084 load -> load.getRegionMetrics().isEmpty(); 085 086 protected volatile RegionLocationFinder regionFinder; 087 protected boolean useRegionFinder; 088 protected boolean isByTable = DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE; 089 090 // slop for regions 091 protected float slop; 092 protected volatile RackManager rackManager; 093 protected MetricsBalancer metricsBalancer = null; 094 protected ClusterMetrics clusterStatus = null; 095 protected ServerName masterServerName; 096 protected MasterServices services; 097 098 /** 099 * @deprecated since 2.4.0, will be removed in 3.0.0. 100 * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a> 101 */ 102 @Deprecated 103 protected boolean onlySystemTablesOnMaster; 104 105 /** 106 * The constructor that uses the basic MetricsBalancer 107 */ 108 protected BaseLoadBalancer() { 109 this(null); 110 } 111 112 /** 113 * This Constructor accepts an instance of MetricsBalancer, which will be used instead of creating 114 * a new one 115 */ 116 protected BaseLoadBalancer(MetricsBalancer metricsBalancer) { 117 this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer(); 118 } 119 120 protected final Configuration getConf() { 121 return services.getConfiguration(); 122 } 123 124 /** 125 * Check if a region belongs to some system table. If so, the primary replica may be expected to 126 * be put on the master regionserver. 127 * @deprecated since 2.4.0, will be removed in 3.0.0. 128 * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a> 129 */ 130 @Deprecated 131 public boolean shouldBeOnMaster(RegionInfo region) { 132 return this.onlySystemTablesOnMaster && region.getTable().isSystemTable(); 133 } 134 135 /** 136 * Balance the regions that should be on master regionserver. 137 * @deprecated since 2.4.0, will be removed in 3.0.0. 138 * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a> 139 */ 140 @Deprecated 141 protected List<RegionPlan> balanceMasterRegions(Map<ServerName, List<RegionInfo>> clusterMap) { 142 if (masterServerName == null || clusterMap == null || clusterMap.size() <= 1) return null; 143 List<RegionPlan> plans = null; 144 List<RegionInfo> regions = clusterMap.get(masterServerName); 145 if (regions != null) { 146 Iterator<ServerName> keyIt = null; 147 for (RegionInfo region : regions) { 148 if (shouldBeOnMaster(region)) continue; 149 150 // Find a non-master regionserver to host the region 151 if (keyIt == null || !keyIt.hasNext()) { 152 keyIt = clusterMap.keySet().iterator(); 153 } 154 ServerName dest = keyIt.next(); 155 if (masterServerName.equals(dest)) { 156 if (!keyIt.hasNext()) { 157 keyIt = clusterMap.keySet().iterator(); 158 } 159 dest = keyIt.next(); 160 } 161 162 // Move this region away from the master regionserver 163 RegionPlan plan = new RegionPlan(region, masterServerName, dest); 164 if (plans == null) { 165 plans = new ArrayList<>(); 166 } 167 plans.add(plan); 168 } 169 } 170 for (Map.Entry<ServerName, List<RegionInfo>> server : clusterMap.entrySet()) { 171 if (masterServerName.equals(server.getKey())) continue; 172 for (RegionInfo region : server.getValue()) { 173 if (!shouldBeOnMaster(region)) continue; 174 175 // Move this region to the master regionserver 176 RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName); 177 if (plans == null) { 178 plans = new ArrayList<>(); 179 } 180 plans.add(plan); 181 } 182 } 183 return plans; 184 } 185 186 /** 187 * If master is configured to carry system tables only, in here is where we figure what to assign 188 * it. 189 * @deprecated since 2.4.0, will be removed in 3.0.0. 190 * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a> 191 */ 192 @Deprecated 193 @NonNull 194 protected Map<ServerName, List<RegionInfo>> 195 assignMasterSystemRegions(Collection<RegionInfo> regions, List<ServerName> servers) { 196 Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>(); 197 if (this.onlySystemTablesOnMaster) { 198 if (masterServerName != null && servers.contains(masterServerName)) { 199 assignments.put(masterServerName, new ArrayList<>()); 200 for (RegionInfo region : regions) { 201 if (shouldBeOnMaster(region)) { 202 assignments.get(masterServerName).add(region); 203 } 204 } 205 } 206 } 207 return assignments; 208 } 209 210 @Override 211 public synchronized void updateClusterMetrics(ClusterMetrics st) { 212 this.clusterStatus = st; 213 if (useRegionFinder) { 214 regionFinder.setClusterMetrics(st); 215 } 216 } 217 218 @Override 219 public void setMasterServices(MasterServices masterServices) { 220 masterServerName = masterServices.getServerName(); 221 this.services = masterServices; 222 } 223 224 @Override 225 public synchronized void postMasterStartupInitialize() { 226 if (services != null && regionFinder != null) { 227 try { 228 Set<RegionInfo> regions = 229 services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet(); 230 regionFinder.refreshAndWait(regions); 231 } catch (Exception e) { 232 LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e); 233 } 234 } 235 } 236 237 protected final boolean idleRegionServerExist(BalancerClusterState c) { 238 boolean isServerExistsWithMoreRegions = false; 239 boolean isServerExistsWithZeroRegions = false; 240 for (int[] serverList : c.regionsPerServer) { 241 if (serverList.length > 1) { 242 isServerExistsWithMoreRegions = true; 243 } 244 if (serverList.length == 0) { 245 isServerExistsWithZeroRegions = true; 246 } 247 } 248 return isServerExistsWithMoreRegions && isServerExistsWithZeroRegions; 249 } 250 251 protected final boolean sloppyRegionServerExist(ClusterLoadState cs) { 252 if (slop < 0) { 253 LOG.debug("Slop is less than zero, not checking for sloppiness."); 254 return false; 255 } 256 float average = cs.getLoadAverage(); // for logging 257 int floor = (int) Math.floor(average * (1 - slop)); 258 int ceiling = (int) Math.ceil(average * (1 + slop)); 259 if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) { 260 NavigableMap<ServerAndLoad, List<RegionInfo>> serversByLoad = cs.getServersByLoad(); 261 if (LOG.isTraceEnabled()) { 262 // If nothing to balance, then don't say anything unless trace-level logging. 263 LOG.trace("Skipping load balancing because balanced cluster; " + "servers=" 264 + cs.getNumServers() + " regions=" + cs.getNumRegions() + " average=" + average 265 + " mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded=" 266 + serversByLoad.firstKey().getLoad()); 267 } 268 return false; 269 } 270 return true; 271 } 272 273 /** 274 * Generates a bulk assignment plan to be used on cluster startup using a simple round-robin 275 * assignment. 276 * <p/> 277 * Takes a list of all the regions and all the servers in the cluster and returns a map of each 278 * server to the regions that it should be assigned. 279 * <p/> 280 * Currently implemented as a round-robin assignment. Same invariant as load balancing, all 281 * servers holding floor(avg) or ceiling(avg). TODO: Use block locations from HDFS to place 282 * regions with their blocks 283 * @param regions all regions 284 * @param servers all servers 285 * @return map of server to the regions it should take, or emptyMap if no assignment is possible 286 * (ie. no servers) 287 */ 288 @Override 289 @NonNull 290 public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions, 291 List<ServerName> servers) throws HBaseIOException { 292 metricsBalancer.incrMiscInvocations(); 293 Map<ServerName, List<RegionInfo>> assignments = assignMasterSystemRegions(regions, servers); 294 if (!assignments.isEmpty()) { 295 servers = new ArrayList<>(servers); 296 // Guarantee not to put other regions on master 297 servers.remove(masterServerName); 298 List<RegionInfo> masterRegions = assignments.get(masterServerName); 299 if (!masterRegions.isEmpty()) { 300 regions = new ArrayList<>(regions); 301 regions.removeAll(masterRegions); 302 } 303 } 304 /** 305 * only need assign system table 306 */ 307 if (regions.isEmpty()) { 308 return assignments; 309 } 310 311 int numServers = servers == null ? 0 : servers.size(); 312 if (numServers == 0) { 313 LOG.warn("Wanted to do round robin assignment but no servers to assign to"); 314 return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions)); 315 } 316 317 // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the 318 // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate 319 // generator for AssignRegionAction. The LB will ensure the regions are mostly local 320 // and balanced. This should also run fast with fewer number of iterations. 321 322 if (numServers == 1) { // Only one server, nothing fancy we can do here 323 ServerName server = servers.get(0); 324 assignments.put(server, new ArrayList<>(regions)); 325 return assignments; 326 } 327 328 BalancerClusterState cluster = createCluster(servers, regions); 329 roundRobinAssignment(cluster, regions, servers, assignments); 330 return assignments; 331 } 332 333 private BalancerClusterState createCluster(List<ServerName> servers, 334 Collection<RegionInfo> regions) throws HBaseIOException { 335 boolean hasRegionReplica = false; 336 try { 337 if (services != null && services.getTableDescriptors() != null) { 338 Map<String, TableDescriptor> tds = services.getTableDescriptors().getAll(); 339 for (RegionInfo regionInfo : regions) { 340 TableDescriptor td = tds.get(regionInfo.getTable().getNameWithNamespaceInclAsString()); 341 if (td != null && td.getRegionReplication() > 1) { 342 hasRegionReplica = true; 343 break; 344 } 345 } 346 } 347 } catch (IOException ioe) { 348 throw new HBaseIOException(ioe); 349 } 350 351 // Get the snapshot of the current assignments for the regions in question, and then create 352 // a cluster out of it. Note that we might have replicas already assigned to some servers 353 // earlier. So we want to get the snapshot to see those assignments, but this will only contain 354 // replicas of the regions that are passed (for performance). 355 Map<ServerName, List<RegionInfo>> clusterState = null; 356 if (!hasRegionReplica) { 357 clusterState = getRegionAssignmentsByServer(regions); 358 } else { 359 // for the case where we have region replica it is better we get the entire cluster's snapshot 360 clusterState = getRegionAssignmentsByServer(null); 361 } 362 363 for (ServerName server : servers) { 364 if (!clusterState.containsKey(server)) { 365 clusterState.put(server, Collections.emptyList()); 366 } 367 } 368 return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager); 369 } 370 371 private List<ServerName> findIdleServers(List<ServerName> servers) { 372 return this.services.getServerManager().getOnlineServersListWithPredicator(servers, 373 IDLE_SERVER_PREDICATOR); 374 } 375 376 /** 377 * Used to assign a single region to a random server. 378 */ 379 @Override 380 public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers) 381 throws HBaseIOException { 382 metricsBalancer.incrMiscInvocations(); 383 if (servers != null && servers.contains(masterServerName)) { 384 if (shouldBeOnMaster(regionInfo)) { 385 return masterServerName; 386 } 387 if (!LoadBalancer.isTablesOnMaster(getConf())) { 388 // Guarantee we do not put any regions on master 389 servers = new ArrayList<>(servers); 390 servers.remove(masterServerName); 391 } 392 } 393 394 int numServers = servers == null ? 0 : servers.size(); 395 if (numServers == 0) { 396 LOG.warn("Wanted to retain assignment but no servers to assign to"); 397 return null; 398 } 399 if (numServers == 1) { // Only one server, nothing fancy we can do here 400 return servers.get(0); 401 } 402 List<ServerName> idleServers = findIdleServers(servers); 403 if (idleServers.size() == 1) { 404 return idleServers.get(0); 405 } 406 final List<ServerName> finalServers = idleServers.isEmpty() ? servers : idleServers; 407 List<RegionInfo> regions = Lists.newArrayList(regionInfo); 408 BalancerClusterState cluster = createCluster(finalServers, regions); 409 return randomAssignment(cluster, regionInfo, finalServers); 410 } 411 412 /** 413 * Generates a bulk assignment startup plan, attempting to reuse the existing assignment 414 * information from META, but adjusting for the specified list of available/online servers 415 * available for assignment. 416 * <p> 417 * Takes a map of all regions to their existing assignment from META. Also takes a list of online 418 * servers for regions to be assigned to. Attempts to retain all assignment, so in some instances 419 * initial assignment will not be completely balanced. 420 * <p> 421 * Any leftover regions without an existing server to be assigned to will be assigned randomly to 422 * available servers. 423 * @param regions regions and existing assignment from meta 424 * @param servers available servers 425 * @return map of servers and regions to be assigned to them, or emptyMap if no assignment is 426 * possible (ie. no servers) 427 */ 428 @Override 429 @NonNull 430 public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions, 431 List<ServerName> servers) throws HBaseIOException { 432 // Update metrics 433 metricsBalancer.incrMiscInvocations(); 434 Map<ServerName, List<RegionInfo>> assignments = 435 assignMasterSystemRegions(regions.keySet(), servers); 436 if (!assignments.isEmpty()) { 437 servers = new ArrayList<>(servers); 438 // Guarantee not to put other regions on master 439 servers.remove(masterServerName); 440 List<RegionInfo> masterRegions = assignments.get(masterServerName); 441 regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey())) 442 .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); 443 } 444 if (regions.isEmpty()) { 445 return assignments; 446 } 447 448 int numServers = servers == null ? 0 : servers.size(); 449 if (numServers == 0) { 450 LOG.warn("Wanted to do retain assignment but no servers to assign to"); 451 return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions.keySet())); 452 } 453 if (numServers == 1) { // Only one server, nothing fancy we can do here 454 ServerName server = servers.get(0); 455 assignments.put(server, new ArrayList<>(regions.keySet())); 456 return assignments; 457 } 458 459 // Group all of the old assignments by their hostname. 460 // We can't group directly by ServerName since the servers all have 461 // new start-codes. 462 463 // Group the servers by their hostname. It's possible we have multiple 464 // servers on the same host on different ports. 465 ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create(); 466 for (ServerName server : servers) { 467 assignments.put(server, new ArrayList<>()); 468 serversByHostname.put(server.getHostnameLowerCase(), server); 469 } 470 471 // Collection of the hostnames that used to have regions 472 // assigned, but for which we no longer have any RS running 473 // after the cluster restart. 474 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet(); 475 476 // If the old servers aren't present, lets assign those regions later. 477 List<RegionInfo> randomAssignRegions = Lists.newArrayList(); 478 479 int numRandomAssignments = 0; 480 int numRetainedAssigments = 0; 481 for (Map.Entry<RegionInfo, ServerName> entry : regions.entrySet()) { 482 RegionInfo region = entry.getKey(); 483 ServerName oldServerName = entry.getValue(); 484 List<ServerName> localServers = new ArrayList<>(); 485 if (oldServerName != null) { 486 localServers = serversByHostname.get(oldServerName.getHostnameLowerCase()); 487 } 488 if (localServers.isEmpty()) { 489 // No servers on the new cluster match up with this hostname, assign randomly, later. 490 randomAssignRegions.add(region); 491 if (oldServerName != null) { 492 oldHostsNoLongerPresent.add(oldServerName.getHostnameLowerCase()); 493 } 494 } else if (localServers.size() == 1) { 495 // the usual case - one new server on same host 496 ServerName target = localServers.get(0); 497 assignments.get(target).add(region); 498 numRetainedAssigments++; 499 } else { 500 // multiple new servers in the cluster on this same host 501 if (localServers.contains(oldServerName)) { 502 assignments.get(oldServerName).add(region); 503 numRetainedAssigments++; 504 } else { 505 ServerName target = null; 506 for (ServerName tmp : localServers) { 507 if (tmp.getPort() == oldServerName.getPort()) { 508 target = tmp; 509 assignments.get(tmp).add(region); 510 numRetainedAssigments++; 511 break; 512 } 513 } 514 if (target == null) { 515 randomAssignRegions.add(region); 516 } 517 } 518 } 519 } 520 521 // If servers from prior assignment aren't present, then lets do randomAssignment on regions. 522 if (randomAssignRegions.size() > 0) { 523 BalancerClusterState cluster = createCluster(servers, regions.keySet()); 524 for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) { 525 ServerName sn = entry.getKey(); 526 for (RegionInfo region : entry.getValue()) { 527 cluster.doAssignRegion(region, sn); 528 } 529 } 530 for (RegionInfo region : randomAssignRegions) { 531 ServerName target = randomAssignment(cluster, region, servers); 532 assignments.get(target).add(region); 533 numRandomAssignments++; 534 } 535 } 536 537 String randomAssignMsg = ""; 538 if (numRandomAssignments > 0) { 539 randomAssignMsg = numRandomAssignments + " regions were assigned " 540 + "to random hosts, since the old hosts for these regions are no " 541 + "longer present in the cluster. These hosts were:\n " 542 + Joiner.on("\n ").join(oldHostsNoLongerPresent); 543 } 544 545 LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments 546 + " retained the pre-restart assignment. " + randomAssignMsg); 547 return assignments; 548 } 549 550 protected float getDefaultSlop() { 551 return 0.2f; 552 } 553 554 private RegionLocationFinder createRegionLocationFinder(Configuration conf) { 555 RegionLocationFinder finder = new RegionLocationFinder(); 556 finder.setConf(conf); 557 finder.setServices(services); 558 return finder; 559 } 560 561 protected void loadConf(Configuration conf) { 562 this.slop = conf.getFloat("hbase.regions.slop", getDefaultSlop()); 563 this.rackManager = new RackManager(getConf()); 564 this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(conf); 565 useRegionFinder = conf.getBoolean("hbase.master.balancer.uselocality", true); 566 if (useRegionFinder) { 567 regionFinder = createRegionLocationFinder(conf); 568 } else { 569 regionFinder = null; 570 } 571 this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, 572 DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE); 573 // Print out base configs. Don't print overallSlop since it for simple balancer exclusively. 574 LOG.info("slop={}", this.slop); 575 } 576 577 @Override 578 public void initialize() { 579 loadConf(getConf()); 580 } 581 582 @Override 583 public void regionOnline(RegionInfo regionInfo, ServerName sn) { 584 } 585 586 @Override 587 public void regionOffline(RegionInfo regionInfo) { 588 } 589 590 @Override 591 public boolean isStopped() { 592 return stopped; 593 } 594 595 @Override 596 public void stop(String why) { 597 LOG.info("Load Balancer stop requested: {}", why); 598 stopped = true; 599 } 600 601 /** 602 * Updates the balancer status tag reported to JMX 603 */ 604 public void updateBalancerStatus(boolean status) { 605 metricsBalancer.balancerStatus(status); 606 } 607 608 /** 609 * Used to assign a single region to a random server. 610 */ 611 private ServerName randomAssignment(BalancerClusterState cluster, RegionInfo regionInfo, 612 List<ServerName> servers) { 613 int numServers = servers.size(); // servers is not null, numServers > 1 614 ServerName sn = null; 615 final int maxIterations = numServers * 4; 616 int iterations = 0; 617 List<ServerName> usedSNs = new ArrayList<>(servers.size()); 618 Random rand = ThreadLocalRandom.current(); 619 do { 620 int i = rand.nextInt(numServers); 621 sn = servers.get(i); 622 if (!usedSNs.contains(sn)) { 623 usedSNs.add(sn); 624 } 625 } while (cluster.wouldLowerAvailability(regionInfo, sn) && iterations++ < maxIterations); 626 if (iterations >= maxIterations) { 627 // We have reached the max. Means the servers that we collected is still lowering the 628 // availability 629 for (ServerName unusedServer : servers) { 630 if (!usedSNs.contains(unusedServer)) { 631 // check if any other unused server is there for us to use. 632 // If so use it. Else we have not other go but to go with one of them 633 if (!cluster.wouldLowerAvailability(regionInfo, unusedServer)) { 634 sn = unusedServer; 635 break; 636 } 637 } 638 } 639 } 640 cluster.doAssignRegion(regionInfo, sn); 641 return sn; 642 } 643 644 /** 645 * Round robin a list of regions to a list of servers 646 */ 647 private void roundRobinAssignment(BalancerClusterState cluster, List<RegionInfo> regions, 648 List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) { 649 Random rand = ThreadLocalRandom.current(); 650 List<RegionInfo> unassignedRegions = new ArrayList<>(); 651 int numServers = servers.size(); 652 int numRegions = regions.size(); 653 int max = (int) Math.ceil((float) numRegions / numServers); 654 int serverIdx = 0; 655 if (numServers > 1) { 656 serverIdx = rand.nextInt(numServers); 657 } 658 int regionIdx = 0; 659 for (int j = 0; j < numServers; j++) { 660 ServerName server = servers.get((j + serverIdx) % numServers); 661 List<RegionInfo> serverRegions = new ArrayList<>(max); 662 for (int i = regionIdx; i < numRegions; i += numServers) { 663 RegionInfo region = regions.get(i % numRegions); 664 if (cluster.wouldLowerAvailability(region, server)) { 665 unassignedRegions.add(region); 666 } else { 667 serverRegions.add(region); 668 cluster.doAssignRegion(region, server); 669 } 670 } 671 assignments.put(server, serverRegions); 672 regionIdx++; 673 } 674 675 List<RegionInfo> lastFewRegions = new ArrayList<>(); 676 // assign the remaining by going through the list and try to assign to servers one-by-one 677 serverIdx = rand.nextInt(numServers); 678 for (RegionInfo region : unassignedRegions) { 679 boolean assigned = false; 680 for (int j = 0; j < numServers; j++) { // try all servers one by one 681 ServerName server = servers.get((j + serverIdx) % numServers); 682 if (cluster.wouldLowerAvailability(region, server)) { 683 continue; 684 } else { 685 assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region); 686 cluster.doAssignRegion(region, server); 687 serverIdx = (j + serverIdx + 1) % numServers; // remain from next server 688 assigned = true; 689 break; 690 } 691 } 692 if (!assigned) { 693 lastFewRegions.add(region); 694 } 695 } 696 // just sprinkle the rest of the regions on random regionservers. The balanceCluster will 697 // make it optimal later. we can end up with this if numReplicas > numServers. 698 for (RegionInfo region : lastFewRegions) { 699 int i = rand.nextInt(numServers); 700 ServerName server = servers.get(i); 701 assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region); 702 cluster.doAssignRegion(region, server); 703 } 704 } 705 706 private Map<ServerName, List<RegionInfo>> 707 getRegionAssignmentsByServer(Collection<RegionInfo> regions) { 708 if (this.services != null && this.services.getAssignmentManager() != null) { 709 return this.services.getAssignmentManager().getSnapShotOfAssignment(regions); 710 } else { 711 return new HashMap<>(); 712 } 713 } 714 715 protected final Map<ServerName, List<RegionInfo>> 716 toEnsumbleTableLoad(Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable) { 717 Map<ServerName, List<RegionInfo>> returnMap = new TreeMap<>(); 718 for (Map<ServerName, List<RegionInfo>> serverNameListMap : LoadOfAllTable.values()) { 719 serverNameListMap.forEach((serverName, regionInfoList) -> { 720 List<RegionInfo> regionInfos = 721 returnMap.computeIfAbsent(serverName, k -> new ArrayList<>()); 722 regionInfos.addAll(regionInfoList); 723 }); 724 } 725 return returnMap; 726 } 727 728 /** 729 * Perform the major balance operation for table, all sub classes should override this method. 730 * <p/> 731 * Will be invoked by {@link #balanceCluster(Map)}. If 732 * {@link HConstants#HBASE_MASTER_LOADBALANCE_BYTABLE} is enabled, we will call this method 733 * multiple times, one table a time, where we will only pass in the regions for a single table 734 * each time. If not, we will pass in all the regions at once, and the {@code tableName} will be 735 * {@link HConstants#ENSEMBLE_TABLE_NAME}. 736 * @param tableName the table to be balanced 737 * @param loadOfOneTable region load of servers for the specific one table 738 * @return List of plans 739 */ 740 protected abstract List<RegionPlan> balanceTable(TableName tableName, 741 Map<ServerName, List<RegionInfo>> loadOfOneTable); 742 743 /** 744 * Called before actually executing balanceCluster. The sub classes could override this method to 745 * do some initialization work. 746 */ 747 protected void 748 preBalanceCluster(Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) { 749 } 750 751 /** 752 * Perform the major balance operation for cluster, will invoke 753 * {@link #balanceTable(TableName, Map)} to do actual balance. 754 * <p/> 755 * THIs method is marked as final which means you should not override this method. See the javadoc 756 * for {@link #balanceTable(TableName, Map)} for more details. 757 * @param loadOfAllTable region load of servers for all table 758 * @return a list of regions to be moved, including source and destination, or null if cluster is 759 * already balanced 760 * @see #balanceTable(TableName, Map) 761 */ 762 @Override 763 public synchronized final List<RegionPlan> 764 balanceCluster(Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) { 765 preBalanceCluster(loadOfAllTable); 766 if (isByTable) { 767 List<RegionPlan> result = new ArrayList<>(); 768 loadOfAllTable.forEach((tableName, loadOfOneTable) -> { 769 LOG.info("Start Generate Balance plan for table: " + tableName); 770 List<RegionPlan> partialPlans = balanceTable(tableName, loadOfOneTable); 771 if (partialPlans != null) { 772 result.addAll(partialPlans); 773 } 774 }); 775 return result; 776 } else { 777 LOG.debug("Start Generate Balance plan for cluster."); 778 return balanceTable(HConstants.ENSEMBLE_TABLE_NAME, toEnsumbleTableLoad(loadOfAllTable)); 779 } 780 } 781 782 @Override 783 public synchronized void onConfigurationChange(Configuration conf) { 784 loadConf(conf); 785 } 786}