001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import edu.umd.cs.findbugs.annotations.NonNull;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Collection;
024import java.util.Collections;
025import java.util.HashMap;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Map;
029import java.util.NavigableMap;
030import java.util.Random;
031import java.util.Set;
032import java.util.TreeMap;
033import java.util.concurrent.ThreadLocalRandom;
034import java.util.function.Predicate;
035import java.util.stream.Collectors;
036import org.apache.hadoop.conf.Configuration;
037import org.apache.hadoop.hbase.ClusterMetrics;
038import org.apache.hadoop.hbase.HBaseIOException;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.ServerMetrics;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.client.TableDescriptor;
045import org.apache.hadoop.hbase.master.LoadBalancer;
046import org.apache.hadoop.hbase.master.MasterServices;
047import org.apache.hadoop.hbase.master.RackManager;
048import org.apache.hadoop.hbase.master.RegionPlan;
049import org.apache.yetus.audience.InterfaceAudience;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
054import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
055import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
056import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
057
058/**
059 * The base class for load balancers. It provides the the functions used to by
060 * {@link org.apache.hadoop.hbase.master.assignment.AssignmentManager} to assign regions in the edge
061 * cases. It doesn't provide an implementation of the actual balancing algorithm.
062 */
063@InterfaceAudience.Private
064@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "IS2_INCONSISTENT_SYNC",
065    justification = "All the unsynchronized access is before initialization")
066public abstract class BaseLoadBalancer implements LoadBalancer {
067
068  private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class);
069
070  public static final String BALANCER_DECISION_BUFFER_ENABLED =
071    "hbase.master.balancer.decision.buffer.enabled";
072  public static final boolean DEFAULT_BALANCER_DECISION_BUFFER_ENABLED = false;
073
074  public static final String BALANCER_REJECTION_BUFFER_ENABLED =
075    "hbase.master.balancer.rejection.buffer.enabled";
076  public static final boolean DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED = false;
077
078  public static final boolean DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE = false;
079
080  protected static final int MIN_SERVER_BALANCE = 2;
081  private volatile boolean stopped = false;
082
083  private static final Predicate<ServerMetrics> IDLE_SERVER_PREDICATOR =
084    load -> load.getRegionMetrics().isEmpty();
085
086  protected volatile RegionLocationFinder regionFinder;
087  protected boolean useRegionFinder;
088  protected boolean isByTable = DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE;
089
090  // slop for regions
091  protected float slop;
092  protected volatile RackManager rackManager;
093  protected MetricsBalancer metricsBalancer = null;
094  protected ClusterMetrics clusterStatus = null;
095  protected ServerName masterServerName;
096  protected MasterServices services;
097
098  /**
099   * @deprecated since 2.4.0, will be removed in 3.0.0.
100   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
101   */
102  @Deprecated
103  protected boolean onlySystemTablesOnMaster;
104
105  /**
106   * The constructor that uses the basic MetricsBalancer
107   */
108  protected BaseLoadBalancer() {
109    this(null);
110  }
111
112  /**
113   * This Constructor accepts an instance of MetricsBalancer, which will be used instead of creating
114   * a new one
115   */
116  protected BaseLoadBalancer(MetricsBalancer metricsBalancer) {
117    this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer();
118  }
119
120  protected final Configuration getConf() {
121    return services.getConfiguration();
122  }
123
124  /**
125   * Check if a region belongs to some system table. If so, the primary replica may be expected to
126   * be put on the master regionserver.
127   * @deprecated since 2.4.0, will be removed in 3.0.0.
128   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
129   */
130  @Deprecated
131  public boolean shouldBeOnMaster(RegionInfo region) {
132    return this.onlySystemTablesOnMaster && region.getTable().isSystemTable();
133  }
134
135  /**
136   * Balance the regions that should be on master regionserver.
137   * @deprecated since 2.4.0, will be removed in 3.0.0.
138   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
139   */
140  @Deprecated
141  protected List<RegionPlan> balanceMasterRegions(Map<ServerName, List<RegionInfo>> clusterMap) {
142    if (masterServerName == null || clusterMap == null || clusterMap.size() <= 1) return null;
143    List<RegionPlan> plans = null;
144    List<RegionInfo> regions = clusterMap.get(masterServerName);
145    if (regions != null) {
146      Iterator<ServerName> keyIt = null;
147      for (RegionInfo region : regions) {
148        if (shouldBeOnMaster(region)) continue;
149
150        // Find a non-master regionserver to host the region
151        if (keyIt == null || !keyIt.hasNext()) {
152          keyIt = clusterMap.keySet().iterator();
153        }
154        ServerName dest = keyIt.next();
155        if (masterServerName.equals(dest)) {
156          if (!keyIt.hasNext()) {
157            keyIt = clusterMap.keySet().iterator();
158          }
159          dest = keyIt.next();
160        }
161
162        // Move this region away from the master regionserver
163        RegionPlan plan = new RegionPlan(region, masterServerName, dest);
164        if (plans == null) {
165          plans = new ArrayList<>();
166        }
167        plans.add(plan);
168      }
169    }
170    for (Map.Entry<ServerName, List<RegionInfo>> server : clusterMap.entrySet()) {
171      if (masterServerName.equals(server.getKey())) continue;
172      for (RegionInfo region : server.getValue()) {
173        if (!shouldBeOnMaster(region)) continue;
174
175        // Move this region to the master regionserver
176        RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
177        if (plans == null) {
178          plans = new ArrayList<>();
179        }
180        plans.add(plan);
181      }
182    }
183    return plans;
184  }
185
186  /**
187   * If master is configured to carry system tables only, in here is where we figure what to assign
188   * it.
189   * @deprecated since 2.4.0, will be removed in 3.0.0.
190   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
191   */
192  @Deprecated
193  @NonNull
194  protected Map<ServerName, List<RegionInfo>>
195    assignMasterSystemRegions(Collection<RegionInfo> regions, List<ServerName> servers) {
196    Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>();
197    if (this.onlySystemTablesOnMaster) {
198      if (masterServerName != null && servers.contains(masterServerName)) {
199        assignments.put(masterServerName, new ArrayList<>());
200        for (RegionInfo region : regions) {
201          if (shouldBeOnMaster(region)) {
202            assignments.get(masterServerName).add(region);
203          }
204        }
205      }
206    }
207    return assignments;
208  }
209
210  @Override
211  public synchronized void updateClusterMetrics(ClusterMetrics st) {
212    this.clusterStatus = st;
213    if (useRegionFinder) {
214      regionFinder.setClusterMetrics(st);
215    }
216  }
217
218  @Override
219  public void setMasterServices(MasterServices masterServices) {
220    masterServerName = masterServices.getServerName();
221    this.services = masterServices;
222  }
223
224  @Override
225  public synchronized void postMasterStartupInitialize() {
226    if (services != null && regionFinder != null) {
227      try {
228        Set<RegionInfo> regions =
229          services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet();
230        regionFinder.refreshAndWait(regions);
231      } catch (Exception e) {
232        LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e);
233      }
234    }
235  }
236
237  protected final boolean idleRegionServerExist(BalancerClusterState c) {
238    boolean isServerExistsWithMoreRegions = false;
239    boolean isServerExistsWithZeroRegions = false;
240    for (int[] serverList : c.regionsPerServer) {
241      if (serverList.length > 1) {
242        isServerExistsWithMoreRegions = true;
243      }
244      if (serverList.length == 0) {
245        isServerExistsWithZeroRegions = true;
246      }
247    }
248    return isServerExistsWithMoreRegions && isServerExistsWithZeroRegions;
249  }
250
251  protected final boolean sloppyRegionServerExist(ClusterLoadState cs) {
252    if (slop < 0) {
253      LOG.debug("Slop is less than zero, not checking for sloppiness.");
254      return false;
255    }
256    float average = cs.getLoadAverage(); // for logging
257    int floor = (int) Math.floor(average * (1 - slop));
258    int ceiling = (int) Math.ceil(average * (1 + slop));
259    if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
260      NavigableMap<ServerAndLoad, List<RegionInfo>> serversByLoad = cs.getServersByLoad();
261      if (LOG.isTraceEnabled()) {
262        // If nothing to balance, then don't say anything unless trace-level logging.
263        LOG.trace("Skipping load balancing because balanced cluster; " + "servers="
264          + cs.getNumServers() + " regions=" + cs.getNumRegions() + " average=" + average
265          + " mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded="
266          + serversByLoad.firstKey().getLoad());
267      }
268      return false;
269    }
270    return true;
271  }
272
273  /**
274   * Generates a bulk assignment plan to be used on cluster startup using a simple round-robin
275   * assignment.
276   * <p/>
277   * Takes a list of all the regions and all the servers in the cluster and returns a map of each
278   * server to the regions that it should be assigned.
279   * <p/>
280   * Currently implemented as a round-robin assignment. Same invariant as load balancing, all
281   * servers holding floor(avg) or ceiling(avg). TODO: Use block locations from HDFS to place
282   * regions with their blocks
283   * @param regions all regions
284   * @param servers all servers
285   * @return map of server to the regions it should take, or emptyMap if no assignment is possible
286   *         (ie. no servers)
287   */
288  @Override
289  @NonNull
290  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
291    List<ServerName> servers) throws HBaseIOException {
292    metricsBalancer.incrMiscInvocations();
293    Map<ServerName, List<RegionInfo>> assignments = assignMasterSystemRegions(regions, servers);
294    if (!assignments.isEmpty()) {
295      servers = new ArrayList<>(servers);
296      // Guarantee not to put other regions on master
297      servers.remove(masterServerName);
298      List<RegionInfo> masterRegions = assignments.get(masterServerName);
299      if (!masterRegions.isEmpty()) {
300        regions = new ArrayList<>(regions);
301        regions.removeAll(masterRegions);
302      }
303    }
304    /**
305     * only need assign system table
306     */
307    if (regions.isEmpty()) {
308      return assignments;
309    }
310
311    int numServers = servers == null ? 0 : servers.size();
312    if (numServers == 0) {
313      LOG.warn("Wanted to do round robin assignment but no servers to assign to");
314      return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions));
315    }
316
317    // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
318    // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
319    // generator for AssignRegionAction. The LB will ensure the regions are mostly local
320    // and balanced. This should also run fast with fewer number of iterations.
321
322    if (numServers == 1) { // Only one server, nothing fancy we can do here
323      ServerName server = servers.get(0);
324      assignments.put(server, new ArrayList<>(regions));
325      return assignments;
326    }
327
328    BalancerClusterState cluster = createCluster(servers, regions);
329    roundRobinAssignment(cluster, regions, servers, assignments);
330    return assignments;
331  }
332
333  private BalancerClusterState createCluster(List<ServerName> servers,
334    Collection<RegionInfo> regions) throws HBaseIOException {
335    boolean hasRegionReplica = false;
336    try {
337      if (services != null && services.getTableDescriptors() != null) {
338        Map<String, TableDescriptor> tds = services.getTableDescriptors().getAll();
339        for (RegionInfo regionInfo : regions) {
340          TableDescriptor td = tds.get(regionInfo.getTable().getNameWithNamespaceInclAsString());
341          if (td != null && td.getRegionReplication() > 1) {
342            hasRegionReplica = true;
343            break;
344          }
345        }
346      }
347    } catch (IOException ioe) {
348      throw new HBaseIOException(ioe);
349    }
350
351    // Get the snapshot of the current assignments for the regions in question, and then create
352    // a cluster out of it. Note that we might have replicas already assigned to some servers
353    // earlier. So we want to get the snapshot to see those assignments, but this will only contain
354    // replicas of the regions that are passed (for performance).
355    Map<ServerName, List<RegionInfo>> clusterState = null;
356    if (!hasRegionReplica) {
357      clusterState = getRegionAssignmentsByServer(regions);
358    } else {
359      // for the case where we have region replica it is better we get the entire cluster's snapshot
360      clusterState = getRegionAssignmentsByServer(null);
361    }
362
363    for (ServerName server : servers) {
364      if (!clusterState.containsKey(server)) {
365        clusterState.put(server, Collections.emptyList());
366      }
367    }
368    return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager);
369  }
370
371  private List<ServerName> findIdleServers(List<ServerName> servers) {
372    return this.services.getServerManager().getOnlineServersListWithPredicator(servers,
373      IDLE_SERVER_PREDICATOR);
374  }
375
376  /**
377   * Used to assign a single region to a random server.
378   */
379  @Override
380  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
381    throws HBaseIOException {
382    metricsBalancer.incrMiscInvocations();
383    if (servers != null && servers.contains(masterServerName)) {
384      if (shouldBeOnMaster(regionInfo)) {
385        return masterServerName;
386      }
387      if (!LoadBalancer.isTablesOnMaster(getConf())) {
388        // Guarantee we do not put any regions on master
389        servers = new ArrayList<>(servers);
390        servers.remove(masterServerName);
391      }
392    }
393
394    int numServers = servers == null ? 0 : servers.size();
395    if (numServers == 0) {
396      LOG.warn("Wanted to retain assignment but no servers to assign to");
397      return null;
398    }
399    if (numServers == 1) { // Only one server, nothing fancy we can do here
400      return servers.get(0);
401    }
402    List<ServerName> idleServers = findIdleServers(servers);
403    if (idleServers.size() == 1) {
404      return idleServers.get(0);
405    }
406    final List<ServerName> finalServers = idleServers.isEmpty() ? servers : idleServers;
407    List<RegionInfo> regions = Lists.newArrayList(regionInfo);
408    BalancerClusterState cluster = createCluster(finalServers, regions);
409    return randomAssignment(cluster, regionInfo, finalServers);
410  }
411
412  /**
413   * Generates a bulk assignment startup plan, attempting to reuse the existing assignment
414   * information from META, but adjusting for the specified list of available/online servers
415   * available for assignment.
416   * <p>
417   * Takes a map of all regions to their existing assignment from META. Also takes a list of online
418   * servers for regions to be assigned to. Attempts to retain all assignment, so in some instances
419   * initial assignment will not be completely balanced.
420   * <p>
421   * Any leftover regions without an existing server to be assigned to will be assigned randomly to
422   * available servers.
423   * @param regions regions and existing assignment from meta
424   * @param servers available servers
425   * @return map of servers and regions to be assigned to them, or emptyMap if no assignment is
426   *         possible (ie. no servers)
427   */
428  @Override
429  @NonNull
430  public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
431    List<ServerName> servers) throws HBaseIOException {
432    // Update metrics
433    metricsBalancer.incrMiscInvocations();
434    Map<ServerName, List<RegionInfo>> assignments =
435      assignMasterSystemRegions(regions.keySet(), servers);
436    if (!assignments.isEmpty()) {
437      servers = new ArrayList<>(servers);
438      // Guarantee not to put other regions on master
439      servers.remove(masterServerName);
440      List<RegionInfo> masterRegions = assignments.get(masterServerName);
441      regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey()))
442        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
443    }
444    if (regions.isEmpty()) {
445      return assignments;
446    }
447
448    int numServers = servers == null ? 0 : servers.size();
449    if (numServers == 0) {
450      LOG.warn("Wanted to do retain assignment but no servers to assign to");
451      return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions.keySet()));
452    }
453    if (numServers == 1) { // Only one server, nothing fancy we can do here
454      ServerName server = servers.get(0);
455      assignments.put(server, new ArrayList<>(regions.keySet()));
456      return assignments;
457    }
458
459    // Group all of the old assignments by their hostname.
460    // We can't group directly by ServerName since the servers all have
461    // new start-codes.
462
463    // Group the servers by their hostname. It's possible we have multiple
464    // servers on the same host on different ports.
465    ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
466    for (ServerName server : servers) {
467      assignments.put(server, new ArrayList<>());
468      serversByHostname.put(server.getHostnameLowerCase(), server);
469    }
470
471    // Collection of the hostnames that used to have regions
472    // assigned, but for which we no longer have any RS running
473    // after the cluster restart.
474    Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
475
476    // If the old servers aren't present, lets assign those regions later.
477    List<RegionInfo> randomAssignRegions = Lists.newArrayList();
478
479    int numRandomAssignments = 0;
480    int numRetainedAssigments = 0;
481    for (Map.Entry<RegionInfo, ServerName> entry : regions.entrySet()) {
482      RegionInfo region = entry.getKey();
483      ServerName oldServerName = entry.getValue();
484      List<ServerName> localServers = new ArrayList<>();
485      if (oldServerName != null) {
486        localServers = serversByHostname.get(oldServerName.getHostnameLowerCase());
487      }
488      if (localServers.isEmpty()) {
489        // No servers on the new cluster match up with this hostname, assign randomly, later.
490        randomAssignRegions.add(region);
491        if (oldServerName != null) {
492          oldHostsNoLongerPresent.add(oldServerName.getHostnameLowerCase());
493        }
494      } else if (localServers.size() == 1) {
495        // the usual case - one new server on same host
496        ServerName target = localServers.get(0);
497        assignments.get(target).add(region);
498        numRetainedAssigments++;
499      } else {
500        // multiple new servers in the cluster on this same host
501        if (localServers.contains(oldServerName)) {
502          assignments.get(oldServerName).add(region);
503          numRetainedAssigments++;
504        } else {
505          ServerName target = null;
506          for (ServerName tmp : localServers) {
507            if (tmp.getPort() == oldServerName.getPort()) {
508              target = tmp;
509              assignments.get(tmp).add(region);
510              numRetainedAssigments++;
511              break;
512            }
513          }
514          if (target == null) {
515            randomAssignRegions.add(region);
516          }
517        }
518      }
519    }
520
521    // If servers from prior assignment aren't present, then lets do randomAssignment on regions.
522    if (randomAssignRegions.size() > 0) {
523      BalancerClusterState cluster = createCluster(servers, regions.keySet());
524      for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) {
525        ServerName sn = entry.getKey();
526        for (RegionInfo region : entry.getValue()) {
527          cluster.doAssignRegion(region, sn);
528        }
529      }
530      for (RegionInfo region : randomAssignRegions) {
531        ServerName target = randomAssignment(cluster, region, servers);
532        assignments.get(target).add(region);
533        numRandomAssignments++;
534      }
535    }
536
537    String randomAssignMsg = "";
538    if (numRandomAssignments > 0) {
539      randomAssignMsg = numRandomAssignments + " regions were assigned "
540        + "to random hosts, since the old hosts for these regions are no "
541        + "longer present in the cluster. These hosts were:\n  "
542        + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
543    }
544
545    LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
546      + " retained the pre-restart assignment. " + randomAssignMsg);
547    return assignments;
548  }
549
550  protected float getDefaultSlop() {
551    return 0.2f;
552  }
553
554  private RegionLocationFinder createRegionLocationFinder(Configuration conf) {
555    RegionLocationFinder finder = new RegionLocationFinder();
556    finder.setConf(conf);
557    finder.setServices(services);
558    return finder;
559  }
560
561  protected void loadConf(Configuration conf) {
562    this.slop = conf.getFloat("hbase.regions.slop", getDefaultSlop());
563    this.rackManager = new RackManager(getConf());
564    this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(conf);
565    useRegionFinder = conf.getBoolean("hbase.master.balancer.uselocality", true);
566    if (useRegionFinder) {
567      regionFinder = createRegionLocationFinder(conf);
568    } else {
569      regionFinder = null;
570    }
571    this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE,
572      DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE);
573    // Print out base configs. Don't print overallSlop since it for simple balancer exclusively.
574    LOG.info("slop={}", this.slop);
575  }
576
577  @Override
578  public void initialize() {
579    loadConf(getConf());
580  }
581
582  @Override
583  public void regionOnline(RegionInfo regionInfo, ServerName sn) {
584  }
585
586  @Override
587  public void regionOffline(RegionInfo regionInfo) {
588  }
589
590  @Override
591  public boolean isStopped() {
592    return stopped;
593  }
594
595  @Override
596  public void stop(String why) {
597    LOG.info("Load Balancer stop requested: {}", why);
598    stopped = true;
599  }
600
601  /**
602   * Updates the balancer status tag reported to JMX
603   */
604  public void updateBalancerStatus(boolean status) {
605    metricsBalancer.balancerStatus(status);
606  }
607
608  /**
609   * Used to assign a single region to a random server.
610   */
611  private ServerName randomAssignment(BalancerClusterState cluster, RegionInfo regionInfo,
612    List<ServerName> servers) {
613    int numServers = servers.size(); // servers is not null, numServers > 1
614    ServerName sn = null;
615    final int maxIterations = numServers * 4;
616    int iterations = 0;
617    List<ServerName> usedSNs = new ArrayList<>(servers.size());
618    Random rand = ThreadLocalRandom.current();
619    do {
620      int i = rand.nextInt(numServers);
621      sn = servers.get(i);
622      if (!usedSNs.contains(sn)) {
623        usedSNs.add(sn);
624      }
625    } while (cluster.wouldLowerAvailability(regionInfo, sn) && iterations++ < maxIterations);
626    if (iterations >= maxIterations) {
627      // We have reached the max. Means the servers that we collected is still lowering the
628      // availability
629      for (ServerName unusedServer : servers) {
630        if (!usedSNs.contains(unusedServer)) {
631          // check if any other unused server is there for us to use.
632          // If so use it. Else we have not other go but to go with one of them
633          if (!cluster.wouldLowerAvailability(regionInfo, unusedServer)) {
634            sn = unusedServer;
635            break;
636          }
637        }
638      }
639    }
640    cluster.doAssignRegion(regionInfo, sn);
641    return sn;
642  }
643
644  /**
645   * Round robin a list of regions to a list of servers
646   */
647  private void roundRobinAssignment(BalancerClusterState cluster, List<RegionInfo> regions,
648    List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) {
649    Random rand = ThreadLocalRandom.current();
650    List<RegionInfo> unassignedRegions = new ArrayList<>();
651    int numServers = servers.size();
652    int numRegions = regions.size();
653    int max = (int) Math.ceil((float) numRegions / numServers);
654    int serverIdx = 0;
655    if (numServers > 1) {
656      serverIdx = rand.nextInt(numServers);
657    }
658    int regionIdx = 0;
659    for (int j = 0; j < numServers; j++) {
660      ServerName server = servers.get((j + serverIdx) % numServers);
661      List<RegionInfo> serverRegions = new ArrayList<>(max);
662      for (int i = regionIdx; i < numRegions; i += numServers) {
663        RegionInfo region = regions.get(i % numRegions);
664        if (cluster.wouldLowerAvailability(region, server)) {
665          unassignedRegions.add(region);
666        } else {
667          serverRegions.add(region);
668          cluster.doAssignRegion(region, server);
669        }
670      }
671      assignments.put(server, serverRegions);
672      regionIdx++;
673    }
674
675    List<RegionInfo> lastFewRegions = new ArrayList<>();
676    // assign the remaining by going through the list and try to assign to servers one-by-one
677    serverIdx = rand.nextInt(numServers);
678    for (RegionInfo region : unassignedRegions) {
679      boolean assigned = false;
680      for (int j = 0; j < numServers; j++) { // try all servers one by one
681        ServerName server = servers.get((j + serverIdx) % numServers);
682        if (cluster.wouldLowerAvailability(region, server)) {
683          continue;
684        } else {
685          assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
686          cluster.doAssignRegion(region, server);
687          serverIdx = (j + serverIdx + 1) % numServers; // remain from next server
688          assigned = true;
689          break;
690        }
691      }
692      if (!assigned) {
693        lastFewRegions.add(region);
694      }
695    }
696    // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
697    // make it optimal later. we can end up with this if numReplicas > numServers.
698    for (RegionInfo region : lastFewRegions) {
699      int i = rand.nextInt(numServers);
700      ServerName server = servers.get(i);
701      assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
702      cluster.doAssignRegion(region, server);
703    }
704  }
705
706  private Map<ServerName, List<RegionInfo>>
707    getRegionAssignmentsByServer(Collection<RegionInfo> regions) {
708    if (this.services != null && this.services.getAssignmentManager() != null) {
709      return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
710    } else {
711      return new HashMap<>();
712    }
713  }
714
715  protected final Map<ServerName, List<RegionInfo>>
716    toEnsumbleTableLoad(Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable) {
717    Map<ServerName, List<RegionInfo>> returnMap = new TreeMap<>();
718    for (Map<ServerName, List<RegionInfo>> serverNameListMap : LoadOfAllTable.values()) {
719      serverNameListMap.forEach((serverName, regionInfoList) -> {
720        List<RegionInfo> regionInfos =
721          returnMap.computeIfAbsent(serverName, k -> new ArrayList<>());
722        regionInfos.addAll(regionInfoList);
723      });
724    }
725    return returnMap;
726  }
727
728  /**
729   * Perform the major balance operation for table, all sub classes should override this method.
730   * <p/>
731   * Will be invoked by {@link #balanceCluster(Map)}. If
732   * {@link HConstants#HBASE_MASTER_LOADBALANCE_BYTABLE} is enabled, we will call this method
733   * multiple times, one table a time, where we will only pass in the regions for a single table
734   * each time. If not, we will pass in all the regions at once, and the {@code tableName} will be
735   * {@link HConstants#ENSEMBLE_TABLE_NAME}.
736   * @param tableName      the table to be balanced
737   * @param loadOfOneTable region load of servers for the specific one table
738   * @return List of plans
739   */
740  protected abstract List<RegionPlan> balanceTable(TableName tableName,
741    Map<ServerName, List<RegionInfo>> loadOfOneTable);
742
743  /**
744   * Called before actually executing balanceCluster. The sub classes could override this method to
745   * do some initialization work.
746   */
747  protected void
748    preBalanceCluster(Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) {
749  }
750
751  /**
752   * Perform the major balance operation for cluster, will invoke
753   * {@link #balanceTable(TableName, Map)} to do actual balance.
754   * <p/>
755   * THIs method is marked as final which means you should not override this method. See the javadoc
756   * for {@link #balanceTable(TableName, Map)} for more details.
757   * @param loadOfAllTable region load of servers for all table
758   * @return a list of regions to be moved, including source and destination, or null if cluster is
759   *         already balanced
760   * @see #balanceTable(TableName, Map)
761   */
762  @Override
763  public synchronized final List<RegionPlan>
764    balanceCluster(Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) {
765    preBalanceCluster(loadOfAllTable);
766    if (isByTable) {
767      List<RegionPlan> result = new ArrayList<>();
768      loadOfAllTable.forEach((tableName, loadOfOneTable) -> {
769        LOG.info("Start Generate Balance plan for table: " + tableName);
770        List<RegionPlan> partialPlans = balanceTable(tableName, loadOfOneTable);
771        if (partialPlans != null) {
772          result.addAll(partialPlans);
773        }
774      });
775      return result;
776    } else {
777      LOG.debug("Start Generate Balance plan for cluster.");
778      return balanceTable(HConstants.ENSEMBLE_TABLE_NAME, toEnsumbleTableLoad(loadOfAllTable));
779    }
780  }
781
782  @Override
783  public synchronized void onConfigurationChange(Configuration conf) {
784    loadConf(conf);
785  }
786}