View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.Deque;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.NavigableMap;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeMap;
36  
37  import org.apache.commons.lang.NotImplementedException;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.ClusterStatus;
42  import org.apache.hadoop.hbase.HBaseIOException;
43  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.RegionLoad;
46  import org.apache.hadoop.hbase.ServerName;
47  import org.apache.hadoop.hbase.TableName;
48  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
49  import org.apache.hadoop.hbase.master.LoadBalancer;
50  import org.apache.hadoop.hbase.master.MasterServices;
51  import org.apache.hadoop.hbase.master.RackManager;
52  import org.apache.hadoop.hbase.master.RegionPlan;
53  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
54  import org.apache.hadoop.hbase.security.access.AccessControlLists;
55  import org.apache.hadoop.util.StringUtils;
56  
57  import com.google.common.base.Joiner;
58  import com.google.common.collect.ArrayListMultimap;
59  import com.google.common.collect.Lists;
60  import com.google.common.collect.Sets;
61  
62  /**
63   * The base class for load balancers. It provides the the functions used to by
64   * {@link org.apache.hadoop.hbase.master.AssignmentManager} to assign regions
65   * in the edge cases. It doesn't provide an implementation of the
66   * actual balancing algorithm.
67   *
68   */
69  public abstract class BaseLoadBalancer implements LoadBalancer {
70    private static final int MIN_SERVER_BALANCE = 2;
71    private volatile boolean stopped = false;
72  
73    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
74  
75    protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
76  
77    private static class DefaultRackManager extends RackManager {
78      @Override
79      public String getRack(ServerName server) {
80        return UNKNOWN_RACK;
81      }
82    }
83    
84    /**
85     * The constructor that uses the basic MetricsBalancer
86     */
87    protected BaseLoadBalancer() {
88      metricsBalancer = new MetricsBalancer();
89    }
90    
91    /**
92     * This Constructor accepts an instance of MetricsBalancer, 
93     * which will be used instead of creating a new one
94     */
95    protected BaseLoadBalancer(MetricsBalancer metricsBalancer) {
96      this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer();
97    }
98  
99    /**
100    * An efficient array based implementation similar to ClusterState for keeping
101    * the status of the cluster in terms of region assignment and distribution.
102    * LoadBalancers, such as StochasticLoadBalancer uses this Cluster object because of
103    * hundreds of thousands of hashmap manipulations are very costly, which is why this
104    * class uses mostly indexes and arrays.
105    *
106    * Cluster tracks a list of unassigned regions, region assignments, and the server
107    * topology in terms of server names, hostnames and racks.
108    */
109   protected static class Cluster {
110     ServerName[] servers;
111     String[] hosts; // ServerName uniquely identifies a region server. multiple RS can run on the same host
112     String[] racks;
113     boolean multiServersPerHost = false; // whether or not any host has more than one server
114 
115     ArrayList<String> tables;
116     HRegionInfo[] regions;
117     Deque<RegionLoad>[] regionLoads;
118     private RegionLocationFinder regionFinder;
119 
120     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
121 
122     int[]   serverIndexToHostIndex;      //serverIndex -> host index
123     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
124 
125     int[][] regionsPerServer;            //serverIndex -> region list
126     int[][] regionsPerHost;              //hostIndex -> list of regions
127     int[][] regionsPerRack;              //rackIndex -> region list
128     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
129     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
130     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
131 
132     int[][] serversPerHost;              //hostIndex -> list of server indexes
133     int[][] serversPerRack;              //rackIndex -> list of server indexes
134     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
135     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
136     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
137     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
138     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
139     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
140     boolean hasRegionReplicas = false;   //whether there is regions with replicas
141 
142     Integer[] serverIndicesSortedByRegionCount;
143     Integer[] serverIndicesSortedByLocality;
144 
145     Map<String, Integer> serversToIndex;
146     Map<String, Integer> hostsToIndex;
147     Map<String, Integer> racksToIndex;
148     Map<String, Integer> tablesToIndex;
149     Map<HRegionInfo, Integer> regionsToIndex;
150     float[] localityPerServer;
151 
152     int numServers;
153     int numHosts;
154     int numRacks;
155     int numTables;
156     int numRegions;
157 
158     int numMovedRegions = 0; //num moved regions from the initial configuration
159     Map<ServerName, List<HRegionInfo>> clusterState;
160 
161     protected final RackManager rackManager;
162 
163     protected Cluster(
164         Map<ServerName, List<HRegionInfo>> clusterState,
165         Map<String, Deque<RegionLoad>> loads,
166         RegionLocationFinder regionFinder,
167         RackManager rackManager) {
168       this(null, clusterState, loads, regionFinder,
169         rackManager);
170     }
171 
172     @SuppressWarnings("unchecked")
173     protected Cluster(
174         Collection<HRegionInfo> unassignedRegions,
175         Map<ServerName, List<HRegionInfo>> clusterState,
176         Map<String, Deque<RegionLoad>> loads,
177         RegionLocationFinder regionFinder,
178         RackManager rackManager) {
179 
180       if (unassignedRegions == null) {
181         unassignedRegions = EMPTY_REGION_LIST;
182       }
183 
184       serversToIndex = new HashMap<String, Integer>();
185       hostsToIndex = new HashMap<String, Integer>();
186       racksToIndex = new HashMap<String, Integer>();
187       tablesToIndex = new HashMap<String, Integer>();
188 
189       //TODO: We should get the list of tables from master
190       tables = new ArrayList<String>();
191       this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
192 
193       numRegions = 0;
194 
195       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
196       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
197       this.clusterState = clusterState;
198       this.regionFinder = regionFinder;
199 
200       // Use servername and port as there can be dead servers in this list. We want everything with
201       // a matching hostname and port to have the same index.
202       for (ServerName sn : clusterState.keySet()) {
203         if (serversToIndex.get(sn.getHostAndPort()) == null) {
204           serversToIndex.put(sn.getHostAndPort(), numServers++);
205         }
206         if (!hostsToIndex.containsKey(sn.getHostname())) {
207           hostsToIndex.put(sn.getHostname(), numHosts++);
208           serversPerHostList.add(new ArrayList<Integer>(1));
209         }
210 
211         int serverIndex = serversToIndex.get(sn.getHostAndPort());
212         int hostIndex = hostsToIndex.get(sn.getHostname());
213         serversPerHostList.get(hostIndex).add(serverIndex);
214 
215         String rack = this.rackManager.getRack(sn);
216         if (!racksToIndex.containsKey(rack)) {
217           racksToIndex.put(rack, numRacks++);
218           serversPerRackList.add(new ArrayList<Integer>());
219         }
220         int rackIndex = racksToIndex.get(rack);
221         serversPerRackList.get(rackIndex).add(serverIndex);
222       }
223 
224       // Count how many regions there are.
225       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
226         numRegions += entry.getValue().size();
227       }
228       numRegions += unassignedRegions.size();
229 
230       regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
231       servers = new ServerName[numServers];
232       serversPerHost = new int[numHosts][];
233       serversPerRack = new int[numRacks][];
234       regions = new HRegionInfo[numRegions];
235       regionIndexToServerIndex = new int[numRegions];
236       initialRegionIndexToServerIndex = new int[numRegions];
237       regionIndexToTableIndex = new int[numRegions];
238       regionIndexToPrimaryIndex = new int[numRegions];
239       regionLoads = new Deque[numRegions];
240       regionLocations = new int[numRegions][];
241       serverIndicesSortedByRegionCount = new Integer[numServers];
242       serverIndicesSortedByLocality = new Integer[numServers];
243       localityPerServer = new float[numServers];
244 
245       serverIndexToHostIndex = new int[numServers];
246       serverIndexToRackIndex = new int[numServers];
247       regionsPerServer = new int[numServers][];
248       regionsPerHost = new int[numHosts][];
249       regionsPerRack = new int[numRacks][];
250       primariesOfRegionsPerServer = new int[numServers][];
251       primariesOfRegionsPerHost = new int[numHosts][];
252       primariesOfRegionsPerRack = new int[numRacks][];
253 
254       int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
255 
256       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
257         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
258 
259         // keep the servername if this is the first server name for this hostname
260         // or this servername has the newest startcode.
261         if (servers[serverIndex] == null ||
262             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
263           servers[serverIndex] = entry.getKey();
264         }
265 
266         if (regionsPerServer[serverIndex] != null) {
267           // there is another server with the same hostAndPort in ClusterState.
268           // allocate the array for the total size
269           regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
270         } else {
271           regionsPerServer[serverIndex] = new int[entry.getValue().size()];
272         }
273         primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
274         serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
275         serverIndicesSortedByLocality[serverIndex] = serverIndex;
276       }
277 
278       hosts = new String[numHosts];
279       for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
280         hosts[entry.getValue()] = entry.getKey();
281       }
282       racks = new String[numRacks];
283       for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
284         racks[entry.getValue()] = entry.getKey();
285       }
286 
287       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
288         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
289         regionPerServerIndex = 0;
290 
291         int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
292         serverIndexToHostIndex[serverIndex] = hostIndex;
293 
294         int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
295         serverIndexToRackIndex[serverIndex] = rackIndex;
296 
297         for (HRegionInfo region : entry.getValue()) {
298           registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
299 
300           regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
301           regionIndex++;
302         }
303       }
304       for (HRegionInfo region : unassignedRegions) {
305         registerRegion(region, regionIndex, -1, loads, regionFinder);
306         regionIndex++;
307       }
308 
309       for (int i = 0; i < serversPerHostList.size(); i++) {
310         serversPerHost[i] = new int[serversPerHostList.get(i).size()];
311         for (int j = 0; j < serversPerHost[i].length; j++) {
312           serversPerHost[i][j] = serversPerHostList.get(i).get(j);
313         }
314         if (serversPerHost[i].length > 1) {
315           multiServersPerHost = true;
316         }
317       }
318 
319       for (int i = 0; i < serversPerRackList.size(); i++) {
320         serversPerRack[i] = new int[serversPerRackList.get(i).size()];
321         for (int j = 0; j < serversPerRack[i].length; j++) {
322           serversPerRack[i][j] = serversPerRackList.get(i).get(j);
323         }
324       }
325 
326       numTables = tables.size();
327       numRegionsPerServerPerTable = new int[numServers][numTables];
328 
329       for (int i = 0; i < numServers; i++) {
330         for (int j = 0; j < numTables; j++) {
331           numRegionsPerServerPerTable[i][j] = 0;
332         }
333       }
334 
335       for (int i=0; i < regionIndexToServerIndex.length; i++) {
336         if (regionIndexToServerIndex[i] >= 0) {
337           numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
338         }
339       }
340 
341       numMaxRegionsPerTable = new int[numTables];
342       for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
343         for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
344           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
345             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
346           }
347         }
348       }
349 
350       for (int i = 0; i < regions.length; i ++) {
351         HRegionInfo info = regions[i];
352         if (RegionReplicaUtil.isDefaultReplica(info)) {
353           regionIndexToPrimaryIndex[i] = i;
354         } else {
355           hasRegionReplicas = true;
356           HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
357           regionIndexToPrimaryIndex[i] =
358               regionsToIndex.containsKey(primaryInfo) ?
359               regionsToIndex.get(primaryInfo):
360               -1;
361         }
362       }
363 
364       for (int i = 0; i < regionsPerServer.length; i++) {
365         primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
366         for (int j = 0; j < regionsPerServer[i].length; j++) {
367           int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
368           primariesOfRegionsPerServer[i][j] = primaryIndex;
369         }
370         // sort the regions by primaries.
371         Arrays.sort(primariesOfRegionsPerServer[i]);
372       }
373 
374       // compute regionsPerHost
375       if (multiServersPerHost) {
376         for (int i = 0 ; i < serversPerHost.length; i++) {
377           int numRegionsPerHost = 0;
378           for (int j = 0; j < serversPerHost[i].length; j++) {
379             numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
380           }
381           regionsPerHost[i] = new int[numRegionsPerHost];
382           primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
383         }
384         for (int i = 0 ; i < serversPerHost.length; i++) {
385           int numRegionPerHostIndex = 0;
386           for (int j = 0; j < serversPerHost[i].length; j++) {
387             for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
388               int region = regionsPerServer[serversPerHost[i][j]][k];
389               regionsPerHost[i][numRegionPerHostIndex] = region;
390               int primaryIndex = regionIndexToPrimaryIndex[region];
391               primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
392               numRegionPerHostIndex++;
393             }
394           }
395           // sort the regions by primaries.
396           Arrays.sort(primariesOfRegionsPerHost[i]);
397         }
398       }
399 
400       // compute regionsPerRack
401       if (numRacks > 1) {
402         for (int i = 0 ; i < serversPerRack.length; i++) {
403           int numRegionsPerRack = 0;
404           for (int j = 0; j < serversPerRack[i].length; j++) {
405             numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
406           }
407           regionsPerRack[i] = new int[numRegionsPerRack];
408           primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
409         }
410 
411         for (int i = 0 ; i < serversPerRack.length; i++) {
412           int numRegionPerRackIndex = 0;
413           for (int j = 0; j < serversPerRack[i].length; j++) {
414             for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
415               int region = regionsPerServer[serversPerRack[i][j]][k];
416               regionsPerRack[i][numRegionPerRackIndex] = region;
417               int primaryIndex = regionIndexToPrimaryIndex[region];
418               primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
419               numRegionPerRackIndex++;
420             }
421           }
422           // sort the regions by primaries.
423           Arrays.sort(primariesOfRegionsPerRack[i]);
424         }
425       }
426     }
427 
428     /** Helper for Cluster constructor to handle a region */
429     private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
430         Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
431       String tableName = region.getTable().getNameAsString();
432       if (!tablesToIndex.containsKey(tableName)) {
433         tables.add(tableName);
434         tablesToIndex.put(tableName, tablesToIndex.size());
435       }
436       int tableIndex = tablesToIndex.get(tableName);
437 
438       regionsToIndex.put(region, regionIndex);
439       regions[regionIndex] = region;
440       regionIndexToServerIndex[regionIndex] = serverIndex;
441       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
442       regionIndexToTableIndex[regionIndex] = tableIndex;
443 
444       // region load
445       if (loads != null) {
446         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
447         // That could have failed if the RegionLoad is using the other regionName
448         if (rl == null) {
449           // Try getting the region load using encoded name.
450           rl = loads.get(region.getEncodedName());
451         }
452         regionLoads[regionIndex] = rl;
453       }
454 
455       if (regionFinder != null) {
456         //region location
457         List<ServerName> loc = regionFinder.getTopBlockLocations(region);
458         regionLocations[regionIndex] = new int[loc.size()];
459         for (int i=0; i < loc.size(); i++) {
460           regionLocations[regionIndex][i] =
461               loc.get(i) == null ? -1 :
462                 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
463                     : serversToIndex.get(loc.get(i).getHostAndPort()));
464         }
465       }
466     }
467 
468     /** An action to move or swap a region */
469     public static class Action {
470       public static enum Type {
471         ASSIGN_REGION,
472         MOVE_REGION,
473         SWAP_REGIONS,
474         NULL,
475       }
476 
477       public Type type;
478       public Action (Type type) {this.type = type;}
479       /** Returns an Action which would undo this action */
480       public Action undoAction() { return this; }
481       @Override
482       public String toString() { return type + ":";}
483     }
484 
485     public static class AssignRegionAction extends Action {
486       public int region;
487       public int server;
488       public AssignRegionAction(int region, int server) {
489         super(Type.ASSIGN_REGION);
490         this.region = region;
491         this.server = server;
492       }
493       @Override
494       public Action undoAction() {
495         // TODO implement this. This action is not being used by the StochasticLB for now
496         // in case it uses it, we should implement this function.
497         throw new NotImplementedException();
498       }
499       @Override
500       public String toString() {
501         return type + ": " + region + ":" + server;
502       }
503     }
504 
505     public static class MoveRegionAction extends Action {
506       public int region;
507       public int fromServer;
508       public int toServer;
509 
510       public MoveRegionAction(int region, int fromServer, int toServer) {
511         super(Type.MOVE_REGION);
512         this.fromServer = fromServer;
513         this.region = region;
514         this.toServer = toServer;
515       }
516       @Override
517       public Action undoAction() {
518         return new MoveRegionAction (region, toServer, fromServer);
519       }
520       @Override
521       public String toString() {
522         return type + ": " + region + ":" + fromServer + " -> " + toServer;
523       }
524     }
525 
526     public static class SwapRegionsAction extends Action {
527       public int fromServer;
528       public int fromRegion;
529       public int toServer;
530       public int toRegion;
531       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
532         super(Type.SWAP_REGIONS);
533         this.fromServer = fromServer;
534         this.fromRegion = fromRegion;
535         this.toServer = toServer;
536         this.toRegion = toRegion;
537       }
538       @Override
539       public Action undoAction() {
540         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
541       }
542       @Override
543       public String toString() {
544         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
545       }
546     }
547 
548     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
549         justification="Mistake. Too disruptive to change now")
550     public static final Action NullAction = new Action(Type.NULL);
551 
552     public void doAction(Action action) {
553       switch (action.type) {
554       case NULL: break;
555       case ASSIGN_REGION:
556         AssignRegionAction ar = (AssignRegionAction) action;
557         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
558         regionMoved(ar.region, -1, ar.server);
559         break;
560       case MOVE_REGION:
561         MoveRegionAction mra = (MoveRegionAction) action;
562         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
563         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
564         regionMoved(mra.region, mra.fromServer, mra.toServer);
565         break;
566       case SWAP_REGIONS:
567         SwapRegionsAction a = (SwapRegionsAction) action;
568         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
569         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
570         regionMoved(a.fromRegion, a.fromServer, a.toServer);
571         regionMoved(a.toRegion, a.toServer, a.fromServer);
572         break;
573       default:
574         throw new RuntimeException("Uknown action:" + action.type);
575       }
576     }
577 
578     /**
579      * Return true if the placement of region on server would lower the availability
580      * of the region in question
581      * @param server
582      * @param region
583      * @return true or false
584      */
585     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
586       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
587         return false; // safeguard against race between cluster.servers and servers from LB method args
588       }
589       int server = serversToIndex.get(serverName.getHostAndPort());
590       int region = regionsToIndex.get(regionInfo);
591 
592       int primary = regionIndexToPrimaryIndex[region];
593 
594       // there is a subset relation for server < host < rack
595       // check server first
596 
597       if (contains(primariesOfRegionsPerServer[server], primary)) {
598         // check for whether there are other servers that we can place this region
599         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
600           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
601             return true; // meaning there is a better server
602           }
603         }
604         return false; // there is not a better server to place this
605       }
606 
607       // check host
608       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
609         int host = serverIndexToHostIndex[server];
610         if (contains(primariesOfRegionsPerHost[host], primary)) {
611           // check for whether there are other hosts that we can place this region
612           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
613             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
614               return true; // meaning there is a better host
615             }
616           }
617           return false; // there is not a better host to place this
618         }
619       }
620 
621       // check rack
622       if (numRacks > 1) {
623         int rack = serverIndexToRackIndex[server];
624         if (contains(primariesOfRegionsPerRack[rack], primary)) {
625           // check for whether there are other racks that we can place this region
626           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
627             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
628               return true; // meaning there is a better rack
629             }
630           }
631           return false; // there is not a better rack to place this
632         }
633       }
634       return false;
635     }
636 
637     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
638       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
639         return;
640       }
641       int server = serversToIndex.get(serverName.getHostAndPort());
642       int region = regionsToIndex.get(regionInfo);
643       doAction(new AssignRegionAction(region, server));
644     }
645 
646     void regionMoved(int region, int oldServer, int newServer) {
647       regionIndexToServerIndex[region] = newServer;
648       if (initialRegionIndexToServerIndex[region] == newServer) {
649         numMovedRegions--; //region moved back to original location
650       } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
651         numMovedRegions++; //region moved from original location
652       }
653       int tableIndex = regionIndexToTableIndex[region];
654       if (oldServer >= 0) {
655         numRegionsPerServerPerTable[oldServer][tableIndex]--;
656       }
657       numRegionsPerServerPerTable[newServer][tableIndex]++;
658 
659       //check whether this caused maxRegionsPerTable in the new Server to be updated
660       if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
661         numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
662       } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
663           == numMaxRegionsPerTable[tableIndex]) {
664         //recompute maxRegionsPerTable since the previous value was coming from the old server
665         for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
666           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
667             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
668           }
669         }
670       }
671 
672       // update for servers
673       int primary = regionIndexToPrimaryIndex[region];
674       if (oldServer >= 0) {
675         primariesOfRegionsPerServer[oldServer] = removeRegion(
676           primariesOfRegionsPerServer[oldServer], primary);
677       }
678       primariesOfRegionsPerServer[newServer] = addRegionSorted(
679         primariesOfRegionsPerServer[newServer], primary);
680 
681       // update for hosts
682       if (multiServersPerHost) {
683         int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
684         int newHost = serverIndexToHostIndex[newServer];
685         if (newHost != oldHost) {
686           regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
687           primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
688           if (oldHost >= 0) {
689             regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
690             primariesOfRegionsPerHost[oldHost] = removeRegion(
691               primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
692           }
693         }
694       }
695 
696       // update for racks
697       if (numRacks > 1) {
698         int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
699         int newRack = serverIndexToRackIndex[newServer];
700         if (newRack != oldRack) {
701           regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
702           primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
703           if (oldRack >= 0) {
704             regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
705             primariesOfRegionsPerRack[oldRack] = removeRegion(
706               primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
707           }
708         }
709       }
710     }
711 
712     int[] removeRegion(int[] regions, int regionIndex) {
713       //TODO: this maybe costly. Consider using linked lists
714       int[] newRegions = new int[regions.length - 1];
715       int i = 0;
716       for (i = 0; i < regions.length; i++) {
717         if (regions[i] == regionIndex) {
718           break;
719         }
720         newRegions[i] = regions[i];
721       }
722       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
723       return newRegions;
724     }
725 
726     int[] addRegion(int[] regions, int regionIndex) {
727       int[] newRegions = new int[regions.length + 1];
728       System.arraycopy(regions, 0, newRegions, 0, regions.length);
729       newRegions[newRegions.length - 1] = regionIndex;
730       return newRegions;
731     }
732 
733     int[] addRegionSorted(int[] regions, int regionIndex) {
734       int[] newRegions = new int[regions.length + 1];
735       int i = 0;
736       for (i = 0; i < regions.length; i++) { // find the index to insert
737         if (regions[i] > regionIndex) {
738           break;
739         }
740       }
741       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
742       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
743       newRegions[i] = regionIndex;
744 
745       return newRegions;
746     }
747 
748     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
749       int i = 0;
750       for (i = 0; i < regions.length; i++) {
751         if (regions[i] == regionIndex) {
752           regions[i] = newRegionIndex;
753           break;
754         }
755       }
756       return regions;
757     }
758 
759     void sortServersByRegionCount() {
760       Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
761     }
762 
763     int getNumRegions(int server) {
764       return regionsPerServer[server].length;
765     }
766 
767     boolean contains(int[] arr, int val) {
768       return Arrays.binarySearch(arr, val) >= 0;
769     }
770 
771     private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
772       @Override
773       public int compare(Integer integer, Integer integer2) {
774         return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
775       }
776     };
777 
778     void sortServersByLocality() {
779       Arrays.sort(serverIndicesSortedByLocality, localityComparator);
780     }
781 
782     float getLocality(int server) {
783       return localityPerServer[server];
784     }
785 
786     private Comparator<Integer> localityComparator = new Comparator<Integer>() {
787       @Override
788       public int compare(Integer integer, Integer integer2) {
789         float locality1 = getLocality(integer);
790         float locality2 = getLocality(integer2);
791         if (locality1 < locality2) {
792           return -1;
793         } else if (locality1 > locality2) {
794           return 1;
795         } else {
796           return 0;
797         }
798       }
799     };
800 
801     int getLowestLocalityRegionServer() {
802       if (regionFinder == null) {
803         return -1;
804       } else {
805         sortServersByLocality();
806         // We want to find server with non zero regions having lowest locality.
807         int i = 0;
808         int lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
809         while (localityPerServer[lowestLocalityServerIndex] == 0
810             && (regionsPerServer[lowestLocalityServerIndex].length == 0)) {
811           i++;
812           lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
813         }
814         LOG.debug("Lowest locality region server with non zero regions is "
815             + servers[lowestLocalityServerIndex].getHostname() + " with locality "
816             + localityPerServer[lowestLocalityServerIndex]);
817         return lowestLocalityServerIndex;
818       }
819     }
820 
821     int getLowestLocalityRegionOnServer(int serverIndex) {
822       if (regionFinder != null) {
823         float lowestLocality = 1.0f;
824         int lowestLocalityRegionIndex = 0;
825         if (regionsPerServer[serverIndex].length == 0) {
826           // No regions on that region server
827           return -1;
828         }
829         for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
830           int regionIndex = regionsPerServer[serverIndex][j];
831           HDFSBlocksDistribution distribution = regionFinder
832               .getBlockDistribution(regions[regionIndex]);
833           float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
834           if (locality < lowestLocality) {
835             lowestLocality = locality;
836             lowestLocalityRegionIndex = j;
837           }
838         }
839         LOG.debug(" Lowest locality region index is " + lowestLocalityRegionIndex
840             + " and its region server contains " + regionsPerServer[serverIndex].length
841             + " regions");
842         return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
843       } else {
844         return -1;
845       }
846     }
847 
848     float getLocalityOfRegion(int region, int server) {
849       if (regionFinder != null) {
850         HDFSBlocksDistribution distribution = regionFinder.getBlockDistribution(regions[region]);
851         return distribution.getBlockLocalityIndex(servers[server].getHostname());
852       } else {
853         return 0f;
854       }
855     }
856 
857     int getLeastLoadedTopServerForRegion(int region) {
858       if (regionFinder != null) {
859         List<ServerName> topLocalServers = regionFinder.getTopBlockLocations(regions[region]);
860         int leastLoadedServerIndex = -1;
861         int load = Integer.MAX_VALUE;
862         for (ServerName sn : topLocalServers) {
863           if (!serversToIndex.containsKey(sn.getHostAndPort())) {
864             continue;
865           }
866           int index = serversToIndex.get(sn.getHostAndPort());
867           if (regionsPerServer[index] == null) {
868             continue;
869           }
870           int tempLoad = regionsPerServer[index].length;
871           if (tempLoad <= load) {
872             leastLoadedServerIndex = index;
873             load = tempLoad;
874           }
875         }
876         return leastLoadedServerIndex;
877       } else {
878         return -1;
879       }
880     }
881 
882     void calculateRegionServerLocalities() {
883       if (regionFinder == null) {
884         LOG.warn("Region location finder found null, skipping locality calculations.");
885         return;
886       }
887       for (int i = 0; i < regionsPerServer.length; i++) {
888         HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
889         if (regionsPerServer[i].length > 0) {
890           for (int j = 0; j < regionsPerServer[i].length; j++) {
891             int regionIndex = regionsPerServer[i][j];
892             distribution.add(regionFinder.getBlockDistribution(regions[regionIndex]));
893           }
894         } else {
895           LOG.debug("Server " + servers[i].getHostname() + " had 0 regions.");
896         }
897         localityPerServer[i] = distribution.getBlockLocalityIndex(servers[i].getHostname());
898       }
899     }
900 
901     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
902         justification="Not important but should be fixed")
903     @Override
904     public String toString() {
905       String desc = "Cluster{" +
906           "servers=[";
907           for(ServerName sn:servers) {
908              desc += sn.getHostAndPort() + ", ";
909           }
910           desc +=
911           ", serverIndicesSortedByRegionCount="+
912           Arrays.toString(serverIndicesSortedByRegionCount) +
913           ", regionsPerServer=[";
914 
915           for (int[]r:regionsPerServer) {
916             desc += Arrays.toString(r);
917           }
918           desc += "]" +
919           ", numMaxRegionsPerTable=" +
920           Arrays.toString(numMaxRegionsPerTable) +
921           ", numRegions=" +
922           numRegions +
923           ", numServers=" +
924           numServers +
925           ", numTables=" +
926           numTables +
927           ", numMovedRegions=" +
928           numMovedRegions +
929           '}';
930       return desc;
931     }
932   }
933 
934   // slop for regions
935   protected float slop;
936   protected Configuration config;
937   protected RackManager rackManager;
938   private static final Random RANDOM = new Random(System.currentTimeMillis());
939   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
940 
941   // Regions of these tables are put on the master by default.
942   private static final String[] DEFAULT_TABLES_ON_MASTER =
943     new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
944       TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
945       TableName.META_TABLE_NAME.getNameAsString()};
946 
947   public static final String TABLES_ON_MASTER =
948     "hbase.balancer.tablesOnMaster";
949 
950   protected final Set<String> tablesOnMaster = new HashSet<String>();
951   protected MetricsBalancer metricsBalancer = null;
952   protected ClusterStatus clusterStatus = null;
953   protected ServerName masterServerName;
954   protected MasterServices services;
955 
956   /**
957    * By default, regions of some small system tables such as meta,
958    * namespace, and acl are assigned to the active master. If you don't
959    * want to assign any region to the active master, you need to
960    * configure "hbase.balancer.tablesOnMaster" to "none".
961    */
962   protected static String[] getTablesOnMaster(Configuration conf) {
963     String valueString = conf.get(TABLES_ON_MASTER);
964     if (valueString == null) {
965       return DEFAULT_TABLES_ON_MASTER;
966     }
967     valueString = valueString.trim();
968     if (valueString.equalsIgnoreCase("none")) {
969       return null;
970     }
971     return StringUtils.getStrings(valueString);
972   }
973 
974   /**
975    * Check if configured to put any tables on the active master
976    */
977   public static boolean tablesOnMaster(Configuration conf) {
978     String[] tables = getTablesOnMaster(conf);
979     return tables != null && tables.length > 0;
980   }
981 
982   @Override
983   public void setConf(Configuration conf) {
984     setSlop(conf);
985     if (slop < 0) slop = 0;
986     else if (slop > 1) slop = 1;
987 
988     this.config = conf;
989     String[] tables = getTablesOnMaster(conf);
990     if (tables != null && tables.length > 0) {
991       Collections.addAll(tablesOnMaster, tables);
992     }
993     this.rackManager = new RackManager(getConf());
994     regionFinder.setConf(conf);
995   }
996 
997   protected void setSlop(Configuration conf) {
998     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
999   }
1000 
1001   /**
1002    * Check if a region belongs to some small system table.
1003    * If so, the primary replica may be expected to be put on the master regionserver.
1004    */
1005   public boolean shouldBeOnMaster(HRegionInfo region) {
1006     return tablesOnMaster.contains(region.getTable().getNameAsString())
1007         && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
1008   }
1009 
1010   /**
1011    * Balance the regions that should be on master regionserver.
1012    */
1013   protected List<RegionPlan> balanceMasterRegions(
1014       Map<ServerName, List<HRegionInfo>> clusterMap) {
1015     if (masterServerName == null
1016         || clusterMap == null || clusterMap.size() <= 1) return null;
1017     List<RegionPlan> plans = null;
1018     List<HRegionInfo> regions = clusterMap.get(masterServerName);
1019     if (regions != null) {
1020       Iterator<ServerName> keyIt = null;
1021       for (HRegionInfo region: regions) {
1022         if (shouldBeOnMaster(region)) continue;
1023 
1024         // Find a non-master regionserver to host the region
1025         if (keyIt == null || !keyIt.hasNext()) {
1026           keyIt = clusterMap.keySet().iterator();
1027         }
1028         ServerName dest = keyIt.next();
1029         if (masterServerName.equals(dest)) {
1030           if (!keyIt.hasNext()) {
1031             keyIt = clusterMap.keySet().iterator();
1032           }
1033           dest = keyIt.next();
1034         }
1035 
1036         // Move this region away from the master regionserver
1037         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
1038         if (plans == null) {
1039           plans = new ArrayList<RegionPlan>();
1040         }
1041         plans.add(plan);
1042       }
1043     }
1044     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
1045       if (masterServerName.equals(server.getKey())) continue;
1046       for (HRegionInfo region: server.getValue()) {
1047         if (!shouldBeOnMaster(region)) continue;
1048 
1049         // Move this region to the master regionserver
1050         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
1051         if (plans == null) {
1052           plans = new ArrayList<RegionPlan>();
1053         }
1054         plans.add(plan);
1055       }
1056     }
1057     return plans;
1058   }
1059 
1060   /**
1061    * Assign the regions that should be on master regionserver.
1062    */
1063   protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
1064       Collection<HRegionInfo> regions, List<ServerName> servers) {
1065     if (servers == null || regions == null || regions.isEmpty()) {
1066       return null;
1067     }
1068     Map<ServerName, List<HRegionInfo>> assignments
1069       = new TreeMap<ServerName, List<HRegionInfo>>();
1070     if (masterServerName != null && servers.contains(masterServerName)) {
1071       assignments.put(masterServerName, new ArrayList<HRegionInfo>());
1072       for (HRegionInfo region: regions) {
1073         if (shouldBeOnMaster(region)) {
1074           assignments.get(masterServerName).add(region);
1075         }
1076       }
1077     }
1078     return assignments;
1079   }
1080 
1081   @Override
1082   public Configuration getConf() {
1083     return this.config;
1084   }
1085 
1086   @Override
1087   public void setClusterStatus(ClusterStatus st) {
1088     this.clusterStatus = st;
1089     regionFinder.setClusterStatus(st);
1090   }
1091 
1092   @Override
1093   public void setMasterServices(MasterServices masterServices) {
1094     masterServerName = masterServices.getServerName();
1095     this.services = masterServices;
1096     this.regionFinder.setServices(masterServices);
1097   }
1098 
1099   public void setRackManager(RackManager rackManager) {
1100     this.rackManager = rackManager;
1101   }
1102 
1103   protected boolean needsBalance(Cluster c) {
1104     ClusterLoadState cs = new ClusterLoadState(c.clusterState);
1105     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
1106       if (LOG.isDebugEnabled()) {
1107         LOG.debug("Not running balancer because only " + cs.getNumServers()
1108             + " active regionserver(s)");
1109       }
1110       return false;
1111     }
1112     if(areSomeRegionReplicasColocated(c)) return true;
1113     // Check if we even need to do any load balancing
1114     // HBASE-3681 check sloppiness first
1115     float average = cs.getLoadAverage(); // for logging
1116     int floor = (int) Math.floor(average * (1 - slop));
1117     int ceiling = (int) Math.ceil(average * (1 + slop));
1118     if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
1119       NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
1120       if (LOG.isTraceEnabled()) {
1121         // If nothing to balance, then don't say anything unless trace-level logging.
1122         LOG.trace("Skipping load balancing because balanced cluster; " +
1123           "servers=" + cs.getNumServers() +
1124           " regions=" + cs.getNumRegions() + " average=" + average +
1125           " mostloaded=" + serversByLoad.lastKey().getLoad() +
1126           " leastloaded=" + serversByLoad.firstKey().getLoad());
1127       }
1128       return false;
1129     }
1130     return true;
1131   }
1132 
1133   /**
1134    * Subclasses should implement this to return true if the cluster has nodes that hosts
1135    * multiple replicas for the same region, or, if there are multiple racks and the same
1136    * rack hosts replicas of the same region
1137    * @param c Cluster information
1138    * @return whether region replicas are currently co-located
1139    */
1140   protected boolean areSomeRegionReplicasColocated(Cluster c) {
1141     return false;
1142   }
1143 
1144   /**
1145    * Generates a bulk assignment plan to be used on cluster startup using a
1146    * simple round-robin assignment.
1147    * <p>
1148    * Takes a list of all the regions and all the servers in the cluster and
1149    * returns a map of each server to the regions that it should be assigned.
1150    * <p>
1151    * Currently implemented as a round-robin assignment. Same invariant as load
1152    * balancing, all servers holding floor(avg) or ceiling(avg).
1153    *
1154    * TODO: Use block locations from HDFS to place regions with their blocks
1155    *
1156    * @param regions all regions
1157    * @param servers all servers
1158    * @return map of server to the regions it should take, or null if no
1159    *         assignment is possible (ie. no regions or no servers)
1160    */
1161   @Override
1162   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1163       List<ServerName> servers) {
1164     metricsBalancer.incrMiscInvocations();
1165     Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1166     if (assignments != null && !assignments.isEmpty()) {
1167       servers = new ArrayList<ServerName>(servers);
1168       // Guarantee not to put other regions on master
1169       servers.remove(masterServerName);
1170       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1171       if (!masterRegions.isEmpty()) {
1172         regions = new ArrayList<HRegionInfo>(regions);
1173         for (HRegionInfo region: masterRegions) {
1174           regions.remove(region);
1175         }
1176       }
1177     }
1178     if (regions == null || regions.isEmpty()) {
1179       return assignments;
1180     }
1181 
1182     int numServers = servers == null ? 0 : servers.size();
1183     if (numServers == 0) {
1184       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1185       return null;
1186     }
1187 
1188     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1189     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1190     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1191     // and balanced. This should also run fast with fewer number of iterations.
1192 
1193     if (numServers == 1) { // Only one server, nothing fancy we can do here
1194       ServerName server = servers.get(0);
1195       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1196       return assignments;
1197     }
1198 
1199     Cluster cluster = createCluster(servers, regions);
1200     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1201 
1202     roundRobinAssignment(cluster, regions, unassignedRegions,
1203       servers, assignments);
1204 
1205     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1206     // assign the remaining by going through the list and try to assign to servers one-by-one
1207     int serverIdx = RANDOM.nextInt(numServers);
1208     for (HRegionInfo region : unassignedRegions) {
1209       boolean assigned = false;
1210       for (int j = 0; j < numServers; j++) { // try all servers one by one
1211         ServerName serverName = servers.get((j + serverIdx) % numServers);
1212         if (!cluster.wouldLowerAvailability(region, serverName)) {
1213           List<HRegionInfo> serverRegions = assignments.get(serverName);
1214           if (serverRegions == null) {
1215             serverRegions = new ArrayList<HRegionInfo>();
1216             assignments.put(serverName, serverRegions);
1217           }
1218           serverRegions.add(region);
1219           cluster.doAssignRegion(region, serverName);
1220           serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
1221           assigned = true;
1222           break;
1223         }
1224       }
1225       if (!assigned) {
1226         lastFewRegions.add(region);
1227       }
1228     }
1229     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1230     // make it optimal later. we can end up with this if numReplicas > numServers.
1231     for (HRegionInfo region : lastFewRegions) {
1232       int i = RANDOM.nextInt(numServers);
1233       ServerName server = servers.get(i);
1234       List<HRegionInfo> serverRegions = assignments.get(server);
1235       if (serverRegions == null) {
1236         serverRegions = new ArrayList<HRegionInfo>();
1237         assignments.put(server, serverRegions);
1238       }
1239       serverRegions.add(region);
1240       cluster.doAssignRegion(region, server);
1241     }
1242     return assignments;
1243   }
1244 
1245   protected Cluster createCluster(List<ServerName> servers,
1246       Collection<HRegionInfo> regions) {
1247     // Get the snapshot of the current assignments for the regions in question, and then create
1248     // a cluster out of it. Note that we might have replicas already assigned to some servers
1249     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
1250     // replicas of the regions that are passed (for performance).
1251     Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1252 
1253     for (ServerName server : servers) {
1254       if (!clusterState.containsKey(server)) {
1255         clusterState.put(server, EMPTY_REGION_LIST);
1256       }
1257     }
1258     return new Cluster(regions, clusterState, null, this.regionFinder,
1259       rackManager);
1260   }
1261 
1262   /**
1263    * Generates an immediate assignment plan to be used by a new master for
1264    * regions in transition that do not have an already known destination.
1265    *
1266    * Takes a list of regions that need immediate assignment and a list of all
1267    * available servers. Returns a map of regions to the server they should be
1268    * assigned to.
1269    *
1270    * This method will return quickly and does not do any intelligent balancing.
1271    * The goal is to make a fast decision not the best decision possible.
1272    *
1273    * Currently this is random.
1274    *
1275    * @param regions
1276    * @param servers
1277    * @return map of regions to the server it should be assigned to
1278    */
1279   @Override
1280   public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
1281       List<ServerName> servers) {
1282     metricsBalancer.incrMiscInvocations();
1283     if (servers == null || servers.isEmpty()) {
1284       LOG.warn("Wanted to do random assignment but no servers to assign to");
1285       return null;
1286     }
1287 
1288     Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
1289     for (HRegionInfo region : regions) {
1290       assignments.put(region, randomAssignment(region, servers));
1291     }
1292     return assignments;
1293   }
1294 
1295   /**
1296    * Used to assign a single region to a random server.
1297    */
1298   @Override
1299   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1300     metricsBalancer.incrMiscInvocations();
1301     if (servers != null && servers.contains(masterServerName)) {
1302       if (shouldBeOnMaster(regionInfo)) {
1303         return masterServerName;
1304       }
1305       servers = new ArrayList<ServerName>(servers);
1306       // Guarantee not to put other regions on master
1307       servers.remove(masterServerName);
1308     }
1309 
1310     int numServers = servers == null ? 0 : servers.size();
1311     if (numServers == 0) {
1312       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1313       return null;
1314     }
1315     if (numServers == 1) { // Only one server, nothing fancy we can do here
1316       return servers.get(0);
1317     }
1318 
1319     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1320     Cluster cluster = createCluster(servers, regions);
1321     return randomAssignment(cluster, regionInfo, servers);
1322   }
1323 
1324   /**
1325    * Generates a bulk assignment startup plan, attempting to reuse the existing
1326    * assignment information from META, but adjusting for the specified list of
1327    * available/online servers available for assignment.
1328    * <p>
1329    * Takes a map of all regions to their existing assignment from META. Also
1330    * takes a list of online servers for regions to be assigned to. Attempts to
1331    * retain all assignment, so in some instances initial assignment will not be
1332    * completely balanced.
1333    * <p>
1334    * Any leftover regions without an existing server to be assigned to will be
1335    * assigned randomly to available servers.
1336    *
1337    * @param regions regions and existing assignment from meta
1338    * @param servers available servers
1339    * @return map of servers and regions to be assigned to them
1340    */
1341   @Override
1342   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1343       List<ServerName> servers) {
1344     // Update metrics
1345     metricsBalancer.incrMiscInvocations();
1346     Map<ServerName, List<HRegionInfo>> assignments
1347       = assignMasterRegions(regions.keySet(), servers);
1348     if (assignments != null && !assignments.isEmpty()) {
1349       servers = new ArrayList<ServerName>(servers);
1350       // Guarantee not to put other regions on master
1351       servers.remove(masterServerName);
1352       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1353       if (!masterRegions.isEmpty()) {
1354         regions = new HashMap<HRegionInfo, ServerName>(regions);
1355         for (HRegionInfo region: masterRegions) {
1356           regions.remove(region);
1357         }
1358       }
1359     }
1360     if (regions == null || regions.isEmpty()) {
1361       return assignments;
1362     }
1363 
1364     int numServers = servers == null ? 0 : servers.size();
1365     if (numServers == 0) {
1366       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1367       return null;
1368     }
1369     if (numServers == 1) { // Only one server, nothing fancy we can do here
1370       ServerName server = servers.get(0);
1371       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1372       return assignments;
1373     }
1374 
1375     // Group all of the old assignments by their hostname.
1376     // We can't group directly by ServerName since the servers all have
1377     // new start-codes.
1378 
1379     // Group the servers by their hostname. It's possible we have multiple
1380     // servers on the same host on different ports.
1381     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1382     for (ServerName server : servers) {
1383       assignments.put(server, new ArrayList<HRegionInfo>());
1384       serversByHostname.put(server.getHostname(), server);
1385     }
1386 
1387     // Collection of the hostnames that used to have regions
1388     // assigned, but for which we no longer have any RS running
1389     // after the cluster restart.
1390     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1391 
1392     int numRandomAssignments = 0;
1393     int numRetainedAssigments = 0;
1394 
1395     Cluster cluster = createCluster(servers, regions.keySet());
1396 
1397     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1398       HRegionInfo region = entry.getKey();
1399       ServerName oldServerName = entry.getValue();
1400       List<ServerName> localServers = new ArrayList<ServerName>();
1401       if (oldServerName != null) {
1402         localServers = serversByHostname.get(oldServerName.getHostname());
1403       }
1404       if (localServers.isEmpty()) {
1405         // No servers on the new cluster match up with this hostname,
1406         // assign randomly.
1407         ServerName randomServer = randomAssignment(cluster, region, servers);
1408         assignments.get(randomServer).add(region);
1409         numRandomAssignments++;
1410         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1411       } else if (localServers.size() == 1) {
1412         // the usual case - one new server on same host
1413         ServerName target = localServers.get(0);
1414         assignments.get(target).add(region);
1415         cluster.doAssignRegion(region, target);
1416         numRetainedAssigments++;
1417       } else {
1418         // multiple new servers in the cluster on this same host
1419         if (localServers.contains(oldServerName)) {
1420           assignments.get(oldServerName).add(region);
1421           cluster.doAssignRegion(region, oldServerName);
1422         } else {
1423           ServerName target = null;
1424           for (ServerName tmp: localServers) {
1425             if (tmp.getPort() == oldServerName.getPort()) {
1426               target = tmp;
1427               break;
1428             }
1429           }
1430           if (target == null) {
1431             target = randomAssignment(cluster, region, localServers);
1432           }
1433           assignments.get(target).add(region);
1434         }
1435         numRetainedAssigments++;
1436       }
1437     }
1438 
1439     String randomAssignMsg = "";
1440     if (numRandomAssignments > 0) {
1441       randomAssignMsg =
1442           numRandomAssignments + " regions were assigned "
1443               + "to random hosts, since the old hosts for these regions are no "
1444               + "longer present in the cluster. These hosts were:\n  "
1445               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1446     }
1447 
1448     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1449         + " retained the pre-restart assignment. " + randomAssignMsg);
1450     return assignments;
1451   }
1452 
1453   @Override
1454   public void initialize() throws HBaseIOException{
1455   }
1456 
1457   @Override
1458   public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1459   }
1460 
1461   @Override
1462   public void regionOffline(HRegionInfo regionInfo) {
1463   }
1464 
1465   @Override
1466   public boolean isStopped() {
1467     return stopped;
1468   }
1469 
1470   @Override
1471   public void stop(String why) {
1472     LOG.info("Load Balancer stop requested: "+why);
1473     stopped = true;
1474   }
1475 
1476   /**
1477    * Used to assign a single region to a random server.
1478    */
1479   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1480       List<ServerName> servers) {
1481     int numServers = servers.size(); // servers is not null, numServers > 1
1482     ServerName sn = null;
1483     final int maxIterations = numServers * 4;
1484     int iterations = 0;
1485 
1486     do {
1487       int i = RANDOM.nextInt(numServers);
1488       sn = servers.get(i);
1489     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1490         && iterations++ < maxIterations);
1491     cluster.doAssignRegion(regionInfo, sn);
1492     return sn;
1493   }
1494 
1495   /**
1496    * Round robin a list of regions to a list of servers
1497    */
1498   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1499       List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1500       Map<ServerName, List<HRegionInfo>> assignments) {
1501 
1502     int numServers = servers.size();
1503     int numRegions = regions.size();
1504     int max = (int) Math.ceil((float) numRegions / numServers);
1505     int serverIdx = 0;
1506     if (numServers > 1) {
1507       serverIdx = RANDOM.nextInt(numServers);
1508     }
1509     int regionIdx = 0;
1510 
1511     for (int j = 0; j < numServers; j++) {
1512       ServerName server = servers.get((j + serverIdx) % numServers);
1513       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1514       for (int i = regionIdx; i < numRegions; i += numServers) {
1515         HRegionInfo region = regions.get(i % numRegions);
1516         if (cluster.wouldLowerAvailability(region, server)) {
1517           unassignedRegions.add(region);
1518         } else {
1519           serverRegions.add(region);
1520           cluster.doAssignRegion(region, server);
1521         }
1522       }
1523       assignments.put(server, serverRegions);
1524       regionIdx++;
1525     }
1526   }
1527 
1528   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1529     Collection<HRegionInfo> regions) {
1530     if (this.services != null && this.services.getAssignmentManager() != null) {
1531       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1532     } else {
1533       return new HashMap<ServerName, List<HRegionInfo>>();
1534     }
1535   }
1536 
1537   @Override
1538   public void onConfigurationChange(Configuration conf) {
1539   }
1540 }