View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.Deque;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.NavigableMap;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeMap;
36  
37  import org.apache.commons.lang.NotImplementedException;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.ClusterStatus;
42  import org.apache.hadoop.hbase.HBaseIOException;
43  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.RegionLoad;
46  import org.apache.hadoop.hbase.ServerName;
47  import org.apache.hadoop.hbase.TableName;
48  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
49  import org.apache.hadoop.hbase.master.LoadBalancer;
50  import org.apache.hadoop.hbase.master.MasterServices;
51  import org.apache.hadoop.hbase.master.RackManager;
52  import org.apache.hadoop.hbase.master.RegionPlan;
53  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
54  import org.apache.hadoop.hbase.security.access.AccessControlLists;
55  import org.apache.hadoop.util.StringUtils;
56  
57  import com.google.common.annotations.VisibleForTesting;
58  import com.google.common.base.Joiner;
59  import com.google.common.collect.ArrayListMultimap;
60  import com.google.common.collect.Lists;
61  import com.google.common.collect.Sets;
62  
63  /**
64   * The base class for load balancers. It provides the the functions used to by
65   * {@link org.apache.hadoop.hbase.master.AssignmentManager} to assign regions
66   * in the edge cases. It doesn't provide an implementation of the
67   * actual balancing algorithm.
68   *
69   */
70  public abstract class BaseLoadBalancer implements LoadBalancer {
71    private static final int MIN_SERVER_BALANCE = 2;
72    private volatile boolean stopped = false;
73  
74    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
75  
76    protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
77  
78    private static class DefaultRackManager extends RackManager {
79      @Override
80      public String getRack(ServerName server) {
81        return UNKNOWN_RACK;
82      }
83    }
84  
85    /**
86     * The constructor that uses the basic MetricsBalancer
87     */
88    protected BaseLoadBalancer() {
89      metricsBalancer = new MetricsBalancer();
90    }
91  
92    /**
93     * This Constructor accepts an instance of MetricsBalancer,
94     * which will be used instead of creating a new one
95     */
96    protected BaseLoadBalancer(MetricsBalancer metricsBalancer) {
97      this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer();
98    }
99  
100   /**
101    * An efficient array based implementation similar to ClusterState for keeping
102    * the status of the cluster in terms of region assignment and distribution.
103    * LoadBalancers, such as StochasticLoadBalancer uses this Cluster object because of
104    * hundreds of thousands of hashmap manipulations are very costly, which is why this
105    * class uses mostly indexes and arrays.
106    *
107    * Cluster tracks a list of unassigned regions, region assignments, and the server
108    * topology in terms of server names, hostnames and racks.
109    */
110   protected static class Cluster {
111     ServerName[] servers;
112     String[] hosts; // ServerName uniquely identifies a region server. multiple RS can run on the same host
113     String[] racks;
114     boolean multiServersPerHost = false; // whether or not any host has more than one server
115 
116     ArrayList<String> tables;
117     HRegionInfo[] regions;
118     Deque<RegionLoad>[] regionLoads;
119     private RegionLocationFinder regionFinder;
120 
121     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
122 
123     int[]   serverIndexToHostIndex;      //serverIndex -> host index
124     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
125 
126     int[][] regionsPerServer;            //serverIndex -> region list
127     int[][] regionsPerHost;              //hostIndex -> list of regions
128     int[][] regionsPerRack;              //rackIndex -> region list
129     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
130     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
131     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
132 
133     int[][] serversPerHost;              //hostIndex -> list of server indexes
134     int[][] serversPerRack;              //rackIndex -> list of server indexes
135     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
136     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
137     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
138     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
139     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
140     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
141     boolean hasRegionReplicas = false;   //whether there is regions with replicas
142 
143     Integer[] serverIndicesSortedByRegionCount;
144     Integer[] serverIndicesSortedByLocality;
145 
146     Map<String, Integer> serversToIndex;
147     Map<String, Integer> hostsToIndex;
148     Map<String, Integer> racksToIndex;
149     Map<String, Integer> tablesToIndex;
150     Map<HRegionInfo, Integer> regionsToIndex;
151     float[] localityPerServer;
152 
153     int numServers;
154     int numHosts;
155     int numRacks;
156     int numTables;
157     int numRegions;
158 
159     int numMovedRegions = 0; //num moved regions from the initial configuration
160     Map<ServerName, List<HRegionInfo>> clusterState;
161 
162     protected final RackManager rackManager;
163 
164     protected Cluster(
165         Map<ServerName, List<HRegionInfo>> clusterState,
166         Map<String, Deque<RegionLoad>> loads,
167         RegionLocationFinder regionFinder,
168         RackManager rackManager) {
169       this(null, clusterState, loads, regionFinder,
170         rackManager);
171     }
172 
173     @SuppressWarnings("unchecked")
174     protected Cluster(
175         Collection<HRegionInfo> unassignedRegions,
176         Map<ServerName, List<HRegionInfo>> clusterState,
177         Map<String, Deque<RegionLoad>> loads,
178         RegionLocationFinder regionFinder,
179         RackManager rackManager) {
180 
181       if (unassignedRegions == null) {
182         unassignedRegions = EMPTY_REGION_LIST;
183       }
184 
185       serversToIndex = new HashMap<String, Integer>();
186       hostsToIndex = new HashMap<String, Integer>();
187       racksToIndex = new HashMap<String, Integer>();
188       tablesToIndex = new HashMap<String, Integer>();
189 
190       //TODO: We should get the list of tables from master
191       tables = new ArrayList<String>();
192       this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
193 
194       numRegions = 0;
195 
196       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
197       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
198       this.clusterState = clusterState;
199       this.regionFinder = regionFinder;
200 
201       // Use servername and port as there can be dead servers in this list. We want everything with
202       // a matching hostname and port to have the same index.
203       for (ServerName sn : clusterState.keySet()) {
204         if (serversToIndex.get(sn.getHostAndPort()) == null) {
205           serversToIndex.put(sn.getHostAndPort(), numServers++);
206         }
207         if (!hostsToIndex.containsKey(sn.getHostname())) {
208           hostsToIndex.put(sn.getHostname(), numHosts++);
209           serversPerHostList.add(new ArrayList<Integer>(1));
210         }
211 
212         int serverIndex = serversToIndex.get(sn.getHostAndPort());
213         int hostIndex = hostsToIndex.get(sn.getHostname());
214         serversPerHostList.get(hostIndex).add(serverIndex);
215 
216         String rack = this.rackManager.getRack(sn);
217         if (!racksToIndex.containsKey(rack)) {
218           racksToIndex.put(rack, numRacks++);
219           serversPerRackList.add(new ArrayList<Integer>());
220         }
221         int rackIndex = racksToIndex.get(rack);
222         serversPerRackList.get(rackIndex).add(serverIndex);
223       }
224 
225       // Count how many regions there are.
226       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
227         numRegions += entry.getValue().size();
228       }
229       numRegions += unassignedRegions.size();
230 
231       regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
232       servers = new ServerName[numServers];
233       serversPerHost = new int[numHosts][];
234       serversPerRack = new int[numRacks][];
235       regions = new HRegionInfo[numRegions];
236       regionIndexToServerIndex = new int[numRegions];
237       initialRegionIndexToServerIndex = new int[numRegions];
238       regionIndexToTableIndex = new int[numRegions];
239       regionIndexToPrimaryIndex = new int[numRegions];
240       regionLoads = new Deque[numRegions];
241       regionLocations = new int[numRegions][];
242       serverIndicesSortedByRegionCount = new Integer[numServers];
243       serverIndicesSortedByLocality = new Integer[numServers];
244       localityPerServer = new float[numServers];
245 
246       serverIndexToHostIndex = new int[numServers];
247       serverIndexToRackIndex = new int[numServers];
248       regionsPerServer = new int[numServers][];
249       regionsPerHost = new int[numHosts][];
250       regionsPerRack = new int[numRacks][];
251       primariesOfRegionsPerServer = new int[numServers][];
252       primariesOfRegionsPerHost = new int[numHosts][];
253       primariesOfRegionsPerRack = new int[numRacks][];
254 
255       int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
256 
257       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
258         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
259 
260         // keep the servername if this is the first server name for this hostname
261         // or this servername has the newest startcode.
262         if (servers[serverIndex] == null ||
263             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
264           servers[serverIndex] = entry.getKey();
265         }
266 
267         if (regionsPerServer[serverIndex] != null) {
268           // there is another server with the same hostAndPort in ClusterState.
269           // allocate the array for the total size
270           regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
271         } else {
272           regionsPerServer[serverIndex] = new int[entry.getValue().size()];
273         }
274         primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
275         serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
276         serverIndicesSortedByLocality[serverIndex] = serverIndex;
277       }
278 
279       hosts = new String[numHosts];
280       for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
281         hosts[entry.getValue()] = entry.getKey();
282       }
283       racks = new String[numRacks];
284       for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
285         racks[entry.getValue()] = entry.getKey();
286       }
287 
288       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
289         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
290         regionPerServerIndex = 0;
291 
292         int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
293         serverIndexToHostIndex[serverIndex] = hostIndex;
294 
295         int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
296         serverIndexToRackIndex[serverIndex] = rackIndex;
297 
298         for (HRegionInfo region : entry.getValue()) {
299           registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
300 
301           regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
302           regionIndex++;
303         }
304       }
305       for (HRegionInfo region : unassignedRegions) {
306         registerRegion(region, regionIndex, -1, loads, regionFinder);
307         regionIndex++;
308       }
309 
310       for (int i = 0; i < serversPerHostList.size(); i++) {
311         serversPerHost[i] = new int[serversPerHostList.get(i).size()];
312         for (int j = 0; j < serversPerHost[i].length; j++) {
313           serversPerHost[i][j] = serversPerHostList.get(i).get(j);
314         }
315         if (serversPerHost[i].length > 1) {
316           multiServersPerHost = true;
317         }
318       }
319 
320       for (int i = 0; i < serversPerRackList.size(); i++) {
321         serversPerRack[i] = new int[serversPerRackList.get(i).size()];
322         for (int j = 0; j < serversPerRack[i].length; j++) {
323           serversPerRack[i][j] = serversPerRackList.get(i).get(j);
324         }
325       }
326 
327       numTables = tables.size();
328       numRegionsPerServerPerTable = new int[numServers][numTables];
329 
330       for (int i = 0; i < numServers; i++) {
331         for (int j = 0; j < numTables; j++) {
332           numRegionsPerServerPerTable[i][j] = 0;
333         }
334       }
335 
336       for (int i=0; i < regionIndexToServerIndex.length; i++) {
337         if (regionIndexToServerIndex[i] >= 0) {
338           numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
339         }
340       }
341 
342       numMaxRegionsPerTable = new int[numTables];
343       for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
344         for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
345           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
346             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
347           }
348         }
349       }
350 
351       for (int i = 0; i < regions.length; i ++) {
352         HRegionInfo info = regions[i];
353         if (RegionReplicaUtil.isDefaultReplica(info)) {
354           regionIndexToPrimaryIndex[i] = i;
355         } else {
356           hasRegionReplicas = true;
357           HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
358           regionIndexToPrimaryIndex[i] =
359               regionsToIndex.containsKey(primaryInfo) ?
360               regionsToIndex.get(primaryInfo):
361               -1;
362         }
363       }
364 
365       for (int i = 0; i < regionsPerServer.length; i++) {
366         primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
367         for (int j = 0; j < regionsPerServer[i].length; j++) {
368           int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
369           primariesOfRegionsPerServer[i][j] = primaryIndex;
370         }
371         // sort the regions by primaries.
372         Arrays.sort(primariesOfRegionsPerServer[i]);
373       }
374 
375       // compute regionsPerHost
376       if (multiServersPerHost) {
377         for (int i = 0 ; i < serversPerHost.length; i++) {
378           int numRegionsPerHost = 0;
379           for (int j = 0; j < serversPerHost[i].length; j++) {
380             numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
381           }
382           regionsPerHost[i] = new int[numRegionsPerHost];
383           primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
384         }
385         for (int i = 0 ; i < serversPerHost.length; i++) {
386           int numRegionPerHostIndex = 0;
387           for (int j = 0; j < serversPerHost[i].length; j++) {
388             for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
389               int region = regionsPerServer[serversPerHost[i][j]][k];
390               regionsPerHost[i][numRegionPerHostIndex] = region;
391               int primaryIndex = regionIndexToPrimaryIndex[region];
392               primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
393               numRegionPerHostIndex++;
394             }
395           }
396           // sort the regions by primaries.
397           Arrays.sort(primariesOfRegionsPerHost[i]);
398         }
399       }
400 
401       // compute regionsPerRack
402       if (numRacks > 1) {
403         for (int i = 0 ; i < serversPerRack.length; i++) {
404           int numRegionsPerRack = 0;
405           for (int j = 0; j < serversPerRack[i].length; j++) {
406             numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
407           }
408           regionsPerRack[i] = new int[numRegionsPerRack];
409           primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
410         }
411 
412         for (int i = 0 ; i < serversPerRack.length; i++) {
413           int numRegionPerRackIndex = 0;
414           for (int j = 0; j < serversPerRack[i].length; j++) {
415             for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
416               int region = regionsPerServer[serversPerRack[i][j]][k];
417               regionsPerRack[i][numRegionPerRackIndex] = region;
418               int primaryIndex = regionIndexToPrimaryIndex[region];
419               primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
420               numRegionPerRackIndex++;
421             }
422           }
423           // sort the regions by primaries.
424           Arrays.sort(primariesOfRegionsPerRack[i]);
425         }
426       }
427     }
428 
429     /** Helper for Cluster constructor to handle a region */
430     private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
431         Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
432       String tableName = region.getTable().getNameAsString();
433       if (!tablesToIndex.containsKey(tableName)) {
434         tables.add(tableName);
435         tablesToIndex.put(tableName, tablesToIndex.size());
436       }
437       int tableIndex = tablesToIndex.get(tableName);
438 
439       regionsToIndex.put(region, regionIndex);
440       regions[regionIndex] = region;
441       regionIndexToServerIndex[regionIndex] = serverIndex;
442       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
443       regionIndexToTableIndex[regionIndex] = tableIndex;
444 
445       // region load
446       if (loads != null) {
447         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
448         // That could have failed if the RegionLoad is using the other regionName
449         if (rl == null) {
450           // Try getting the region load using encoded name.
451           rl = loads.get(region.getEncodedName());
452         }
453         regionLoads[regionIndex] = rl;
454       }
455 
456       if (regionFinder != null) {
457         //region location
458         List<ServerName> loc = regionFinder.getTopBlockLocations(region);
459         regionLocations[regionIndex] = new int[loc.size()];
460         for (int i=0; i < loc.size(); i++) {
461           regionLocations[regionIndex][i] =
462               loc.get(i) == null ? -1 :
463                 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
464                     : serversToIndex.get(loc.get(i).getHostAndPort()));
465         }
466       }
467     }
468 
469     /** An action to move or swap a region */
470     public static class Action {
471       public static enum Type {
472         ASSIGN_REGION,
473         MOVE_REGION,
474         SWAP_REGIONS,
475         NULL,
476       }
477 
478       public Type type;
479       public Action (Type type) {this.type = type;}
480       /** Returns an Action which would undo this action */
481       public Action undoAction() { return this; }
482       @Override
483       public String toString() { return type + ":";}
484     }
485 
486     public static class AssignRegionAction extends Action {
487       public int region;
488       public int server;
489       public AssignRegionAction(int region, int server) {
490         super(Type.ASSIGN_REGION);
491         this.region = region;
492         this.server = server;
493       }
494       @Override
495       public Action undoAction() {
496         // TODO implement this. This action is not being used by the StochasticLB for now
497         // in case it uses it, we should implement this function.
498         throw new NotImplementedException();
499       }
500       @Override
501       public String toString() {
502         return type + ": " + region + ":" + server;
503       }
504     }
505 
506     public static class MoveRegionAction extends Action {
507       public int region;
508       public int fromServer;
509       public int toServer;
510 
511       public MoveRegionAction(int region, int fromServer, int toServer) {
512         super(Type.MOVE_REGION);
513         this.fromServer = fromServer;
514         this.region = region;
515         this.toServer = toServer;
516       }
517       @Override
518       public Action undoAction() {
519         return new MoveRegionAction (region, toServer, fromServer);
520       }
521       @Override
522       public String toString() {
523         return type + ": " + region + ":" + fromServer + " -> " + toServer;
524       }
525     }
526 
527     public static class SwapRegionsAction extends Action {
528       public int fromServer;
529       public int fromRegion;
530       public int toServer;
531       public int toRegion;
532       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
533         super(Type.SWAP_REGIONS);
534         this.fromServer = fromServer;
535         this.fromRegion = fromRegion;
536         this.toServer = toServer;
537         this.toRegion = toRegion;
538       }
539       @Override
540       public Action undoAction() {
541         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
542       }
543       @Override
544       public String toString() {
545         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
546       }
547     }
548 
549     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
550         justification="Mistake. Too disruptive to change now")
551     public static final Action NullAction = new Action(Type.NULL);
552 
553     public void doAction(Action action) {
554       switch (action.type) {
555       case NULL: break;
556       case ASSIGN_REGION:
557         // FindBugs: Having the assert quietens FB BC_UNCONFIRMED_CAST warnings
558         assert action instanceof AssignRegionAction: action.getClass();
559         AssignRegionAction ar = (AssignRegionAction) action;
560         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
561         regionMoved(ar.region, -1, ar.server);
562         break;
563       case MOVE_REGION:
564         assert action instanceof MoveRegionAction: action.getClass();
565         MoveRegionAction mra = (MoveRegionAction) action;
566         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
567         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
568         regionMoved(mra.region, mra.fromServer, mra.toServer);
569         break;
570       case SWAP_REGIONS:
571         assert action instanceof SwapRegionsAction: action.getClass();
572         SwapRegionsAction a = (SwapRegionsAction) action;
573         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
574         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
575         regionMoved(a.fromRegion, a.fromServer, a.toServer);
576         regionMoved(a.toRegion, a.toServer, a.fromServer);
577         break;
578       default:
579         throw new RuntimeException("Uknown action:" + action.type);
580       }
581     }
582 
583     /**
584      * Return true if the placement of region on server would lower the availability
585      * of the region in question
586      * @param server
587      * @param region
588      * @return true or false
589      */
590     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
591       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
592         return false; // safeguard against race between cluster.servers and servers from LB method args
593       }
594       int server = serversToIndex.get(serverName.getHostAndPort());
595       int region = regionsToIndex.get(regionInfo);
596 
597       int primary = regionIndexToPrimaryIndex[region];
598 
599       // there is a subset relation for server < host < rack
600       // check server first
601 
602       if (contains(primariesOfRegionsPerServer[server], primary)) {
603         // check for whether there are other servers that we can place this region
604         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
605           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
606             return true; // meaning there is a better server
607           }
608         }
609         return false; // there is not a better server to place this
610       }
611 
612       // check host
613       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
614         int host = serverIndexToHostIndex[server];
615         if (contains(primariesOfRegionsPerHost[host], primary)) {
616           // check for whether there are other hosts that we can place this region
617           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
618             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
619               return true; // meaning there is a better host
620             }
621           }
622           return false; // there is not a better host to place this
623         }
624       }
625 
626       // check rack
627       if (numRacks > 1) {
628         int rack = serverIndexToRackIndex[server];
629         if (contains(primariesOfRegionsPerRack[rack], primary)) {
630           // check for whether there are other racks that we can place this region
631           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
632             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
633               return true; // meaning there is a better rack
634             }
635           }
636           return false; // there is not a better rack to place this
637         }
638       }
639       return false;
640     }
641 
642     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
643       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
644         return;
645       }
646       int server = serversToIndex.get(serverName.getHostAndPort());
647       int region = regionsToIndex.get(regionInfo);
648       doAction(new AssignRegionAction(region, server));
649     }
650 
651     void regionMoved(int region, int oldServer, int newServer) {
652       regionIndexToServerIndex[region] = newServer;
653       if (initialRegionIndexToServerIndex[region] == newServer) {
654         numMovedRegions--; //region moved back to original location
655       } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
656         numMovedRegions++; //region moved from original location
657       }
658       int tableIndex = regionIndexToTableIndex[region];
659       if (oldServer >= 0) {
660         numRegionsPerServerPerTable[oldServer][tableIndex]--;
661       }
662       numRegionsPerServerPerTable[newServer][tableIndex]++;
663 
664       //check whether this caused maxRegionsPerTable in the new Server to be updated
665       if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
666         numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
667       } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
668           == numMaxRegionsPerTable[tableIndex]) {
669         //recompute maxRegionsPerTable since the previous value was coming from the old server
670         for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
671           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
672             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
673           }
674         }
675       }
676 
677       // update for servers
678       int primary = regionIndexToPrimaryIndex[region];
679       if (oldServer >= 0) {
680         primariesOfRegionsPerServer[oldServer] = removeRegion(
681           primariesOfRegionsPerServer[oldServer], primary);
682       }
683       primariesOfRegionsPerServer[newServer] = addRegionSorted(
684         primariesOfRegionsPerServer[newServer], primary);
685 
686       // update for hosts
687       if (multiServersPerHost) {
688         int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
689         int newHost = serverIndexToHostIndex[newServer];
690         if (newHost != oldHost) {
691           regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
692           primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
693           if (oldHost >= 0) {
694             regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
695             primariesOfRegionsPerHost[oldHost] = removeRegion(
696               primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
697           }
698         }
699       }
700 
701       // update for racks
702       if (numRacks > 1) {
703         int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
704         int newRack = serverIndexToRackIndex[newServer];
705         if (newRack != oldRack) {
706           regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
707           primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
708           if (oldRack >= 0) {
709             regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
710             primariesOfRegionsPerRack[oldRack] = removeRegion(
711               primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
712           }
713         }
714       }
715     }
716 
717     int[] removeRegion(int[] regions, int regionIndex) {
718       //TODO: this maybe costly. Consider using linked lists
719       int[] newRegions = new int[regions.length - 1];
720       int i = 0;
721       for (i = 0; i < regions.length; i++) {
722         if (regions[i] == regionIndex) {
723           break;
724         }
725         newRegions[i] = regions[i];
726       }
727       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
728       return newRegions;
729     }
730 
731     int[] addRegion(int[] regions, int regionIndex) {
732       int[] newRegions = new int[regions.length + 1];
733       System.arraycopy(regions, 0, newRegions, 0, regions.length);
734       newRegions[newRegions.length - 1] = regionIndex;
735       return newRegions;
736     }
737 
738     int[] addRegionSorted(int[] regions, int regionIndex) {
739       int[] newRegions = new int[regions.length + 1];
740       int i = 0;
741       for (i = 0; i < regions.length; i++) { // find the index to insert
742         if (regions[i] > regionIndex) {
743           break;
744         }
745       }
746       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
747       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
748       newRegions[i] = regionIndex;
749 
750       return newRegions;
751     }
752 
753     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
754       int i = 0;
755       for (i = 0; i < regions.length; i++) {
756         if (regions[i] == regionIndex) {
757           regions[i] = newRegionIndex;
758           break;
759         }
760       }
761       return regions;
762     }
763 
764     void sortServersByRegionCount() {
765       Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
766     }
767 
768     int getNumRegions(int server) {
769       return regionsPerServer[server].length;
770     }
771 
772     boolean contains(int[] arr, int val) {
773       return Arrays.binarySearch(arr, val) >= 0;
774     }
775 
776     private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
777       @Override
778       public int compare(Integer integer, Integer integer2) {
779         return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
780       }
781     };
782 
783     void sortServersByLocality() {
784       Arrays.sort(serverIndicesSortedByLocality, localityComparator);
785     }
786 
787     float getLocality(int server) {
788       return localityPerServer[server];
789     }
790 
791     private Comparator<Integer> localityComparator = new Comparator<Integer>() {
792       @Override
793       public int compare(Integer integer, Integer integer2) {
794         float locality1 = getLocality(integer);
795         float locality2 = getLocality(integer2);
796         if (locality1 < locality2) {
797           return -1;
798         } else if (locality1 > locality2) {
799           return 1;
800         } else {
801           return 0;
802         }
803       }
804     };
805 
806     int getLowestLocalityRegionServer() {
807       if (regionFinder == null) {
808         return -1;
809       } else {
810         sortServersByLocality();
811         // We want to find server with non zero regions having lowest locality.
812         int i = 0;
813         int lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
814         while (localityPerServer[lowestLocalityServerIndex] == 0
815             && (regionsPerServer[lowestLocalityServerIndex].length == 0)) {
816           i++;
817           lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
818         }
819         if (LOG.isTraceEnabled()) {
820           LOG.trace("Lowest locality region server with non zero regions is "
821             + servers[lowestLocalityServerIndex].getHostname() + " with locality "
822             + localityPerServer[lowestLocalityServerIndex]);
823         }
824         return lowestLocalityServerIndex;
825       }
826     }
827 
828     int getLowestLocalityRegionOnServer(int serverIndex) {
829       if (regionFinder != null) {
830         float lowestLocality = 1.0f;
831         int lowestLocalityRegionIndex = 0;
832         if (regionsPerServer[serverIndex].length == 0) {
833           // No regions on that region server
834           return -1;
835         }
836         for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
837           int regionIndex = regionsPerServer[serverIndex][j];
838           HDFSBlocksDistribution distribution = regionFinder
839               .getBlockDistribution(regions[regionIndex]);
840           float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
841           if (locality < lowestLocality) {
842             lowestLocality = locality;
843             lowestLocalityRegionIndex = j;
844           }
845         }
846         if (LOG.isTraceEnabled()) {
847           LOG.trace(" Lowest locality region index is " + lowestLocalityRegionIndex
848             + " and its region server contains " + regionsPerServer[serverIndex].length
849             + " regions");
850         }
851         return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
852       } else {
853         return -1;
854       }
855     }
856 
857     float getLocalityOfRegion(int region, int server) {
858       if (regionFinder != null) {
859         HDFSBlocksDistribution distribution = regionFinder.getBlockDistribution(regions[region]);
860         return distribution.getBlockLocalityIndex(servers[server].getHostname());
861       } else {
862         return 0f;
863       }
864     }
865 
866     int getLeastLoadedTopServerForRegion(int region) {
867       if (regionFinder != null) {
868         List<ServerName> topLocalServers = regionFinder.getTopBlockLocations(regions[region]);
869         int leastLoadedServerIndex = -1;
870         int load = Integer.MAX_VALUE;
871         for (ServerName sn : topLocalServers) {
872           if (!serversToIndex.containsKey(sn.getHostAndPort())) {
873             continue;
874           }
875           int index = serversToIndex.get(sn.getHostAndPort());
876           if (regionsPerServer[index] == null) {
877             continue;
878           }
879           int tempLoad = regionsPerServer[index].length;
880           if (tempLoad <= load) {
881             leastLoadedServerIndex = index;
882             load = tempLoad;
883           }
884         }
885         return leastLoadedServerIndex;
886       } else {
887         return -1;
888       }
889     }
890 
891     void calculateRegionServerLocalities() {
892       if (regionFinder == null) {
893         LOG.warn("Region location finder found null, skipping locality calculations.");
894         return;
895       }
896       for (int i = 0; i < regionsPerServer.length; i++) {
897         HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
898         if (regionsPerServer[i].length > 0) {
899           for (int j = 0; j < regionsPerServer[i].length; j++) {
900             int regionIndex = regionsPerServer[i][j];
901             distribution.add(regionFinder.getBlockDistribution(regions[regionIndex]));
902           }
903         } else {
904           LOG.debug("Server " + servers[i].getHostname() + " had 0 regions.");
905         }
906         localityPerServer[i] = distribution.getBlockLocalityIndex(servers[i].getHostname());
907       }
908     }
909 
910     @VisibleForTesting
911     protected void setNumRegions(int numRegions) {
912       this.numRegions = numRegions;
913     }
914 
915     @VisibleForTesting
916     protected void setNumMovedRegions(int numMovedRegions) {
917       this.numMovedRegions = numMovedRegions;
918     }
919 
920     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
921         justification="Not important but should be fixed")
922     @Override
923     public String toString() {
924       String desc = "Cluster{" +
925           "servers=[";
926           for(ServerName sn:servers) {
927              desc += sn.getHostAndPort() + ", ";
928           }
929           desc +=
930           ", serverIndicesSortedByRegionCount="+
931           Arrays.toString(serverIndicesSortedByRegionCount) +
932           ", regionsPerServer=[";
933 
934           for (int[]r:regionsPerServer) {
935             desc += Arrays.toString(r);
936           }
937           desc += "]" +
938           ", numMaxRegionsPerTable=" +
939           Arrays.toString(numMaxRegionsPerTable) +
940           ", numRegions=" +
941           numRegions +
942           ", numServers=" +
943           numServers +
944           ", numTables=" +
945           numTables +
946           ", numMovedRegions=" +
947           numMovedRegions +
948           '}';
949       return desc;
950     }
951   }
952 
953   // slop for regions
954   protected float slop;
955   protected Configuration config;
956   protected RackManager rackManager;
957   private static final Random RANDOM = new Random(System.currentTimeMillis());
958   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
959 
960   // Regions of these tables are put on the master by default.
961   private static final String[] DEFAULT_TABLES_ON_MASTER =
962     new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
963       TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
964       TableName.META_TABLE_NAME.getNameAsString()};
965 
966   public static final String TABLES_ON_MASTER =
967     "hbase.balancer.tablesOnMaster";
968 
969   protected final Set<String> tablesOnMaster = new HashSet<String>();
970   protected MetricsBalancer metricsBalancer = null;
971   protected ClusterStatus clusterStatus = null;
972   protected ServerName masterServerName;
973   protected MasterServices services;
974 
975   /**
976    * By default, regions of some small system tables such as meta,
977    * namespace, and acl are assigned to the active master. If you don't
978    * want to assign any region to the active master, you need to
979    * configure "hbase.balancer.tablesOnMaster" to "none".
980    */
981   protected static String[] getTablesOnMaster(Configuration conf) {
982     String valueString = conf.get(TABLES_ON_MASTER);
983     if (valueString == null) {
984       return DEFAULT_TABLES_ON_MASTER;
985     }
986     valueString = valueString.trim();
987     if (valueString.equalsIgnoreCase("none")) {
988       return null;
989     }
990     return StringUtils.getStrings(valueString);
991   }
992 
993   /**
994    * Check if configured to put any tables on the active master
995    */
996   public static boolean tablesOnMaster(Configuration conf) {
997     String[] tables = getTablesOnMaster(conf);
998     return tables != null && tables.length > 0;
999   }
1000 
1001   @Override
1002   public void setConf(Configuration conf) {
1003     setSlop(conf);
1004     if (slop < 0) slop = 0;
1005     else if (slop > 1) slop = 1;
1006 
1007     this.config = conf;
1008     String[] tables = getTablesOnMaster(conf);
1009     if (tables != null && tables.length > 0) {
1010       Collections.addAll(tablesOnMaster, tables);
1011     }
1012     this.rackManager = new RackManager(getConf());
1013     regionFinder.setConf(conf);
1014   }
1015 
1016   protected void setSlop(Configuration conf) {
1017     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
1018   }
1019 
1020   /**
1021    * Check if a region belongs to some small system table.
1022    * If so, the primary replica may be expected to be put on the master regionserver.
1023    */
1024   public boolean shouldBeOnMaster(HRegionInfo region) {
1025     return tablesOnMaster.contains(region.getTable().getNameAsString())
1026         && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
1027   }
1028 
1029   /**
1030    * Balance the regions that should be on master regionserver.
1031    */
1032   protected List<RegionPlan> balanceMasterRegions(
1033       Map<ServerName, List<HRegionInfo>> clusterMap) {
1034     if (masterServerName == null
1035         || clusterMap == null || clusterMap.size() <= 1) return null;
1036     List<RegionPlan> plans = null;
1037     List<HRegionInfo> regions = clusterMap.get(masterServerName);
1038     if (regions != null) {
1039       Iterator<ServerName> keyIt = null;
1040       for (HRegionInfo region: regions) {
1041         if (shouldBeOnMaster(region)) continue;
1042 
1043         // Find a non-master regionserver to host the region
1044         if (keyIt == null || !keyIt.hasNext()) {
1045           keyIt = clusterMap.keySet().iterator();
1046         }
1047         ServerName dest = keyIt.next();
1048         if (masterServerName.equals(dest)) {
1049           if (!keyIt.hasNext()) {
1050             keyIt = clusterMap.keySet().iterator();
1051           }
1052           dest = keyIt.next();
1053         }
1054 
1055         // Move this region away from the master regionserver
1056         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
1057         if (plans == null) {
1058           plans = new ArrayList<RegionPlan>();
1059         }
1060         plans.add(plan);
1061       }
1062     }
1063     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
1064       if (masterServerName.equals(server.getKey())) continue;
1065       for (HRegionInfo region: server.getValue()) {
1066         if (!shouldBeOnMaster(region)) continue;
1067 
1068         // Move this region to the master regionserver
1069         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
1070         if (plans == null) {
1071           plans = new ArrayList<RegionPlan>();
1072         }
1073         plans.add(plan);
1074       }
1075     }
1076     return plans;
1077   }
1078 
1079   /**
1080    * Assign the regions that should be on master regionserver.
1081    */
1082   protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
1083       Collection<HRegionInfo> regions, List<ServerName> servers) {
1084     if (servers == null || regions == null || regions.isEmpty()) {
1085       return null;
1086     }
1087     Map<ServerName, List<HRegionInfo>> assignments
1088       = new TreeMap<ServerName, List<HRegionInfo>>();
1089     if (masterServerName != null && servers.contains(masterServerName)) {
1090       assignments.put(masterServerName, new ArrayList<HRegionInfo>());
1091       for (HRegionInfo region: regions) {
1092         if (shouldBeOnMaster(region)) {
1093           assignments.get(masterServerName).add(region);
1094         }
1095       }
1096     }
1097     return assignments;
1098   }
1099 
1100   @Override
1101   public Configuration getConf() {
1102     return this.config;
1103   }
1104 
1105   @Override
1106   public synchronized void setClusterStatus(ClusterStatus st) {
1107     this.clusterStatus = st;
1108     regionFinder.setClusterStatus(st);
1109   }
1110 
1111   @Override
1112   public void setMasterServices(MasterServices masterServices) {
1113     masterServerName = masterServices.getServerName();
1114     this.services = masterServices;
1115     this.regionFinder.setServices(masterServices);
1116   }
1117 
1118   public void setRackManager(RackManager rackManager) {
1119     this.rackManager = rackManager;
1120   }
1121 
1122   protected boolean needsBalance(Cluster c) {
1123     ClusterLoadState cs = new ClusterLoadState(c.clusterState);
1124     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
1125       if (LOG.isDebugEnabled()) {
1126         LOG.debug("Not running balancer because only " + cs.getNumServers()
1127             + " active regionserver(s)");
1128       }
1129       return false;
1130     }
1131     if(areSomeRegionReplicasColocated(c)) return true;
1132     // Check if we even need to do any load balancing
1133     // HBASE-3681 check sloppiness first
1134     float average = cs.getLoadAverage(); // for logging
1135     int floor = (int) Math.floor(average * (1 - slop));
1136     int ceiling = (int) Math.ceil(average * (1 + slop));
1137     if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
1138       NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
1139       if (LOG.isTraceEnabled()) {
1140         // If nothing to balance, then don't say anything unless trace-level logging.
1141         LOG.trace("Skipping load balancing because balanced cluster; " +
1142           "servers=" + cs.getNumServers() +
1143           " regions=" + cs.getNumRegions() + " average=" + average +
1144           " mostloaded=" + serversByLoad.lastKey().getLoad() +
1145           " leastloaded=" + serversByLoad.firstKey().getLoad());
1146       }
1147       return false;
1148     }
1149     return true;
1150   }
1151 
1152   /**
1153    * Subclasses should implement this to return true if the cluster has nodes that hosts
1154    * multiple replicas for the same region, or, if there are multiple racks and the same
1155    * rack hosts replicas of the same region
1156    * @param c Cluster information
1157    * @return whether region replicas are currently co-located
1158    */
1159   protected boolean areSomeRegionReplicasColocated(Cluster c) {
1160     return false;
1161   }
1162 
1163   /**
1164    * Generates a bulk assignment plan to be used on cluster startup using a
1165    * simple round-robin assignment.
1166    * <p>
1167    * Takes a list of all the regions and all the servers in the cluster and
1168    * returns a map of each server to the regions that it should be assigned.
1169    * <p>
1170    * Currently implemented as a round-robin assignment. Same invariant as load
1171    * balancing, all servers holding floor(avg) or ceiling(avg).
1172    *
1173    * TODO: Use block locations from HDFS to place regions with their blocks
1174    *
1175    * @param regions all regions
1176    * @param servers all servers
1177    * @return map of server to the regions it should take, or null if no
1178    *         assignment is possible (ie. no regions or no servers)
1179    */
1180   @Override
1181   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1182       List<ServerName> servers) {
1183     metricsBalancer.incrMiscInvocations();
1184     Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1185     if (assignments != null && !assignments.isEmpty()) {
1186       servers = new ArrayList<ServerName>(servers);
1187       // Guarantee not to put other regions on master
1188       servers.remove(masterServerName);
1189       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1190       if (!masterRegions.isEmpty()) {
1191         regions = new ArrayList<HRegionInfo>(regions);
1192         for (HRegionInfo region: masterRegions) {
1193           regions.remove(region);
1194         }
1195       }
1196     }
1197     if (regions == null || regions.isEmpty()) {
1198       return assignments;
1199     }
1200 
1201     int numServers = servers == null ? 0 : servers.size();
1202     if (numServers == 0) {
1203       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1204       return null;
1205     }
1206 
1207     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1208     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1209     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1210     // and balanced. This should also run fast with fewer number of iterations.
1211 
1212     if (numServers == 1) { // Only one server, nothing fancy we can do here
1213       ServerName server = servers.get(0);
1214       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1215       return assignments;
1216     }
1217 
1218     Cluster cluster = createCluster(servers, regions);
1219     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1220 
1221     roundRobinAssignment(cluster, regions, unassignedRegions,
1222       servers, assignments);
1223 
1224     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1225     // assign the remaining by going through the list and try to assign to servers one-by-one
1226     int serverIdx = RANDOM.nextInt(numServers);
1227     for (HRegionInfo region : unassignedRegions) {
1228       boolean assigned = false;
1229       for (int j = 0; j < numServers; j++) { // try all servers one by one
1230         ServerName serverName = servers.get((j + serverIdx) % numServers);
1231         if (!cluster.wouldLowerAvailability(region, serverName)) {
1232           List<HRegionInfo> serverRegions = assignments.get(serverName);
1233           if (serverRegions == null) {
1234             serverRegions = new ArrayList<HRegionInfo>();
1235             assignments.put(serverName, serverRegions);
1236           }
1237           serverRegions.add(region);
1238           cluster.doAssignRegion(region, serverName);
1239           serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
1240           assigned = true;
1241           break;
1242         }
1243       }
1244       if (!assigned) {
1245         lastFewRegions.add(region);
1246       }
1247     }
1248     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1249     // make it optimal later. we can end up with this if numReplicas > numServers.
1250     for (HRegionInfo region : lastFewRegions) {
1251       int i = RANDOM.nextInt(numServers);
1252       ServerName server = servers.get(i);
1253       List<HRegionInfo> serverRegions = assignments.get(server);
1254       if (serverRegions == null) {
1255         serverRegions = new ArrayList<HRegionInfo>();
1256         assignments.put(server, serverRegions);
1257       }
1258       serverRegions.add(region);
1259       cluster.doAssignRegion(region, server);
1260     }
1261     return assignments;
1262   }
1263 
1264   protected Cluster createCluster(List<ServerName> servers,
1265       Collection<HRegionInfo> regions) {
1266     // Get the snapshot of the current assignments for the regions in question, and then create
1267     // a cluster out of it. Note that we might have replicas already assigned to some servers
1268     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
1269     // replicas of the regions that are passed (for performance).
1270     Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1271 
1272     for (ServerName server : servers) {
1273       if (!clusterState.containsKey(server)) {
1274         clusterState.put(server, EMPTY_REGION_LIST);
1275       }
1276     }
1277     return new Cluster(regions, clusterState, null, this.regionFinder,
1278       rackManager);
1279   }
1280 
1281   /**
1282    * Used to assign a single region to a random server.
1283    */
1284   @Override
1285   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1286     metricsBalancer.incrMiscInvocations();
1287     if (servers != null && servers.contains(masterServerName)) {
1288       if (shouldBeOnMaster(regionInfo)) {
1289         return masterServerName;
1290       }
1291       servers = new ArrayList<ServerName>(servers);
1292       // Guarantee not to put other regions on master
1293       servers.remove(masterServerName);
1294     }
1295 
1296     int numServers = servers == null ? 0 : servers.size();
1297     if (numServers == 0) {
1298       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1299       return null;
1300     }
1301     if (numServers == 1) { // Only one server, nothing fancy we can do here
1302       return servers.get(0);
1303     }
1304 
1305     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1306     Cluster cluster = createCluster(servers, regions);
1307     return randomAssignment(cluster, regionInfo, servers);
1308   }
1309 
1310   /**
1311    * Generates a bulk assignment startup plan, attempting to reuse the existing
1312    * assignment information from META, but adjusting for the specified list of
1313    * available/online servers available for assignment.
1314    * <p>
1315    * Takes a map of all regions to their existing assignment from META. Also
1316    * takes a list of online servers for regions to be assigned to. Attempts to
1317    * retain all assignment, so in some instances initial assignment will not be
1318    * completely balanced.
1319    * <p>
1320    * Any leftover regions without an existing server to be assigned to will be
1321    * assigned randomly to available servers.
1322    *
1323    * @param regions regions and existing assignment from meta
1324    * @param servers available servers
1325    * @return map of servers and regions to be assigned to them
1326    */
1327   @Override
1328   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1329       List<ServerName> servers) {
1330     // Update metrics
1331     metricsBalancer.incrMiscInvocations();
1332     Map<ServerName, List<HRegionInfo>> assignments
1333       = assignMasterRegions(regions.keySet(), servers);
1334     if (assignments != null && !assignments.isEmpty()) {
1335       servers = new ArrayList<ServerName>(servers);
1336       // Guarantee not to put other regions on master
1337       servers.remove(masterServerName);
1338       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1339       if (!masterRegions.isEmpty()) {
1340         regions = new HashMap<HRegionInfo, ServerName>(regions);
1341         for (HRegionInfo region: masterRegions) {
1342           regions.remove(region);
1343         }
1344       }
1345     }
1346     if (regions == null || regions.isEmpty()) {
1347       return assignments;
1348     }
1349 
1350     int numServers = servers == null ? 0 : servers.size();
1351     if (numServers == 0) {
1352       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1353       return null;
1354     }
1355     if (numServers == 1) { // Only one server, nothing fancy we can do here
1356       ServerName server = servers.get(0);
1357       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1358       return assignments;
1359     }
1360 
1361     // Group all of the old assignments by their hostname.
1362     // We can't group directly by ServerName since the servers all have
1363     // new start-codes.
1364 
1365     // Group the servers by their hostname. It's possible we have multiple
1366     // servers on the same host on different ports.
1367     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1368     for (ServerName server : servers) {
1369       assignments.put(server, new ArrayList<HRegionInfo>());
1370       serversByHostname.put(server.getHostname(), server);
1371     }
1372 
1373     // Collection of the hostnames that used to have regions
1374     // assigned, but for which we no longer have any RS running
1375     // after the cluster restart.
1376     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1377 
1378     int numRandomAssignments = 0;
1379     int numRetainedAssigments = 0;
1380 
1381     Cluster cluster = createCluster(servers, regions.keySet());
1382 
1383     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1384       HRegionInfo region = entry.getKey();
1385       ServerName oldServerName = entry.getValue();
1386       List<ServerName> localServers = new ArrayList<ServerName>();
1387       if (oldServerName != null) {
1388         localServers = serversByHostname.get(oldServerName.getHostname());
1389       }
1390       if (localServers.isEmpty()) {
1391         // No servers on the new cluster match up with this hostname,
1392         // assign randomly.
1393         ServerName randomServer = randomAssignment(cluster, region, servers);
1394         assignments.get(randomServer).add(region);
1395         numRandomAssignments++;
1396         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1397       } else if (localServers.size() == 1) {
1398         // the usual case - one new server on same host
1399         ServerName target = localServers.get(0);
1400         assignments.get(target).add(region);
1401         cluster.doAssignRegion(region, target);
1402         numRetainedAssigments++;
1403       } else {
1404         // multiple new servers in the cluster on this same host
1405         if (localServers.contains(oldServerName)) {
1406           assignments.get(oldServerName).add(region);
1407           cluster.doAssignRegion(region, oldServerName);
1408         } else {
1409           ServerName target = null;
1410           for (ServerName tmp: localServers) {
1411             if (tmp.getPort() == oldServerName.getPort()) {
1412               target = tmp;
1413               break;
1414             }
1415           }
1416           if (target == null) {
1417             target = randomAssignment(cluster, region, localServers);
1418           }
1419           assignments.get(target).add(region);
1420         }
1421         numRetainedAssigments++;
1422       }
1423     }
1424 
1425     String randomAssignMsg = "";
1426     if (numRandomAssignments > 0) {
1427       randomAssignMsg =
1428           numRandomAssignments + " regions were assigned "
1429               + "to random hosts, since the old hosts for these regions are no "
1430               + "longer present in the cluster. These hosts were:\n  "
1431               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1432     }
1433 
1434     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1435         + " retained the pre-restart assignment. " + randomAssignMsg);
1436     return assignments;
1437   }
1438 
1439   @Override
1440   public void initialize() throws HBaseIOException{
1441   }
1442 
1443   @Override
1444   public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1445   }
1446 
1447   @Override
1448   public void regionOffline(HRegionInfo regionInfo) {
1449   }
1450 
1451   @Override
1452   public boolean isStopped() {
1453     return stopped;
1454   }
1455 
1456   @Override
1457   public void stop(String why) {
1458     LOG.info("Load Balancer stop requested: "+why);
1459     stopped = true;
1460   }
1461 
1462   /**
1463    * Used to assign a single region to a random server.
1464    */
1465   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1466       List<ServerName> servers) {
1467     int numServers = servers.size(); // servers is not null, numServers > 1
1468     ServerName sn = null;
1469     final int maxIterations = numServers * 4;
1470     int iterations = 0;
1471 
1472     do {
1473       int i = RANDOM.nextInt(numServers);
1474       sn = servers.get(i);
1475     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1476         && iterations++ < maxIterations);
1477     cluster.doAssignRegion(regionInfo, sn);
1478     return sn;
1479   }
1480 
1481   /**
1482    * Round robin a list of regions to a list of servers
1483    */
1484   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1485       List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1486       Map<ServerName, List<HRegionInfo>> assignments) {
1487 
1488     int numServers = servers.size();
1489     int numRegions = regions.size();
1490     int max = (int) Math.ceil((float) numRegions / numServers);
1491     int serverIdx = 0;
1492     if (numServers > 1) {
1493       serverIdx = RANDOM.nextInt(numServers);
1494     }
1495     int regionIdx = 0;
1496 
1497     for (int j = 0; j < numServers; j++) {
1498       ServerName server = servers.get((j + serverIdx) % numServers);
1499       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1500       for (int i = regionIdx; i < numRegions; i += numServers) {
1501         HRegionInfo region = regions.get(i % numRegions);
1502         if (cluster.wouldLowerAvailability(region, server)) {
1503           unassignedRegions.add(region);
1504         } else {
1505           serverRegions.add(region);
1506           cluster.doAssignRegion(region, server);
1507         }
1508       }
1509       assignments.put(server, serverRegions);
1510       regionIdx++;
1511     }
1512   }
1513 
1514   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1515     Collection<HRegionInfo> regions) {
1516     if (this.services != null && this.services.getAssignmentManager() != null) {
1517       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1518     } else {
1519       return new HashMap<ServerName, List<HRegionInfo>>();
1520     }
1521   }
1522 
1523   @Override
1524   public void onConfigurationChange(Configuration conf) {
1525   }
1526 }