View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.Deque;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.NavigableMap;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeMap;
36  
37  import org.apache.commons.lang.NotImplementedException;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.ClusterStatus;
42  import org.apache.hadoop.hbase.HBaseIOException;
43  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.RegionLoad;
46  import org.apache.hadoop.hbase.ServerName;
47  import org.apache.hadoop.hbase.TableName;
48  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
49  import org.apache.hadoop.hbase.master.LoadBalancer;
50  import org.apache.hadoop.hbase.master.MasterServices;
51  import org.apache.hadoop.hbase.master.RackManager;
52  import org.apache.hadoop.hbase.master.RegionPlan;
53  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
54  import org.apache.hadoop.hbase.security.access.AccessControlLists;
55  import org.apache.hadoop.util.StringUtils;
56  
57  import com.google.common.annotations.VisibleForTesting;
58  import com.google.common.base.Joiner;
59  import com.google.common.collect.ArrayListMultimap;
60  import com.google.common.collect.Lists;
61  import com.google.common.collect.Sets;
62  
63  /**
64   * The base class for load balancers. It provides the the functions used to by
65   * {@link org.apache.hadoop.hbase.master.AssignmentManager} to assign regions
66   * in the edge cases. It doesn't provide an implementation of the
67   * actual balancing algorithm.
68   *
69   */
70  public abstract class BaseLoadBalancer implements LoadBalancer {
71    private static final int MIN_SERVER_BALANCE = 2;
72    private volatile boolean stopped = false;
73  
74    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
75  
76    protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
77  
78    private static class DefaultRackManager extends RackManager {
79      @Override
80      public String getRack(ServerName server) {
81        return UNKNOWN_RACK;
82      }
83    }
84  
85    /**
86     * The constructor that uses the basic MetricsBalancer
87     */
88    protected BaseLoadBalancer() {
89      metricsBalancer = new MetricsBalancer();
90    }
91  
92    /**
93     * This Constructor accepts an instance of MetricsBalancer,
94     * which will be used instead of creating a new one
95     */
96    protected BaseLoadBalancer(MetricsBalancer metricsBalancer) {
97      this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer();
98    }
99  
100   /**
101    * An efficient array based implementation similar to ClusterState for keeping
102    * the status of the cluster in terms of region assignment and distribution.
103    * LoadBalancers, such as StochasticLoadBalancer uses this Cluster object because of
104    * hundreds of thousands of hashmap manipulations are very costly, which is why this
105    * class uses mostly indexes and arrays.
106    *
107    * Cluster tracks a list of unassigned regions, region assignments, and the server
108    * topology in terms of server names, hostnames and racks.
109    */
110   protected static class Cluster {
111     ServerName[] servers;
112     String[] hosts; // ServerName uniquely identifies a region server. multiple RS can run on the same host
113     String[] racks;
114     boolean multiServersPerHost = false; // whether or not any host has more than one server
115 
116     ArrayList<String> tables;
117     HRegionInfo[] regions;
118     Deque<RegionLoad>[] regionLoads;
119     private RegionLocationFinder regionFinder;
120 
121     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
122 
123     int[]   serverIndexToHostIndex;      //serverIndex -> host index
124     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
125 
126     int[][] regionsPerServer;            //serverIndex -> region list
127     int[][] regionsPerHost;              //hostIndex -> list of regions
128     int[][] regionsPerRack;              //rackIndex -> region list
129     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
130     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
131     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
132 
133     int[][] serversPerHost;              //hostIndex -> list of server indexes
134     int[][] serversPerRack;              //rackIndex -> list of server indexes
135     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
136     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
137     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
138     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
139     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
140     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
141     boolean hasRegionReplicas = false;   //whether there is regions with replicas
142 
143     Integer[] serverIndicesSortedByRegionCount;
144     Integer[] serverIndicesSortedByLocality;
145 
146     Map<String, Integer> serversToIndex;
147     Map<String, Integer> hostsToIndex;
148     Map<String, Integer> racksToIndex;
149     Map<String, Integer> tablesToIndex;
150     Map<HRegionInfo, Integer> regionsToIndex;
151     float[] localityPerServer;
152 
153     int numServers;
154     int numHosts;
155     int numRacks;
156     int numTables;
157     int numRegions;
158 
159     int numMovedRegions = 0; //num moved regions from the initial configuration
160     Map<ServerName, List<HRegionInfo>> clusterState;
161 
162     protected final RackManager rackManager;
163 
164     protected Cluster(
165         Map<ServerName, List<HRegionInfo>> clusterState,
166         Map<String, Deque<RegionLoad>> loads,
167         RegionLocationFinder regionFinder,
168         RackManager rackManager) {
169       this(null, clusterState, loads, regionFinder,
170         rackManager);
171     }
172 
173     @SuppressWarnings("unchecked")
174     protected Cluster(
175         Collection<HRegionInfo> unassignedRegions,
176         Map<ServerName, List<HRegionInfo>> clusterState,
177         Map<String, Deque<RegionLoad>> loads,
178         RegionLocationFinder regionFinder,
179         RackManager rackManager) {
180 
181       if (unassignedRegions == null) {
182         unassignedRegions = EMPTY_REGION_LIST;
183       }
184 
185       serversToIndex = new HashMap<String, Integer>();
186       hostsToIndex = new HashMap<String, Integer>();
187       racksToIndex = new HashMap<String, Integer>();
188       tablesToIndex = new HashMap<String, Integer>();
189 
190       //TODO: We should get the list of tables from master
191       tables = new ArrayList<String>();
192       this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
193 
194       numRegions = 0;
195 
196       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
197       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
198       this.clusterState = clusterState;
199       this.regionFinder = regionFinder;
200 
201       // Use servername and port as there can be dead servers in this list. We want everything with
202       // a matching hostname and port to have the same index.
203       for (ServerName sn : clusterState.keySet()) {
204         if (serversToIndex.get(sn.getHostAndPort()) == null) {
205           serversToIndex.put(sn.getHostAndPort(), numServers++);
206         }
207         if (!hostsToIndex.containsKey(sn.getHostname())) {
208           hostsToIndex.put(sn.getHostname(), numHosts++);
209           serversPerHostList.add(new ArrayList<Integer>(1));
210         }
211 
212         int serverIndex = serversToIndex.get(sn.getHostAndPort());
213         int hostIndex = hostsToIndex.get(sn.getHostname());
214         serversPerHostList.get(hostIndex).add(serverIndex);
215 
216         String rack = this.rackManager.getRack(sn);
217         if (!racksToIndex.containsKey(rack)) {
218           racksToIndex.put(rack, numRacks++);
219           serversPerRackList.add(new ArrayList<Integer>());
220         }
221         int rackIndex = racksToIndex.get(rack);
222         serversPerRackList.get(rackIndex).add(serverIndex);
223       }
224 
225       // Count how many regions there are.
226       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
227         numRegions += entry.getValue().size();
228       }
229       numRegions += unassignedRegions.size();
230 
231       regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
232       servers = new ServerName[numServers];
233       serversPerHost = new int[numHosts][];
234       serversPerRack = new int[numRacks][];
235       regions = new HRegionInfo[numRegions];
236       regionIndexToServerIndex = new int[numRegions];
237       initialRegionIndexToServerIndex = new int[numRegions];
238       regionIndexToTableIndex = new int[numRegions];
239       regionIndexToPrimaryIndex = new int[numRegions];
240       regionLoads = new Deque[numRegions];
241       regionLocations = new int[numRegions][];
242       serverIndicesSortedByRegionCount = new Integer[numServers];
243       serverIndicesSortedByLocality = new Integer[numServers];
244       localityPerServer = new float[numServers];
245 
246       serverIndexToHostIndex = new int[numServers];
247       serverIndexToRackIndex = new int[numServers];
248       regionsPerServer = new int[numServers][];
249       regionsPerHost = new int[numHosts][];
250       regionsPerRack = new int[numRacks][];
251       primariesOfRegionsPerServer = new int[numServers][];
252       primariesOfRegionsPerHost = new int[numHosts][];
253       primariesOfRegionsPerRack = new int[numRacks][];
254 
255       int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
256 
257       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
258         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
259 
260         // keep the servername if this is the first server name for this hostname
261         // or this servername has the newest startcode.
262         if (servers[serverIndex] == null ||
263             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
264           servers[serverIndex] = entry.getKey();
265         }
266 
267         if (regionsPerServer[serverIndex] != null) {
268           // there is another server with the same hostAndPort in ClusterState.
269           // allocate the array for the total size
270           regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
271         } else {
272           regionsPerServer[serverIndex] = new int[entry.getValue().size()];
273         }
274         primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
275         serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
276         serverIndicesSortedByLocality[serverIndex] = serverIndex;
277       }
278 
279       hosts = new String[numHosts];
280       for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
281         hosts[entry.getValue()] = entry.getKey();
282       }
283       racks = new String[numRacks];
284       for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
285         racks[entry.getValue()] = entry.getKey();
286       }
287 
288       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
289         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
290         regionPerServerIndex = 0;
291 
292         int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
293         serverIndexToHostIndex[serverIndex] = hostIndex;
294 
295         int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
296         serverIndexToRackIndex[serverIndex] = rackIndex;
297 
298         for (HRegionInfo region : entry.getValue()) {
299           registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
300 
301           regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
302           regionIndex++;
303         }
304       }
305       for (HRegionInfo region : unassignedRegions) {
306         registerRegion(region, regionIndex, -1, loads, regionFinder);
307         regionIndex++;
308       }
309 
310       for (int i = 0; i < serversPerHostList.size(); i++) {
311         serversPerHost[i] = new int[serversPerHostList.get(i).size()];
312         for (int j = 0; j < serversPerHost[i].length; j++) {
313           serversPerHost[i][j] = serversPerHostList.get(i).get(j);
314         }
315         if (serversPerHost[i].length > 1) {
316           multiServersPerHost = true;
317         }
318       }
319 
320       for (int i = 0; i < serversPerRackList.size(); i++) {
321         serversPerRack[i] = new int[serversPerRackList.get(i).size()];
322         for (int j = 0; j < serversPerRack[i].length; j++) {
323           serversPerRack[i][j] = serversPerRackList.get(i).get(j);
324         }
325       }
326 
327       numTables = tables.size();
328       numRegionsPerServerPerTable = new int[numServers][numTables];
329 
330       for (int i = 0; i < numServers; i++) {
331         for (int j = 0; j < numTables; j++) {
332           numRegionsPerServerPerTable[i][j] = 0;
333         }
334       }
335 
336       for (int i=0; i < regionIndexToServerIndex.length; i++) {
337         if (regionIndexToServerIndex[i] >= 0) {
338           numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
339         }
340       }
341 
342       numMaxRegionsPerTable = new int[numTables];
343       for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
344         for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
345           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
346             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
347           }
348         }
349       }
350 
351       for (int i = 0; i < regions.length; i ++) {
352         HRegionInfo info = regions[i];
353         if (RegionReplicaUtil.isDefaultReplica(info)) {
354           regionIndexToPrimaryIndex[i] = i;
355         } else {
356           hasRegionReplicas = true;
357           HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
358           regionIndexToPrimaryIndex[i] =
359               regionsToIndex.containsKey(primaryInfo) ?
360               regionsToIndex.get(primaryInfo):
361               -1;
362         }
363       }
364 
365       for (int i = 0; i < regionsPerServer.length; i++) {
366         primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
367         for (int j = 0; j < regionsPerServer[i].length; j++) {
368           int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
369           primariesOfRegionsPerServer[i][j] = primaryIndex;
370         }
371         // sort the regions by primaries.
372         Arrays.sort(primariesOfRegionsPerServer[i]);
373       }
374 
375       // compute regionsPerHost
376       if (multiServersPerHost) {
377         for (int i = 0 ; i < serversPerHost.length; i++) {
378           int numRegionsPerHost = 0;
379           for (int j = 0; j < serversPerHost[i].length; j++) {
380             numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
381           }
382           regionsPerHost[i] = new int[numRegionsPerHost];
383           primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
384         }
385         for (int i = 0 ; i < serversPerHost.length; i++) {
386           int numRegionPerHostIndex = 0;
387           for (int j = 0; j < serversPerHost[i].length; j++) {
388             for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
389               int region = regionsPerServer[serversPerHost[i][j]][k];
390               regionsPerHost[i][numRegionPerHostIndex] = region;
391               int primaryIndex = regionIndexToPrimaryIndex[region];
392               primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
393               numRegionPerHostIndex++;
394             }
395           }
396           // sort the regions by primaries.
397           Arrays.sort(primariesOfRegionsPerHost[i]);
398         }
399       }
400 
401       // compute regionsPerRack
402       if (numRacks > 1) {
403         for (int i = 0 ; i < serversPerRack.length; i++) {
404           int numRegionsPerRack = 0;
405           for (int j = 0; j < serversPerRack[i].length; j++) {
406             numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
407           }
408           regionsPerRack[i] = new int[numRegionsPerRack];
409           primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
410         }
411 
412         for (int i = 0 ; i < serversPerRack.length; i++) {
413           int numRegionPerRackIndex = 0;
414           for (int j = 0; j < serversPerRack[i].length; j++) {
415             for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
416               int region = regionsPerServer[serversPerRack[i][j]][k];
417               regionsPerRack[i][numRegionPerRackIndex] = region;
418               int primaryIndex = regionIndexToPrimaryIndex[region];
419               primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
420               numRegionPerRackIndex++;
421             }
422           }
423           // sort the regions by primaries.
424           Arrays.sort(primariesOfRegionsPerRack[i]);
425         }
426       }
427     }
428 
429     /** Helper for Cluster constructor to handle a region */
430     private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
431         Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
432       String tableName = region.getTable().getNameAsString();
433       if (!tablesToIndex.containsKey(tableName)) {
434         tables.add(tableName);
435         tablesToIndex.put(tableName, tablesToIndex.size());
436       }
437       int tableIndex = tablesToIndex.get(tableName);
438 
439       regionsToIndex.put(region, regionIndex);
440       regions[regionIndex] = region;
441       regionIndexToServerIndex[regionIndex] = serverIndex;
442       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
443       regionIndexToTableIndex[regionIndex] = tableIndex;
444 
445       // region load
446       if (loads != null) {
447         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
448         // That could have failed if the RegionLoad is using the other regionName
449         if (rl == null) {
450           // Try getting the region load using encoded name.
451           rl = loads.get(region.getEncodedName());
452         }
453         regionLoads[regionIndex] = rl;
454       }
455 
456       if (regionFinder != null) {
457         //region location
458         List<ServerName> loc = regionFinder.getTopBlockLocations(region);
459         regionLocations[regionIndex] = new int[loc.size()];
460         for (int i=0; i < loc.size(); i++) {
461           regionLocations[regionIndex][i] =
462               loc.get(i) == null ? -1 :
463                 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
464                     : serversToIndex.get(loc.get(i).getHostAndPort()));
465         }
466       }
467     }
468 
469     /** An action to move or swap a region */
470     public static class Action {
471       public static enum Type {
472         ASSIGN_REGION,
473         MOVE_REGION,
474         SWAP_REGIONS,
475         NULL,
476       }
477 
478       public Type type;
479       public Action (Type type) {this.type = type;}
480       /** Returns an Action which would undo this action */
481       public Action undoAction() { return this; }
482       @Override
483       public String toString() { return type + ":";}
484     }
485 
486     public static class AssignRegionAction extends Action {
487       public int region;
488       public int server;
489       public AssignRegionAction(int region, int server) {
490         super(Type.ASSIGN_REGION);
491         this.region = region;
492         this.server = server;
493       }
494       @Override
495       public Action undoAction() {
496         // TODO implement this. This action is not being used by the StochasticLB for now
497         // in case it uses it, we should implement this function.
498         throw new NotImplementedException();
499       }
500       @Override
501       public String toString() {
502         return type + ": " + region + ":" + server;
503       }
504     }
505 
506     public static class MoveRegionAction extends Action {
507       public int region;
508       public int fromServer;
509       public int toServer;
510 
511       public MoveRegionAction(int region, int fromServer, int toServer) {
512         super(Type.MOVE_REGION);
513         this.fromServer = fromServer;
514         this.region = region;
515         this.toServer = toServer;
516       }
517       @Override
518       public Action undoAction() {
519         return new MoveRegionAction (region, toServer, fromServer);
520       }
521       @Override
522       public String toString() {
523         return type + ": " + region + ":" + fromServer + " -> " + toServer;
524       }
525     }
526 
527     public static class SwapRegionsAction extends Action {
528       public int fromServer;
529       public int fromRegion;
530       public int toServer;
531       public int toRegion;
532       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
533         super(Type.SWAP_REGIONS);
534         this.fromServer = fromServer;
535         this.fromRegion = fromRegion;
536         this.toServer = toServer;
537         this.toRegion = toRegion;
538       }
539       @Override
540       public Action undoAction() {
541         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
542       }
543       @Override
544       public String toString() {
545         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
546       }
547     }
548 
549     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
550         justification="Mistake. Too disruptive to change now")
551     public static final Action NullAction = new Action(Type.NULL);
552 
553     public void doAction(Action action) {
554       switch (action.type) {
555       case NULL: break;
556       case ASSIGN_REGION:
557         // FindBugs: Having the assert quietens FB BC_UNCONFIRMED_CAST warnings
558         assert action instanceof AssignRegionAction: action.getClass();
559         AssignRegionAction ar = (AssignRegionAction) action;
560         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
561         regionMoved(ar.region, -1, ar.server);
562         break;
563       case MOVE_REGION:
564         assert action instanceof MoveRegionAction: action.getClass();
565         MoveRegionAction mra = (MoveRegionAction) action;
566         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
567         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
568         regionMoved(mra.region, mra.fromServer, mra.toServer);
569         break;
570       case SWAP_REGIONS:
571         assert action instanceof SwapRegionsAction: action.getClass();
572         SwapRegionsAction a = (SwapRegionsAction) action;
573         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
574         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
575         regionMoved(a.fromRegion, a.fromServer, a.toServer);
576         regionMoved(a.toRegion, a.toServer, a.fromServer);
577         break;
578       default:
579         throw new RuntimeException("Uknown action:" + action.type);
580       }
581     }
582 
583     /**
584      * Return true if the placement of region on server would lower the availability
585      * of the region in question
586      * @param server
587      * @param region
588      * @return true or false
589      */
590     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
591       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
592         return false; // safeguard against race between cluster.servers and servers from LB method args
593       }
594       int server = serversToIndex.get(serverName.getHostAndPort());
595       int region = regionsToIndex.get(regionInfo);
596 
597       int primary = regionIndexToPrimaryIndex[region];
598 
599       // there is a subset relation for server < host < rack
600       // check server first
601 
602       if (contains(primariesOfRegionsPerServer[server], primary)) {
603         // check for whether there are other servers that we can place this region
604         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
605           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
606             return true; // meaning there is a better server
607           }
608         }
609         return false; // there is not a better server to place this
610       }
611 
612       // check host
613       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
614         int host = serverIndexToHostIndex[server];
615         if (contains(primariesOfRegionsPerHost[host], primary)) {
616           // check for whether there are other hosts that we can place this region
617           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
618             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
619               return true; // meaning there is a better host
620             }
621           }
622           return false; // there is not a better host to place this
623         }
624       }
625 
626       // check rack
627       if (numRacks > 1) {
628         int rack = serverIndexToRackIndex[server];
629         if (contains(primariesOfRegionsPerRack[rack], primary)) {
630           // check for whether there are other racks that we can place this region
631           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
632             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
633               return true; // meaning there is a better rack
634             }
635           }
636           return false; // there is not a better rack to place this
637         }
638       }
639       return false;
640     }
641 
642     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
643       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
644         return;
645       }
646       int server = serversToIndex.get(serverName.getHostAndPort());
647       int region = regionsToIndex.get(regionInfo);
648       doAction(new AssignRegionAction(region, server));
649     }
650 
651     void regionMoved(int region, int oldServer, int newServer) {
652       regionIndexToServerIndex[region] = newServer;
653       if (initialRegionIndexToServerIndex[region] == newServer) {
654         numMovedRegions--; //region moved back to original location
655       } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
656         numMovedRegions++; //region moved from original location
657       }
658       int tableIndex = regionIndexToTableIndex[region];
659       if (oldServer >= 0) {
660         numRegionsPerServerPerTable[oldServer][tableIndex]--;
661       }
662       numRegionsPerServerPerTable[newServer][tableIndex]++;
663 
664       //check whether this caused maxRegionsPerTable in the new Server to be updated
665       if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
666         numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
667       } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
668           == numMaxRegionsPerTable[tableIndex]) {
669         //recompute maxRegionsPerTable since the previous value was coming from the old server
670         for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
671           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
672             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
673           }
674         }
675       }
676 
677       // update for servers
678       int primary = regionIndexToPrimaryIndex[region];
679       if (oldServer >= 0) {
680         primariesOfRegionsPerServer[oldServer] = removeRegion(
681           primariesOfRegionsPerServer[oldServer], primary);
682       }
683       primariesOfRegionsPerServer[newServer] = addRegionSorted(
684         primariesOfRegionsPerServer[newServer], primary);
685 
686       // update for hosts
687       if (multiServersPerHost) {
688         int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
689         int newHost = serverIndexToHostIndex[newServer];
690         if (newHost != oldHost) {
691           regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
692           primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
693           if (oldHost >= 0) {
694             regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
695             primariesOfRegionsPerHost[oldHost] = removeRegion(
696               primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
697           }
698         }
699       }
700 
701       // update for racks
702       if (numRacks > 1) {
703         int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
704         int newRack = serverIndexToRackIndex[newServer];
705         if (newRack != oldRack) {
706           regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
707           primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
708           if (oldRack >= 0) {
709             regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
710             primariesOfRegionsPerRack[oldRack] = removeRegion(
711               primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
712           }
713         }
714       }
715     }
716 
717     int[] removeRegion(int[] regions, int regionIndex) {
718       //TODO: this maybe costly. Consider using linked lists
719       int[] newRegions = new int[regions.length - 1];
720       int i = 0;
721       for (i = 0; i < regions.length; i++) {
722         if (regions[i] == regionIndex) {
723           break;
724         }
725         newRegions[i] = regions[i];
726       }
727       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
728       return newRegions;
729     }
730 
731     int[] addRegion(int[] regions, int regionIndex) {
732       int[] newRegions = new int[regions.length + 1];
733       System.arraycopy(regions, 0, newRegions, 0, regions.length);
734       newRegions[newRegions.length - 1] = regionIndex;
735       return newRegions;
736     }
737 
738     int[] addRegionSorted(int[] regions, int regionIndex) {
739       int[] newRegions = new int[regions.length + 1];
740       int i = 0;
741       for (i = 0; i < regions.length; i++) { // find the index to insert
742         if (regions[i] > regionIndex) {
743           break;
744         }
745       }
746       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
747       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
748       newRegions[i] = regionIndex;
749 
750       return newRegions;
751     }
752 
753     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
754       int i = 0;
755       for (i = 0; i < regions.length; i++) {
756         if (regions[i] == regionIndex) {
757           regions[i] = newRegionIndex;
758           break;
759         }
760       }
761       return regions;
762     }
763 
764     void sortServersByRegionCount() {
765       Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
766     }
767 
768     int getNumRegions(int server) {
769       return regionsPerServer[server].length;
770     }
771 
772     boolean contains(int[] arr, int val) {
773       return Arrays.binarySearch(arr, val) >= 0;
774     }
775 
776     private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
777       @Override
778       public int compare(Integer integer, Integer integer2) {
779         return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
780       }
781     };
782 
783     void sortServersByLocality() {
784       Arrays.sort(serverIndicesSortedByLocality, localityComparator);
785     }
786 
787     float getLocality(int server) {
788       return localityPerServer[server];
789     }
790 
791     private Comparator<Integer> localityComparator = new Comparator<Integer>() {
792       @Override
793       public int compare(Integer integer, Integer integer2) {
794         float locality1 = getLocality(integer);
795         float locality2 = getLocality(integer2);
796         if (locality1 < locality2) {
797           return -1;
798         } else if (locality1 > locality2) {
799           return 1;
800         } else {
801           return 0;
802         }
803       }
804     };
805 
806     int getLowestLocalityRegionServer() {
807       if (regionFinder == null) {
808         return -1;
809       } else {
810         sortServersByLocality();
811         // We want to find server with non zero regions having lowest locality.
812         int i = 0;
813         int lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
814         while (localityPerServer[lowestLocalityServerIndex] == 0
815             && (regionsPerServer[lowestLocalityServerIndex].length == 0)) {
816           i++;
817           lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
818         }
819         if (LOG.isTraceEnabled()) {
820           LOG.trace("Lowest locality region server with non zero regions is "
821             + servers[lowestLocalityServerIndex].getHostname() + " with locality "
822             + localityPerServer[lowestLocalityServerIndex]);
823         }
824         return lowestLocalityServerIndex;
825       }
826     }
827 
828     int getLowestLocalityRegionOnServer(int serverIndex) {
829       if (regionFinder != null) {
830         float lowestLocality = 1.0f;
831         int lowestLocalityRegionIndex = -1;
832         if (regionsPerServer[serverIndex].length == 0) {
833           // No regions on that region server
834           return -1;
835         }
836         for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
837           int regionIndex = regionsPerServer[serverIndex][j];
838           HDFSBlocksDistribution distribution = regionFinder
839               .getBlockDistribution(regions[regionIndex]);
840           float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
841           // skip empty region
842           if (distribution.getUniqueBlocksTotalWeight() == 0) {
843             continue;
844           }
845           if (locality < lowestLocality) {
846             lowestLocality = locality;
847             lowestLocalityRegionIndex = j;
848           }
849         }
850         if (lowestLocalityRegionIndex == -1) {
851           return -1;
852         }
853         if (LOG.isTraceEnabled()) {
854           LOG.trace("Lowest locality region is "
855               + regions[regionsPerServer[serverIndex][lowestLocalityRegionIndex]]
856                   .getRegionNameAsString() + " with locality " + lowestLocality
857               + " and its region server contains " + regionsPerServer[serverIndex].length
858               + " regions");
859         }
860         return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
861       } else {
862         return -1;
863       }
864     }
865 
866     float getLocalityOfRegion(int region, int server) {
867       if (regionFinder != null) {
868         HDFSBlocksDistribution distribution = regionFinder.getBlockDistribution(regions[region]);
869         return distribution.getBlockLocalityIndex(servers[server].getHostname());
870       } else {
871         return 0f;
872       }
873     }
874 
875     /**
876      * Returns a least loaded server which has better locality for this region
877      * than the current server.
878      */
879     int getLeastLoadedTopServerForRegion(int region, int currentServer) {
880       if (regionFinder != null) {
881         List<ServerName> topLocalServers = regionFinder.getTopBlockLocations(regions[region],
882           servers[currentServer].getHostname());
883         int leastLoadedServerIndex = -1;
884         int load = Integer.MAX_VALUE;
885         for (ServerName sn : topLocalServers) {
886           if (!serversToIndex.containsKey(sn.getHostAndPort())) {
887             continue;
888           }
889           int index = serversToIndex.get(sn.getHostAndPort());
890           if (regionsPerServer[index] == null) {
891             continue;
892           }
893           int tempLoad = regionsPerServer[index].length;
894           if (tempLoad <= load) {
895             leastLoadedServerIndex = index;
896             load = tempLoad;
897           }
898         }
899         if (leastLoadedServerIndex != -1) {
900           LOG.debug("Pick the least loaded server " + servers[leastLoadedServerIndex].getHostname()
901             + " with better locality for region " + regions[region]);
902         }
903         return leastLoadedServerIndex;
904       } else {
905         return -1;
906       }
907     }
908 
909     void calculateRegionServerLocalities() {
910       if (regionFinder == null) {
911         LOG.warn("Region location finder found null, skipping locality calculations.");
912         return;
913       }
914       for (int i = 0; i < regionsPerServer.length; i++) {
915         HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
916         if (regionsPerServer[i].length > 0) {
917           for (int j = 0; j < regionsPerServer[i].length; j++) {
918             int regionIndex = regionsPerServer[i][j];
919             distribution.add(regionFinder.getBlockDistribution(regions[regionIndex]));
920           }
921         } else {
922           LOG.debug("Server " + servers[i].getHostname() + " had 0 regions.");
923         }
924         localityPerServer[i] = distribution.getBlockLocalityIndex(servers[i].getHostname());
925       }
926     }
927 
928     @VisibleForTesting
929     protected void setNumRegions(int numRegions) {
930       this.numRegions = numRegions;
931     }
932 
933     @VisibleForTesting
934     protected void setNumMovedRegions(int numMovedRegions) {
935       this.numMovedRegions = numMovedRegions;
936     }
937 
938     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
939         justification="Not important but should be fixed")
940     @Override
941     public String toString() {
942       String desc = "Cluster{" +
943           "servers=[";
944           for(ServerName sn:servers) {
945              desc += sn.getHostAndPort() + ", ";
946           }
947           desc +=
948           ", serverIndicesSortedByRegionCount="+
949           Arrays.toString(serverIndicesSortedByRegionCount) +
950           ", regionsPerServer=[";
951 
952           for (int[]r:regionsPerServer) {
953             desc += Arrays.toString(r);
954           }
955           desc += "]" +
956           ", numMaxRegionsPerTable=" +
957           Arrays.toString(numMaxRegionsPerTable) +
958           ", numRegions=" +
959           numRegions +
960           ", numServers=" +
961           numServers +
962           ", numTables=" +
963           numTables +
964           ", numMovedRegions=" +
965           numMovedRegions +
966           '}';
967       return desc;
968     }
969   }
970 
971   // slop for regions
972   protected float slop;
973   protected Configuration config;
974   protected RackManager rackManager;
975   private static final Random RANDOM = new Random(System.currentTimeMillis());
976   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
977 
978   // Regions of these tables are put on the master by default.
979   private static final String[] DEFAULT_TABLES_ON_MASTER =
980     new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
981       TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
982       TableName.META_TABLE_NAME.getNameAsString()};
983 
984   public static final String TABLES_ON_MASTER =
985     "hbase.balancer.tablesOnMaster";
986 
987   protected final Set<String> tablesOnMaster = new HashSet<String>();
988   protected MetricsBalancer metricsBalancer = null;
989   protected ClusterStatus clusterStatus = null;
990   protected ServerName masterServerName;
991   protected MasterServices services;
992 
993   /**
994    * By default, regions of some small system tables such as meta,
995    * namespace, and acl are assigned to the active master. If you don't
996    * want to assign any region to the active master, you need to
997    * configure "hbase.balancer.tablesOnMaster" to "none".
998    */
999   protected static String[] getTablesOnMaster(Configuration conf) {
1000     String valueString = conf.get(TABLES_ON_MASTER);
1001     if (valueString == null) {
1002       return DEFAULT_TABLES_ON_MASTER;
1003     }
1004     valueString = valueString.trim();
1005     if (valueString.equalsIgnoreCase("none")) {
1006       return null;
1007     }
1008     return StringUtils.getStrings(valueString);
1009   }
1010 
1011   /**
1012    * Check if configured to put any tables on the active master
1013    */
1014   public static boolean tablesOnMaster(Configuration conf) {
1015     String[] tables = getTablesOnMaster(conf);
1016     return tables != null && tables.length > 0;
1017   }
1018 
1019   @Override
1020   public void setConf(Configuration conf) {
1021     setSlop(conf);
1022     if (slop < 0) slop = 0;
1023     else if (slop > 1) slop = 1;
1024 
1025     this.config = conf;
1026     String[] tables = getTablesOnMaster(conf);
1027     if (tables != null && tables.length > 0) {
1028       Collections.addAll(tablesOnMaster, tables);
1029     }
1030     this.rackManager = new RackManager(getConf());
1031     regionFinder.setConf(conf);
1032   }
1033 
1034   protected void setSlop(Configuration conf) {
1035     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
1036   }
1037 
1038   /**
1039    * Check if a region belongs to some small system table.
1040    * If so, the primary replica may be expected to be put on the master regionserver.
1041    */
1042   public boolean shouldBeOnMaster(HRegionInfo region) {
1043     return tablesOnMaster.contains(region.getTable().getNameAsString())
1044         && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
1045   }
1046 
1047   /**
1048    * Balance the regions that should be on master regionserver.
1049    */
1050   protected List<RegionPlan> balanceMasterRegions(
1051       Map<ServerName, List<HRegionInfo>> clusterMap) {
1052     if (masterServerName == null
1053         || clusterMap == null || clusterMap.size() <= 1) return null;
1054     List<RegionPlan> plans = null;
1055     List<HRegionInfo> regions = clusterMap.get(masterServerName);
1056     if (regions != null) {
1057       Iterator<ServerName> keyIt = null;
1058       for (HRegionInfo region: regions) {
1059         if (shouldBeOnMaster(region)) continue;
1060 
1061         // Find a non-master regionserver to host the region
1062         if (keyIt == null || !keyIt.hasNext()) {
1063           keyIt = clusterMap.keySet().iterator();
1064         }
1065         ServerName dest = keyIt.next();
1066         if (masterServerName.equals(dest)) {
1067           if (!keyIt.hasNext()) {
1068             keyIt = clusterMap.keySet().iterator();
1069           }
1070           dest = keyIt.next();
1071         }
1072 
1073         // Move this region away from the master regionserver
1074         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
1075         if (plans == null) {
1076           plans = new ArrayList<RegionPlan>();
1077         }
1078         plans.add(plan);
1079       }
1080     }
1081     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
1082       if (masterServerName.equals(server.getKey())) continue;
1083       for (HRegionInfo region: server.getValue()) {
1084         if (!shouldBeOnMaster(region)) continue;
1085 
1086         // Move this region to the master regionserver
1087         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
1088         if (plans == null) {
1089           plans = new ArrayList<RegionPlan>();
1090         }
1091         plans.add(plan);
1092       }
1093     }
1094     return plans;
1095   }
1096 
1097   /**
1098    * Assign the regions that should be on master regionserver.
1099    */
1100   protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
1101       Collection<HRegionInfo> regions, List<ServerName> servers) {
1102     if (servers == null || regions == null || regions.isEmpty()) {
1103       return null;
1104     }
1105     Map<ServerName, List<HRegionInfo>> assignments
1106       = new TreeMap<ServerName, List<HRegionInfo>>();
1107     if (masterServerName != null && servers.contains(masterServerName)) {
1108       assignments.put(masterServerName, new ArrayList<HRegionInfo>());
1109       for (HRegionInfo region: regions) {
1110         if (shouldBeOnMaster(region)) {
1111           assignments.get(masterServerName).add(region);
1112         }
1113       }
1114     }
1115     return assignments;
1116   }
1117 
1118   @Override
1119   public Configuration getConf() {
1120     return this.config;
1121   }
1122 
1123   @Override
1124   public synchronized void setClusterStatus(ClusterStatus st) {
1125     this.clusterStatus = st;
1126     regionFinder.setClusterStatus(st);
1127   }
1128 
1129   @Override
1130   public void setMasterServices(MasterServices masterServices) {
1131     masterServerName = masterServices.getServerName();
1132     this.services = masterServices;
1133     this.regionFinder.setServices(masterServices);
1134   }
1135 
1136   public void setRackManager(RackManager rackManager) {
1137     this.rackManager = rackManager;
1138   }
1139 
1140   protected boolean needsBalance(Cluster c) {
1141     ClusterLoadState cs = new ClusterLoadState(c.clusterState);
1142     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
1143       if (LOG.isDebugEnabled()) {
1144         LOG.debug("Not running balancer because only " + cs.getNumServers()
1145             + " active regionserver(s)");
1146       }
1147       return false;
1148     }
1149     if(areSomeRegionReplicasColocated(c)) return true;
1150     // Check if we even need to do any load balancing
1151     // HBASE-3681 check sloppiness first
1152     float average = cs.getLoadAverage(); // for logging
1153     int floor = (int) Math.floor(average * (1 - slop));
1154     int ceiling = (int) Math.ceil(average * (1 + slop));
1155     if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
1156       NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
1157       if (LOG.isTraceEnabled()) {
1158         // If nothing to balance, then don't say anything unless trace-level logging.
1159         LOG.trace("Skipping load balancing because balanced cluster; " +
1160           "servers=" + cs.getNumServers() +
1161           " regions=" + cs.getNumRegions() + " average=" + average +
1162           " mostloaded=" + serversByLoad.lastKey().getLoad() +
1163           " leastloaded=" + serversByLoad.firstKey().getLoad());
1164       }
1165       return false;
1166     }
1167     return true;
1168   }
1169 
1170   /**
1171    * Subclasses should implement this to return true if the cluster has nodes that hosts
1172    * multiple replicas for the same region, or, if there are multiple racks and the same
1173    * rack hosts replicas of the same region
1174    * @param c Cluster information
1175    * @return whether region replicas are currently co-located
1176    */
1177   protected boolean areSomeRegionReplicasColocated(Cluster c) {
1178     return false;
1179   }
1180 
1181   /**
1182    * Generates a bulk assignment plan to be used on cluster startup using a
1183    * simple round-robin assignment.
1184    * <p>
1185    * Takes a list of all the regions and all the servers in the cluster and
1186    * returns a map of each server to the regions that it should be assigned.
1187    * <p>
1188    * Currently implemented as a round-robin assignment. Same invariant as load
1189    * balancing, all servers holding floor(avg) or ceiling(avg).
1190    *
1191    * TODO: Use block locations from HDFS to place regions with their blocks
1192    *
1193    * @param regions all regions
1194    * @param servers all servers
1195    * @return map of server to the regions it should take, or null if no
1196    *         assignment is possible (ie. no regions or no servers)
1197    */
1198   @Override
1199   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1200       List<ServerName> servers) {
1201     metricsBalancer.incrMiscInvocations();
1202     Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1203     if (assignments != null && !assignments.isEmpty()) {
1204       servers = new ArrayList<ServerName>(servers);
1205       // Guarantee not to put other regions on master
1206       servers.remove(masterServerName);
1207       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1208       if (!masterRegions.isEmpty()) {
1209         regions = new ArrayList<HRegionInfo>(regions);
1210         for (HRegionInfo region: masterRegions) {
1211           regions.remove(region);
1212         }
1213       }
1214     }
1215     if (regions == null || regions.isEmpty()) {
1216       return assignments;
1217     }
1218 
1219     int numServers = servers == null ? 0 : servers.size();
1220     if (numServers == 0) {
1221       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1222       return null;
1223     }
1224 
1225     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1226     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1227     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1228     // and balanced. This should also run fast with fewer number of iterations.
1229 
1230     if (numServers == 1) { // Only one server, nothing fancy we can do here
1231       ServerName server = servers.get(0);
1232       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1233       return assignments;
1234     }
1235 
1236     Cluster cluster = createCluster(servers, regions);
1237     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1238 
1239     roundRobinAssignment(cluster, regions, unassignedRegions,
1240       servers, assignments);
1241 
1242     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1243     // assign the remaining by going through the list and try to assign to servers one-by-one
1244     int serverIdx = RANDOM.nextInt(numServers);
1245     for (HRegionInfo region : unassignedRegions) {
1246       boolean assigned = false;
1247       for (int j = 0; j < numServers; j++) { // try all servers one by one
1248         ServerName serverName = servers.get((j + serverIdx) % numServers);
1249         if (!cluster.wouldLowerAvailability(region, serverName)) {
1250           List<HRegionInfo> serverRegions = assignments.get(serverName);
1251           if (serverRegions == null) {
1252             serverRegions = new ArrayList<HRegionInfo>();
1253             assignments.put(serverName, serverRegions);
1254           }
1255           serverRegions.add(region);
1256           cluster.doAssignRegion(region, serverName);
1257           serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
1258           assigned = true;
1259           break;
1260         }
1261       }
1262       if (!assigned) {
1263         lastFewRegions.add(region);
1264       }
1265     }
1266     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1267     // make it optimal later. we can end up with this if numReplicas > numServers.
1268     for (HRegionInfo region : lastFewRegions) {
1269       int i = RANDOM.nextInt(numServers);
1270       ServerName server = servers.get(i);
1271       List<HRegionInfo> serverRegions = assignments.get(server);
1272       if (serverRegions == null) {
1273         serverRegions = new ArrayList<HRegionInfo>();
1274         assignments.put(server, serverRegions);
1275       }
1276       serverRegions.add(region);
1277       cluster.doAssignRegion(region, server);
1278     }
1279     return assignments;
1280   }
1281 
1282   protected Cluster createCluster(List<ServerName> servers,
1283       Collection<HRegionInfo> regions) {
1284     // Get the snapshot of the current assignments for the regions in question, and then create
1285     // a cluster out of it. Note that we might have replicas already assigned to some servers
1286     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
1287     // replicas of the regions that are passed (for performance).
1288     Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1289 
1290     for (ServerName server : servers) {
1291       if (!clusterState.containsKey(server)) {
1292         clusterState.put(server, EMPTY_REGION_LIST);
1293       }
1294     }
1295     return new Cluster(regions, clusterState, null, this.regionFinder,
1296       rackManager);
1297   }
1298 
1299   /**
1300    * Used to assign a single region to a random server.
1301    */
1302   @Override
1303   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1304     metricsBalancer.incrMiscInvocations();
1305     if (servers != null && servers.contains(masterServerName)) {
1306       if (shouldBeOnMaster(regionInfo)) {
1307         return masterServerName;
1308       }
1309       servers = new ArrayList<ServerName>(servers);
1310       // Guarantee not to put other regions on master
1311       servers.remove(masterServerName);
1312     }
1313 
1314     int numServers = servers == null ? 0 : servers.size();
1315     if (numServers == 0) {
1316       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1317       return null;
1318     }
1319     if (numServers == 1) { // Only one server, nothing fancy we can do here
1320       return servers.get(0);
1321     }
1322 
1323     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1324     Cluster cluster = createCluster(servers, regions);
1325     return randomAssignment(cluster, regionInfo, servers);
1326   }
1327 
1328   /**
1329    * Generates a bulk assignment startup plan, attempting to reuse the existing
1330    * assignment information from META, but adjusting for the specified list of
1331    * available/online servers available for assignment.
1332    * <p>
1333    * Takes a map of all regions to their existing assignment from META. Also
1334    * takes a list of online servers for regions to be assigned to. Attempts to
1335    * retain all assignment, so in some instances initial assignment will not be
1336    * completely balanced.
1337    * <p>
1338    * Any leftover regions without an existing server to be assigned to will be
1339    * assigned randomly to available servers.
1340    *
1341    * @param regions regions and existing assignment from meta
1342    * @param servers available servers
1343    * @return map of servers and regions to be assigned to them
1344    */
1345   @Override
1346   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1347       List<ServerName> servers) {
1348     // Update metrics
1349     metricsBalancer.incrMiscInvocations();
1350     Map<ServerName, List<HRegionInfo>> assignments
1351       = assignMasterRegions(regions.keySet(), servers);
1352     if (assignments != null && !assignments.isEmpty()) {
1353       servers = new ArrayList<ServerName>(servers);
1354       // Guarantee not to put other regions on master
1355       servers.remove(masterServerName);
1356       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1357       if (!masterRegions.isEmpty()) {
1358         regions = new HashMap<HRegionInfo, ServerName>(regions);
1359         for (HRegionInfo region: masterRegions) {
1360           regions.remove(region);
1361         }
1362       }
1363     }
1364     if (regions == null || regions.isEmpty()) {
1365       return assignments;
1366     }
1367 
1368     int numServers = servers == null ? 0 : servers.size();
1369     if (numServers == 0) {
1370       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1371       return null;
1372     }
1373     if (numServers == 1) { // Only one server, nothing fancy we can do here
1374       ServerName server = servers.get(0);
1375       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1376       return assignments;
1377     }
1378 
1379     // Group all of the old assignments by their hostname.
1380     // We can't group directly by ServerName since the servers all have
1381     // new start-codes.
1382 
1383     // Group the servers by their hostname. It's possible we have multiple
1384     // servers on the same host on different ports.
1385     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1386     for (ServerName server : servers) {
1387       assignments.put(server, new ArrayList<HRegionInfo>());
1388       serversByHostname.put(server.getHostname(), server);
1389     }
1390 
1391     // Collection of the hostnames that used to have regions
1392     // assigned, but for which we no longer have any RS running
1393     // after the cluster restart.
1394     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1395 
1396     int numRandomAssignments = 0;
1397     int numRetainedAssigments = 0;
1398 
1399     Cluster cluster = createCluster(servers, regions.keySet());
1400 
1401     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1402       HRegionInfo region = entry.getKey();
1403       ServerName oldServerName = entry.getValue();
1404       List<ServerName> localServers = new ArrayList<ServerName>();
1405       if (oldServerName != null) {
1406         localServers = serversByHostname.get(oldServerName.getHostname());
1407       }
1408       if (localServers.isEmpty()) {
1409         // No servers on the new cluster match up with this hostname,
1410         // assign randomly.
1411         ServerName randomServer = randomAssignment(cluster, region, servers);
1412         assignments.get(randomServer).add(region);
1413         numRandomAssignments++;
1414         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1415       } else if (localServers.size() == 1) {
1416         // the usual case - one new server on same host
1417         ServerName target = localServers.get(0);
1418         assignments.get(target).add(region);
1419         cluster.doAssignRegion(region, target);
1420         numRetainedAssigments++;
1421       } else {
1422         // multiple new servers in the cluster on this same host
1423         if (localServers.contains(oldServerName)) {
1424           assignments.get(oldServerName).add(region);
1425           cluster.doAssignRegion(region, oldServerName);
1426         } else {
1427           ServerName target = null;
1428           for (ServerName tmp: localServers) {
1429             if (tmp.getPort() == oldServerName.getPort()) {
1430               target = tmp;
1431               break;
1432             }
1433           }
1434           if (target == null) {
1435             target = randomAssignment(cluster, region, localServers);
1436           }
1437           assignments.get(target).add(region);
1438         }
1439         numRetainedAssigments++;
1440       }
1441     }
1442 
1443     String randomAssignMsg = "";
1444     if (numRandomAssignments > 0) {
1445       randomAssignMsg =
1446           numRandomAssignments + " regions were assigned "
1447               + "to random hosts, since the old hosts for these regions are no "
1448               + "longer present in the cluster. These hosts were:\n  "
1449               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1450     }
1451 
1452     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1453         + " retained the pre-restart assignment. " + randomAssignMsg);
1454     return assignments;
1455   }
1456 
1457   @Override
1458   public void initialize() throws HBaseIOException{
1459   }
1460 
1461   @Override
1462   public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1463   }
1464 
1465   @Override
1466   public void regionOffline(HRegionInfo regionInfo) {
1467   }
1468 
1469   @Override
1470   public boolean isStopped() {
1471     return stopped;
1472   }
1473 
1474   @Override
1475   public void stop(String why) {
1476     LOG.info("Load Balancer stop requested: "+why);
1477     stopped = true;
1478   }
1479 
1480   /**
1481    * Used to assign a single region to a random server.
1482    */
1483   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1484       List<ServerName> servers) {
1485     int numServers = servers.size(); // servers is not null, numServers > 1
1486     ServerName sn = null;
1487     final int maxIterations = numServers * 4;
1488     int iterations = 0;
1489 
1490     do {
1491       int i = RANDOM.nextInt(numServers);
1492       sn = servers.get(i);
1493     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1494         && iterations++ < maxIterations);
1495     cluster.doAssignRegion(regionInfo, sn);
1496     return sn;
1497   }
1498 
1499   /**
1500    * Round robin a list of regions to a list of servers
1501    */
1502   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1503       List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1504       Map<ServerName, List<HRegionInfo>> assignments) {
1505 
1506     int numServers = servers.size();
1507     int numRegions = regions.size();
1508     int max = (int) Math.ceil((float) numRegions / numServers);
1509     int serverIdx = 0;
1510     if (numServers > 1) {
1511       serverIdx = RANDOM.nextInt(numServers);
1512     }
1513     int regionIdx = 0;
1514 
1515     for (int j = 0; j < numServers; j++) {
1516       ServerName server = servers.get((j + serverIdx) % numServers);
1517       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1518       for (int i = regionIdx; i < numRegions; i += numServers) {
1519         HRegionInfo region = regions.get(i % numRegions);
1520         if (cluster.wouldLowerAvailability(region, server)) {
1521           unassignedRegions.add(region);
1522         } else {
1523           serverRegions.add(region);
1524           cluster.doAssignRegion(region, server);
1525         }
1526       }
1527       assignments.put(server, serverRegions);
1528       regionIdx++;
1529     }
1530   }
1531 
1532   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1533     Collection<HRegionInfo> regions) {
1534     if (this.services != null && this.services.getAssignmentManager() != null) {
1535       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1536     } else {
1537       return new HashMap<ServerName, List<HRegionInfo>>();
1538     }
1539   }
1540 
1541   @Override
1542   public void onConfigurationChange(Configuration conf) {
1543   }
1544 }