View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.Deque;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.NavigableMap;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeMap;
36  
37  import org.apache.commons.lang.NotImplementedException;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.ClusterStatus;
42  import org.apache.hadoop.hbase.HBaseIOException;
43  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.RegionLoad;
46  import org.apache.hadoop.hbase.ServerName;
47  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
48  import org.apache.hadoop.hbase.conf.ConfigurationObserver;
49  import org.apache.hadoop.hbase.master.LoadBalancer;
50  import org.apache.hadoop.hbase.master.MasterServices;
51  import org.apache.hadoop.hbase.master.RackManager;
52  import org.apache.hadoop.hbase.master.RegionPlan;
53  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
54  import org.apache.hadoop.util.StringUtils;
55  
56  import com.google.common.base.Joiner;
57  import com.google.common.collect.ArrayListMultimap;
58  import com.google.common.collect.Lists;
59  import com.google.common.collect.Sets;
60  
61  /**
62   * The base class for load balancers. It provides the the functions used to by
63   * {@link org.apache.hadoop.hbase.master.AssignmentManager} to assign regions
64   * in the edge cases. It doesn't provide an implementation of the actual
65   * balancing algorithm.
66   *
67   */
68  public abstract class BaseLoadBalancer implements LoadBalancer {
69    private static final int MIN_SERVER_BALANCE = 2;
70    private volatile boolean stopped = false;
71  
72    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
73  
74    protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
75  
76    private static class DefaultRackManager extends RackManager {
77      @Override
78      public String getRack(ServerName server) {
79        return UNKNOWN_RACK;
80      }
81    }
82  
83    /**
84     * An efficient array based implementation similar to ClusterState for keeping
85     * the status of the cluster in terms of region assignment and distribution.
86     * LoadBalancers, such as StochasticLoadBalancer uses this Cluster object because of
87     * hundreds of thousands of hashmap manipulations are very costly, which is why this
88     * class uses mostly indexes and arrays.
89     *
90     * Cluster tracks a list of unassigned regions, region assignments, and the server
91     * topology in terms of server names, hostnames and racks.
92     */
93    protected static class Cluster {
94      ServerName[] servers;
95      String[] hosts; // ServerName uniquely identifies a region server. multiple RS can run on the same host
96      String[] racks;
97      boolean multiServersPerHost = false; // whether or not any host has more than one server
98  
99      ArrayList<String> tables;
100     HRegionInfo[] regions;
101     Deque<RegionLoad>[] regionLoads;
102     private RegionLocationFinder regionFinder;
103 
104     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
105 
106     int[]   serverIndexToHostIndex;      //serverIndex -> host index
107     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
108 
109     int[][] regionsPerServer;            //serverIndex -> region list
110     int[][] regionsPerHost;              //hostIndex -> list of regions
111     int[][] regionsPerRack;              //rackIndex -> region list
112     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
113     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
114     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
115 
116     int[][] serversPerHost;              //hostIndex -> list of server indexes
117     int[][] serversPerRack;              //rackIndex -> list of server indexes
118     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
119     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
120     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
121     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
122     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
123     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
124     boolean hasRegionReplicas = false;   //whether there is regions with replicas
125 
126     Integer[] serverIndicesSortedByRegionCount;
127     Integer[] serverIndicesSortedByLocality;
128 
129     Map<String, Integer> serversToIndex;
130     Map<String, Integer> hostsToIndex;
131     Map<String, Integer> racksToIndex;
132     Map<String, Integer> tablesToIndex;
133     Map<HRegionInfo, Integer> regionsToIndex;
134     float[] localityPerServer;
135 
136     int numServers;
137     int numHosts;
138     int numRacks;
139     int numTables;
140     int numRegions;
141 
142     int numMovedRegions = 0; //num moved regions from the initial configuration
143     Map<ServerName, List<HRegionInfo>> clusterState;
144 
145     protected final RackManager rackManager;
146 
147     protected Cluster(
148         Map<ServerName, List<HRegionInfo>> clusterState,
149         Map<String, Deque<RegionLoad>> loads,
150         RegionLocationFinder regionFinder,
151         RackManager rackManager) {
152       this(null, clusterState, loads, regionFinder,
153         rackManager);
154     }
155 
156     @SuppressWarnings("unchecked")
157     protected Cluster(
158         Collection<HRegionInfo> unassignedRegions,
159         Map<ServerName, List<HRegionInfo>> clusterState,
160         Map<String, Deque<RegionLoad>> loads,
161         RegionLocationFinder regionFinder,
162         RackManager rackManager) {
163 
164       if (unassignedRegions == null) {
165         unassignedRegions = EMPTY_REGION_LIST;
166       }
167 
168       serversToIndex = new HashMap<String, Integer>();
169       hostsToIndex = new HashMap<String, Integer>();
170       racksToIndex = new HashMap<String, Integer>();
171       tablesToIndex = new HashMap<String, Integer>();
172 
173       //TODO: We should get the list of tables from master
174       tables = new ArrayList<String>();
175       this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
176 
177       numRegions = 0;
178 
179       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
180       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
181       this.clusterState = clusterState;
182       this.regionFinder = regionFinder;
183 
184       // Use servername and port as there can be dead servers in this list. We want everything with
185       // a matching hostname and port to have the same index.
186       for (ServerName sn : clusterState.keySet()) {
187         if (serversToIndex.get(sn.getHostAndPort()) == null) {
188           serversToIndex.put(sn.getHostAndPort(), numServers++);
189         }
190         if (!hostsToIndex.containsKey(sn.getHostname())) {
191           hostsToIndex.put(sn.getHostname(), numHosts++);
192           serversPerHostList.add(new ArrayList<Integer>(1));
193         }
194 
195         int serverIndex = serversToIndex.get(sn.getHostAndPort());
196         int hostIndex = hostsToIndex.get(sn.getHostname());
197         serversPerHostList.get(hostIndex).add(serverIndex);
198 
199         String rack = this.rackManager.getRack(sn);
200         if (!racksToIndex.containsKey(rack)) {
201           racksToIndex.put(rack, numRacks++);
202           serversPerRackList.add(new ArrayList<Integer>());
203         }
204         int rackIndex = racksToIndex.get(rack);
205         serversPerRackList.get(rackIndex).add(serverIndex);
206       }
207 
208       // Count how many regions there are.
209       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
210         numRegions += entry.getValue().size();
211       }
212       numRegions += unassignedRegions.size();
213 
214       regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
215       servers = new ServerName[numServers];
216       serversPerHost = new int[numHosts][];
217       serversPerRack = new int[numRacks][];
218       regions = new HRegionInfo[numRegions];
219       regionIndexToServerIndex = new int[numRegions];
220       initialRegionIndexToServerIndex = new int[numRegions];
221       regionIndexToTableIndex = new int[numRegions];
222       regionIndexToPrimaryIndex = new int[numRegions];
223       regionLoads = new Deque[numRegions];
224       regionLocations = new int[numRegions][];
225       serverIndicesSortedByRegionCount = new Integer[numServers];
226       serverIndicesSortedByLocality = new Integer[numServers];
227       localityPerServer = new float[numServers];
228 
229       serverIndexToHostIndex = new int[numServers];
230       serverIndexToRackIndex = new int[numServers];
231       regionsPerServer = new int[numServers][];
232       regionsPerHost = new int[numHosts][];
233       regionsPerRack = new int[numRacks][];
234       primariesOfRegionsPerServer = new int[numServers][];
235       primariesOfRegionsPerHost = new int[numHosts][];
236       primariesOfRegionsPerRack = new int[numRacks][];
237 
238       int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
239 
240       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
241         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
242 
243         // keep the servername if this is the first server name for this hostname
244         // or this servername has the newest startcode.
245         if (servers[serverIndex] == null ||
246             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
247           servers[serverIndex] = entry.getKey();
248         }
249 
250         if (regionsPerServer[serverIndex] != null) {
251           // there is another server with the same hostAndPort in ClusterState.
252           // allocate the array for the total size
253           regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
254         } else {
255           regionsPerServer[serverIndex] = new int[entry.getValue().size()];
256         }
257         primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
258         serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
259         serverIndicesSortedByLocality[serverIndex] = serverIndex;
260       }
261 
262       hosts = new String[numHosts];
263       for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
264         hosts[entry.getValue()] = entry.getKey();
265       }
266       racks = new String[numRacks];
267       for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
268         racks[entry.getValue()] = entry.getKey();
269       }
270 
271       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
272         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
273         regionPerServerIndex = 0;
274 
275         int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
276         serverIndexToHostIndex[serverIndex] = hostIndex;
277 
278         int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
279         serverIndexToRackIndex[serverIndex] = rackIndex;
280 
281         for (HRegionInfo region : entry.getValue()) {
282           registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
283 
284           regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
285           regionIndex++;
286         }
287       }
288       for (HRegionInfo region : unassignedRegions) {
289         registerRegion(region, regionIndex, -1, loads, regionFinder);
290         regionIndex++;
291       }
292 
293       for (int i = 0; i < serversPerHostList.size(); i++) {
294         serversPerHost[i] = new int[serversPerHostList.get(i).size()];
295         for (int j = 0; j < serversPerHost[i].length; j++) {
296           serversPerHost[i][j] = serversPerHostList.get(i).get(j);
297         }
298         if (serversPerHost[i].length > 1) {
299           multiServersPerHost = true;
300         }
301       }
302 
303       for (int i = 0; i < serversPerRackList.size(); i++) {
304         serversPerRack[i] = new int[serversPerRackList.get(i).size()];
305         for (int j = 0; j < serversPerRack[i].length; j++) {
306           serversPerRack[i][j] = serversPerRackList.get(i).get(j);
307         }
308       }
309 
310       numTables = tables.size();
311       numRegionsPerServerPerTable = new int[numServers][numTables];
312 
313       for (int i = 0; i < numServers; i++) {
314         for (int j = 0; j < numTables; j++) {
315           numRegionsPerServerPerTable[i][j] = 0;
316         }
317       }
318 
319       for (int i=0; i < regionIndexToServerIndex.length; i++) {
320         if (regionIndexToServerIndex[i] >= 0) {
321           numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
322         }
323       }
324 
325       numMaxRegionsPerTable = new int[numTables];
326       for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
327         for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
328           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
329             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
330           }
331         }
332       }
333 
334       for (int i = 0; i < regions.length; i ++) {
335         HRegionInfo info = regions[i];
336         if (RegionReplicaUtil.isDefaultReplica(info)) {
337           regionIndexToPrimaryIndex[i] = i;
338         } else {
339           hasRegionReplicas = true;
340           HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
341           regionIndexToPrimaryIndex[i] =
342               regionsToIndex.containsKey(primaryInfo) ?
343               regionsToIndex.get(primaryInfo):
344               -1;
345         }
346       }
347 
348       for (int i = 0; i < regionsPerServer.length; i++) {
349         primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
350         for (int j = 0; j < regionsPerServer[i].length; j++) {
351           int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
352           primariesOfRegionsPerServer[i][j] = primaryIndex;
353         }
354         // sort the regions by primaries.
355         Arrays.sort(primariesOfRegionsPerServer[i]);
356       }
357 
358       // compute regionsPerHost
359       if (multiServersPerHost) {
360         for (int i = 0 ; i < serversPerHost.length; i++) {
361           int numRegionsPerHost = 0;
362           for (int j = 0; j < serversPerHost[i].length; j++) {
363             numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
364           }
365           regionsPerHost[i] = new int[numRegionsPerHost];
366           primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
367         }
368         for (int i = 0 ; i < serversPerHost.length; i++) {
369           int numRegionPerHostIndex = 0;
370           for (int j = 0; j < serversPerHost[i].length; j++) {
371             for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
372               int region = regionsPerServer[serversPerHost[i][j]][k];
373               regionsPerHost[i][numRegionPerHostIndex] = region;
374               int primaryIndex = regionIndexToPrimaryIndex[region];
375               primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
376               numRegionPerHostIndex++;
377             }
378           }
379           // sort the regions by primaries.
380           Arrays.sort(primariesOfRegionsPerHost[i]);
381         }
382       }
383 
384       // compute regionsPerRack
385       if (numRacks > 1) {
386         for (int i = 0 ; i < serversPerRack.length; i++) {
387           int numRegionsPerRack = 0;
388           for (int j = 0; j < serversPerRack[i].length; j++) {
389             numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
390           }
391           regionsPerRack[i] = new int[numRegionsPerRack];
392           primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
393         }
394 
395         for (int i = 0 ; i < serversPerRack.length; i++) {
396           int numRegionPerRackIndex = 0;
397           for (int j = 0; j < serversPerRack[i].length; j++) {
398             for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
399               int region = regionsPerServer[serversPerRack[i][j]][k];
400               regionsPerRack[i][numRegionPerRackIndex] = region;
401               int primaryIndex = regionIndexToPrimaryIndex[region];
402               primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
403               numRegionPerRackIndex++;
404             }
405           }
406           // sort the regions by primaries.
407           Arrays.sort(primariesOfRegionsPerRack[i]);
408         }
409       }
410     }
411 
412     /** Helper for Cluster constructor to handle a region */
413     private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
414         Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
415       String tableName = region.getTable().getNameAsString();
416       if (!tablesToIndex.containsKey(tableName)) {
417         tables.add(tableName);
418         tablesToIndex.put(tableName, tablesToIndex.size());
419       }
420       int tableIndex = tablesToIndex.get(tableName);
421 
422       regionsToIndex.put(region, regionIndex);
423       regions[regionIndex] = region;
424       regionIndexToServerIndex[regionIndex] = serverIndex;
425       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
426       regionIndexToTableIndex[regionIndex] = tableIndex;
427 
428       // region load
429       if (loads != null) {
430         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
431         // That could have failed if the RegionLoad is using the other regionName
432         if (rl == null) {
433           // Try getting the region load using encoded name.
434           rl = loads.get(region.getEncodedName());
435         }
436         regionLoads[regionIndex] = rl;
437       }
438 
439       if (regionFinder != null) {
440         //region location
441         List<ServerName> loc = regionFinder.getTopBlockLocations(region);
442         regionLocations[regionIndex] = new int[loc.size()];
443         for (int i=0; i < loc.size(); i++) {
444           regionLocations[regionIndex][i] =
445               loc.get(i) == null ? -1 :
446                 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
447                     : serversToIndex.get(loc.get(i).getHostAndPort()));
448         }
449       }
450     }
451 
452     /** An action to move or swap a region */
453     public static class Action {
454       public static enum Type {
455         ASSIGN_REGION,
456         MOVE_REGION,
457         SWAP_REGIONS,
458         NULL,
459       }
460 
461       public Type type;
462       public Action (Type type) {this.type = type;}
463       /** Returns an Action which would undo this action */
464       public Action undoAction() { return this; }
465       @Override
466       public String toString() { return type + ":";}
467     }
468 
469     public static class AssignRegionAction extends Action {
470       public int region;
471       public int server;
472       public AssignRegionAction(int region, int server) {
473         super(Type.ASSIGN_REGION);
474         this.region = region;
475         this.server = server;
476       }
477       @Override
478       public Action undoAction() {
479         // TODO implement this. This action is not being used by the StochasticLB for now
480         // in case it uses it, we should implement this function.
481         throw new NotImplementedException();
482       }
483       @Override
484       public String toString() {
485         return type + ": " + region + ":" + server;
486       }
487     }
488 
489     public static class MoveRegionAction extends Action {
490       public int region;
491       public int fromServer;
492       public int toServer;
493 
494       public MoveRegionAction(int region, int fromServer, int toServer) {
495         super(Type.MOVE_REGION);
496         this.fromServer = fromServer;
497         this.region = region;
498         this.toServer = toServer;
499       }
500       @Override
501       public Action undoAction() {
502         return new MoveRegionAction (region, toServer, fromServer);
503       }
504       @Override
505       public String toString() {
506         return type + ": " + region + ":" + fromServer + " -> " + toServer;
507       }
508     }
509 
510     public static class SwapRegionsAction extends Action {
511       public int fromServer;
512       public int fromRegion;
513       public int toServer;
514       public int toRegion;
515       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
516         super(Type.SWAP_REGIONS);
517         this.fromServer = fromServer;
518         this.fromRegion = fromRegion;
519         this.toServer = toServer;
520         this.toRegion = toRegion;
521       }
522       @Override
523       public Action undoAction() {
524         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
525       }
526       @Override
527       public String toString() {
528         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
529       }
530     }
531 
532     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
533         justification="Mistake. Too disruptive to change now")
534     public static final Action NullAction = new Action(Type.NULL);
535 
536     public void doAction(Action action) {
537       switch (action.type) {
538       case NULL: break;
539       case ASSIGN_REGION:
540         // FindBugs: Having the assert quietens FB BC_UNCONFIRMED_CAST warnings
541         assert action instanceof AssignRegionAction: action.getClass();
542         AssignRegionAction ar = (AssignRegionAction) action;
543         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
544         regionMoved(ar.region, -1, ar.server);
545         break;
546       case MOVE_REGION:
547         assert action instanceof MoveRegionAction: action.getClass();
548         MoveRegionAction mra = (MoveRegionAction) action;
549         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
550         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
551         regionMoved(mra.region, mra.fromServer, mra.toServer);
552         break;
553       case SWAP_REGIONS:
554         assert action instanceof SwapRegionsAction: action.getClass();
555         SwapRegionsAction a = (SwapRegionsAction) action;
556         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
557         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
558         regionMoved(a.fromRegion, a.fromServer, a.toServer);
559         regionMoved(a.toRegion, a.toServer, a.fromServer);
560         break;
561       default:
562         throw new RuntimeException("Uknown action:" + action.type);
563       }
564     }
565 
566     /**
567      * Return true if the placement of region on server would lower the availability
568      * of the region in question
569      * @param server
570      * @param region
571      * @return true or false
572      */
573     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
574       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
575         return false; // safeguard against race between cluster.servers and servers from LB method args
576       }
577       int server = serversToIndex.get(serverName.getHostAndPort());
578       int region = regionsToIndex.get(regionInfo);
579 
580       int primary = regionIndexToPrimaryIndex[region];
581 
582       // there is a subset relation for server < host < rack
583       // check server first
584 
585       if (contains(primariesOfRegionsPerServer[server], primary)) {
586         // check for whether there are other servers that we can place this region
587         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
588           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
589             return true; // meaning there is a better server
590           }
591         }
592         return false; // there is not a better server to place this
593       }
594 
595       // check host
596       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
597         int host = serverIndexToHostIndex[server];
598         if (contains(primariesOfRegionsPerHost[host], primary)) {
599           // check for whether there are other hosts that we can place this region
600           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
601             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
602               return true; // meaning there is a better host
603             }
604           }
605           return false; // there is not a better host to place this
606         }
607       }
608 
609       // check rack
610       if (numRacks > 1) {
611         int rack = serverIndexToRackIndex[server];
612         if (contains(primariesOfRegionsPerRack[rack], primary)) {
613           // check for whether there are other racks that we can place this region
614           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
615             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
616               return true; // meaning there is a better rack
617             }
618           }
619           return false; // there is not a better rack to place this
620         }
621       }
622       return false;
623     }
624 
625     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
626       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
627         return;
628       }
629       int server = serversToIndex.get(serverName.getHostAndPort());
630       int region = regionsToIndex.get(regionInfo);
631       doAction(new AssignRegionAction(region, server));
632     }
633 
634     void regionMoved(int region, int oldServer, int newServer) {
635       regionIndexToServerIndex[region] = newServer;
636       if (initialRegionIndexToServerIndex[region] == newServer) {
637         numMovedRegions--; //region moved back to original location
638       } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
639         numMovedRegions++; //region moved from original location
640       }
641       int tableIndex = regionIndexToTableIndex[region];
642       if (oldServer >= 0) {
643         numRegionsPerServerPerTable[oldServer][tableIndex]--;
644       }
645       numRegionsPerServerPerTable[newServer][tableIndex]++;
646 
647       //check whether this caused maxRegionsPerTable in the new Server to be updated
648       if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
649         numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
650       } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
651           == numMaxRegionsPerTable[tableIndex]) {
652         //recompute maxRegionsPerTable since the previous value was coming from the old server
653         for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
654           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
655             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
656           }
657         }
658       }
659 
660       // update for servers
661       int primary = regionIndexToPrimaryIndex[region];
662       if (oldServer >= 0) {
663         primariesOfRegionsPerServer[oldServer] = removeRegion(
664           primariesOfRegionsPerServer[oldServer], primary);
665       }
666       primariesOfRegionsPerServer[newServer] = addRegionSorted(
667         primariesOfRegionsPerServer[newServer], primary);
668 
669       // update for hosts
670       if (multiServersPerHost) {
671         int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
672         int newHost = serverIndexToHostIndex[newServer];
673         if (newHost != oldHost) {
674           regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
675           primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
676           if (oldHost >= 0) {
677             regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
678             primariesOfRegionsPerHost[oldHost] = removeRegion(
679               primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
680           }
681         }
682       }
683 
684       // update for racks
685       if (numRacks > 1) {
686         int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
687         int newRack = serverIndexToRackIndex[newServer];
688         if (newRack != oldRack) {
689           regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
690           primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
691           if (oldRack >= 0) {
692             regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
693             primariesOfRegionsPerRack[oldRack] = removeRegion(
694               primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
695           }
696         }
697       }
698     }
699 
700     int[] removeRegion(int[] regions, int regionIndex) {
701       //TODO: this maybe costly. Consider using linked lists
702       int[] newRegions = new int[regions.length - 1];
703       int i = 0;
704       for (i = 0; i < regions.length; i++) {
705         if (regions[i] == regionIndex) {
706           break;
707         }
708         newRegions[i] = regions[i];
709       }
710       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
711       return newRegions;
712     }
713 
714     int[] addRegion(int[] regions, int regionIndex) {
715       int[] newRegions = new int[regions.length + 1];
716       System.arraycopy(regions, 0, newRegions, 0, regions.length);
717       newRegions[newRegions.length - 1] = regionIndex;
718       return newRegions;
719     }
720 
721     int[] addRegionSorted(int[] regions, int regionIndex) {
722       int[] newRegions = new int[regions.length + 1];
723       int i = 0;
724       for (i = 0; i < regions.length; i++) { // find the index to insert
725         if (regions[i] > regionIndex) {
726           break;
727         }
728       }
729       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
730       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
731       newRegions[i] = regionIndex;
732 
733       return newRegions;
734     }
735 
736     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
737       int i = 0;
738       for (i = 0; i < regions.length; i++) {
739         if (regions[i] == regionIndex) {
740           regions[i] = newRegionIndex;
741           break;
742         }
743       }
744       return regions;
745     }
746 
747     void sortServersByRegionCount() {
748       Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
749     }
750 
751     int getNumRegions(int server) {
752       return regionsPerServer[server].length;
753     }
754 
755     boolean contains(int[] arr, int val) {
756       return Arrays.binarySearch(arr, val) >= 0;
757     }
758 
759     private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
760       @Override
761       public int compare(Integer integer, Integer integer2) {
762         return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
763       }
764     };
765 
766     void sortServersByLocality() {
767       Arrays.sort(serverIndicesSortedByLocality, localityComparator);
768     }
769 
770     float getLocality(int server) {
771       return localityPerServer[server];
772     }
773 
774     private Comparator<Integer> localityComparator = new Comparator<Integer>() {
775       @Override
776       public int compare(Integer integer, Integer integer2) {
777         float locality1 = getLocality(integer);
778         float locality2 = getLocality(integer2);
779         if (locality1 < locality2) {
780           return -1;
781         } else if (locality1 > locality2) {
782           return 1;
783         } else {
784           return 0;
785         }
786       }
787     };
788 
789     int getLowestLocalityRegionServer() {
790       if (regionFinder == null) {
791         return -1;
792       } else {
793         sortServersByLocality();
794         // We want to find server with non zero regions having lowest locality.
795         int i = 0;
796         int lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
797         while (localityPerServer[lowestLocalityServerIndex] == 0
798             && (regionsPerServer[lowestLocalityServerIndex].length == 0)) {
799           i++;
800           lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
801         }
802         if (LOG.isTraceEnabled()) {
803           LOG.trace("Lowest locality region server with non zero regions is "
804             + servers[lowestLocalityServerIndex].getHostname() + " with locality "
805             + localityPerServer[lowestLocalityServerIndex]);
806         }
807         return lowestLocalityServerIndex;
808       }
809     }
810 
811     int getLowestLocalityRegionOnServer(int serverIndex) {
812       if (regionFinder != null) {
813         float lowestLocality = 1.0f;
814         int lowestLocalityRegionIndex = 0;
815         if (regionsPerServer[serverIndex].length == 0) {
816           // No regions on that region server
817           return -1;
818         }
819         for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
820           int regionIndex = regionsPerServer[serverIndex][j];
821           HDFSBlocksDistribution distribution = regionFinder
822               .getBlockDistribution(regions[regionIndex]);
823           float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
824           if (locality < lowestLocality) {
825             lowestLocality = locality;
826             lowestLocalityRegionIndex = j;
827           }
828         }
829         if (LOG.isTraceEnabled()) {
830           LOG.trace(" Lowest locality region index is " + lowestLocalityRegionIndex
831             + " and its region server contains " + regionsPerServer[serverIndex].length
832             + " regions");
833         }
834         return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
835       } else {
836         return -1;
837       }
838     }
839 
840     float getLocalityOfRegion(int region, int server) {
841       if (regionFinder != null) {
842         HDFSBlocksDistribution distribution = regionFinder.getBlockDistribution(regions[region]);
843         return distribution.getBlockLocalityIndex(servers[server].getHostname());
844       } else {
845         return 0f;
846       }
847     }
848 
849     int getLeastLoadedTopServerForRegion(int region) {
850       if (regionFinder != null) {
851         List<ServerName> topLocalServers = regionFinder.getTopBlockLocations(regions[region]);
852         int leastLoadedServerIndex = -1;
853         int load = Integer.MAX_VALUE;
854         for (ServerName sn : topLocalServers) {
855           if (!serversToIndex.containsKey(sn.getHostAndPort())) {
856             continue;
857           }
858           int index = serversToIndex.get(sn.getHostAndPort());
859           if (regionsPerServer[index] == null) {
860             continue;
861           }
862           int tempLoad = regionsPerServer[index].length;
863           if (tempLoad <= load) {
864             leastLoadedServerIndex = index;
865             load = tempLoad;
866           }
867         }
868         return leastLoadedServerIndex;
869       } else {
870         return -1;
871       }
872     }
873 
874     void calculateRegionServerLocalities() {
875       if (regionFinder == null) {
876         LOG.warn("Region location finder found null, skipping locality calculations.");
877         return;
878       }
879       for (int i = 0; i < regionsPerServer.length; i++) {
880         HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
881         if (regionsPerServer[i].length > 0) {
882           for (int j = 0; j < regionsPerServer[i].length; j++) {
883             int regionIndex = regionsPerServer[i][j];
884             distribution.add(regionFinder.getBlockDistribution(regions[regionIndex]));
885           }
886         } else {
887           LOG.debug("Server " + servers[i].getHostname() + " had 0 regions.");
888         }
889         localityPerServer[i] = distribution.getBlockLocalityIndex(servers[i].getHostname());
890       }
891     }
892 
893     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
894         justification="Not important but should be fixed")
895     @Override
896     public String toString() {
897       String desc = "Cluster{" +
898           "servers=[";
899           for(ServerName sn:servers) {
900              desc += sn.getHostAndPort() + ", ";
901           }
902           desc +=
903           ", serverIndicesSortedByRegionCount="+
904           Arrays.toString(serverIndicesSortedByRegionCount) +
905           ", regionsPerServer=[";
906 
907           for (int[]r:regionsPerServer) {
908             desc += Arrays.toString(r);
909           }
910           desc += "]" +
911           ", numMaxRegionsPerTable=" +
912           Arrays.toString(numMaxRegionsPerTable) +
913           ", numRegions=" +
914           numRegions +
915           ", numServers=" +
916           numServers +
917           ", numTables=" +
918           numTables +
919           ", numMovedRegions=" +
920           numMovedRegions +
921           '}';
922       return desc;
923     }
924   }
925 
926   // slop for regions
927   protected float slop;
928   protected Configuration config;
929   protected RackManager rackManager;
930   private static final Random RANDOM = new Random(System.currentTimeMillis());
931   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
932 
933   public static final String TABLES_ON_MASTER =
934     "hbase.balancer.tablesOnMaster";
935 
936   protected final Set<String> tablesOnMaster = new HashSet<String>();
937   protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
938   protected ClusterStatus clusterStatus = null;
939   protected ServerName masterServerName;
940   protected MasterServices services;
941 
942   protected static String[] getTablesOnMaster(Configuration conf) {
943     String valueString = conf.get(TABLES_ON_MASTER);
944     if (valueString != null) {
945       valueString = valueString.trim();
946     }
947     if (valueString == null || valueString.equalsIgnoreCase("none")) {
948       return null;
949     }
950     return StringUtils.getStrings(valueString);
951   }
952 
953   /**
954    * Check if configured to put any tables on the active master
955    */
956   public static boolean tablesOnMaster(Configuration conf) {
957     String[] tables = getTablesOnMaster(conf);
958     return tables != null && tables.length > 0;
959   }
960 
961   @Override
962   public void setConf(Configuration conf) {
963     setSlop(conf);
964     if (slop < 0) slop = 0;
965     else if (slop > 1) slop = 1;
966 
967     this.config = conf;
968     String[] tables = getTablesOnMaster(conf);
969     if (tables != null && tables.length > 0) {
970       Collections.addAll(tablesOnMaster, tables);
971     }
972     this.rackManager = new RackManager(getConf());
973     regionFinder.setConf(conf);
974   }
975 
976   protected void setSlop(Configuration conf) {
977     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
978   }
979 
980   /**
981    * Check if a region belongs to some small system table.
982    * If so, the primary replica may be expected to be put on the master regionserver.
983    */
984   public boolean shouldBeOnMaster(HRegionInfo region) {
985     return tablesOnMaster.contains(region.getTable().getNameAsString())
986         && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
987   }
988 
989   /**
990    * Balance the regions that should be on master regionserver.
991    */
992   protected List<RegionPlan> balanceMasterRegions(
993       Map<ServerName, List<HRegionInfo>> clusterMap) {
994     if (masterServerName == null
995         || clusterMap == null || clusterMap.size() <= 1) return null;
996     List<RegionPlan> plans = null;
997     List<HRegionInfo> regions = clusterMap.get(masterServerName);
998     if (regions != null) {
999       Iterator<ServerName> keyIt = null;
1000       for (HRegionInfo region: regions) {
1001         if (shouldBeOnMaster(region)) continue;
1002 
1003         // Find a non-master regionserver to host the region
1004         if (keyIt == null || !keyIt.hasNext()) {
1005           keyIt = clusterMap.keySet().iterator();
1006         }
1007         ServerName dest = keyIt.next();
1008         if (masterServerName.equals(dest)) {
1009           if (!keyIt.hasNext()) {
1010             keyIt = clusterMap.keySet().iterator();
1011           }
1012           dest = keyIt.next();
1013         }
1014 
1015         // Move this region away from the master regionserver
1016         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
1017         if (plans == null) {
1018           plans = new ArrayList<RegionPlan>();
1019         }
1020         plans.add(plan);
1021       }
1022     }
1023     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
1024       if (masterServerName.equals(server.getKey())) continue;
1025       for (HRegionInfo region: server.getValue()) {
1026         if (!shouldBeOnMaster(region)) continue;
1027 
1028         // Move this region to the master regionserver
1029         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
1030         if (plans == null) {
1031           plans = new ArrayList<RegionPlan>();
1032         }
1033         plans.add(plan);
1034       }
1035     }
1036     return plans;
1037   }
1038 
1039   /**
1040    * Assign the regions that should be on master regionserver.
1041    */
1042   protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
1043       Collection<HRegionInfo> regions, List<ServerName> servers) {
1044     if (servers == null || regions == null || regions.isEmpty()) {
1045       return null;
1046     }
1047     Map<ServerName, List<HRegionInfo>> assignments
1048       = new TreeMap<ServerName, List<HRegionInfo>>();
1049     if (masterServerName != null && servers.contains(masterServerName)) {
1050       assignments.put(masterServerName, new ArrayList<HRegionInfo>());
1051       for (HRegionInfo region: regions) {
1052         if (shouldBeOnMaster(region)) {
1053           assignments.get(masterServerName).add(region);
1054         }
1055       }
1056     }
1057     return assignments;
1058   }
1059 
1060   @Override
1061   public Configuration getConf() {
1062     return this.config;
1063   }
1064 
1065   @Override
1066   public synchronized void setClusterStatus(ClusterStatus st) {
1067     this.clusterStatus = st;
1068     regionFinder.setClusterStatus(st);
1069   }
1070 
1071   @Override
1072   public void setMasterServices(MasterServices masterServices) {
1073     masterServerName = masterServices.getServerName();
1074     this.services = masterServices;
1075     this.regionFinder.setServices(masterServices);
1076   }
1077 
1078   public void setRackManager(RackManager rackManager) {
1079     this.rackManager = rackManager;
1080   }
1081 
1082   protected boolean needsBalance(Cluster c) {
1083     ClusterLoadState cs = new ClusterLoadState(c.clusterState);
1084     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
1085       if (LOG.isDebugEnabled()) {
1086         LOG.debug("Not running balancer because only " + cs.getNumServers()
1087             + " active regionserver(s)");
1088       }
1089       return false;
1090     }
1091     if(areSomeRegionReplicasColocated(c)) return true;
1092     // Check if we even need to do any load balancing
1093     // HBASE-3681 check sloppiness first
1094     float average = cs.getLoadAverage(); // for logging
1095     int floor = (int) Math.floor(average * (1 - slop));
1096     int ceiling = (int) Math.ceil(average * (1 + slop));
1097     if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
1098       NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
1099       if (LOG.isTraceEnabled()) {
1100         // If nothing to balance, then don't say anything unless trace-level logging.
1101         LOG.trace("Skipping load balancing because balanced cluster; " +
1102           "servers=" + cs.getNumServers() +
1103           " regions=" + cs.getNumRegions() + " average=" + average +
1104           " mostloaded=" + serversByLoad.lastKey().getLoad() +
1105           " leastloaded=" + serversByLoad.firstKey().getLoad());
1106       }
1107       return false;
1108     }
1109     return true;
1110   }
1111 
1112   /**
1113    * Subclasses should implement this to return true if the cluster has nodes that hosts
1114    * multiple replicas for the same region, or, if there are multiple racks and the same
1115    * rack hosts replicas of the same region
1116    * @param c Cluster information
1117    * @return whether region replicas are currently co-located
1118    */
1119   protected boolean areSomeRegionReplicasColocated(Cluster c) {
1120     return false;
1121   }
1122 
1123   /**
1124    * Generates a bulk assignment plan to be used on cluster startup using a
1125    * simple round-robin assignment.
1126    * <p>
1127    * Takes a list of all the regions and all the servers in the cluster and
1128    * returns a map of each server to the regions that it should be assigned.
1129    * <p>
1130    * Currently implemented as a round-robin assignment. Same invariant as load
1131    * balancing, all servers holding floor(avg) or ceiling(avg).
1132    *
1133    * TODO: Use block locations from HDFS to place regions with their blocks
1134    *
1135    * @param regions all regions
1136    * @param servers all servers
1137    * @return map of server to the regions it should take, or null if no
1138    *         assignment is possible (ie. no regions or no servers)
1139    */
1140   @Override
1141   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1142       List<ServerName> servers) {
1143     metricsBalancer.incrMiscInvocations();
1144     Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1145     if (assignments != null && !assignments.isEmpty()) {
1146       servers = new ArrayList<ServerName>(servers);
1147       // Guarantee not to put other regions on master
1148       servers.remove(masterServerName);
1149       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1150       if (!masterRegions.isEmpty()) {
1151         regions = new ArrayList<HRegionInfo>(regions);
1152         for (HRegionInfo region: masterRegions) {
1153           regions.remove(region);
1154         }
1155       }
1156     }
1157     if (regions == null || regions.isEmpty()) {
1158       return assignments;
1159     }
1160 
1161     int numServers = servers == null ? 0 : servers.size();
1162     if (numServers == 0) {
1163       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1164       return null;
1165     }
1166 
1167     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1168     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1169     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1170     // and balanced. This should also run fast with fewer number of iterations.
1171 
1172     if (numServers == 1) { // Only one server, nothing fancy we can do here
1173       ServerName server = servers.get(0);
1174       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1175       return assignments;
1176     }
1177 
1178     Cluster cluster = createCluster(servers, regions);
1179     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1180 
1181     roundRobinAssignment(cluster, regions, unassignedRegions,
1182       servers, assignments);
1183 
1184     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1185     // assign the remaining by going through the list and try to assign to servers one-by-one
1186     int serverIdx = RANDOM.nextInt(numServers);
1187     for (HRegionInfo region : unassignedRegions) {
1188       boolean assigned = false;
1189       for (int j = 0; j < numServers; j++) { // try all servers one by one
1190         ServerName serverName = servers.get((j + serverIdx) % numServers);
1191         if (!cluster.wouldLowerAvailability(region, serverName)) {
1192           List<HRegionInfo> serverRegions = assignments.get(serverName);
1193           if (serverRegions == null) {
1194             serverRegions = new ArrayList<HRegionInfo>();
1195             assignments.put(serverName, serverRegions);
1196           }
1197           serverRegions.add(region);
1198           cluster.doAssignRegion(region, serverName);
1199           serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
1200           assigned = true;
1201           break;
1202         }
1203       }
1204       if (!assigned) {
1205         lastFewRegions.add(region);
1206       }
1207     }
1208     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1209     // make it optimal later. we can end up with this if numReplicas > numServers.
1210     for (HRegionInfo region : lastFewRegions) {
1211       int i = RANDOM.nextInt(numServers);
1212       ServerName server = servers.get(i);
1213       List<HRegionInfo> serverRegions = assignments.get(server);
1214       if (serverRegions == null) {
1215         serverRegions = new ArrayList<HRegionInfo>();
1216         assignments.put(server, serverRegions);
1217       }
1218       serverRegions.add(region);
1219       cluster.doAssignRegion(region, server);
1220     }
1221     return assignments;
1222   }
1223 
1224   protected Cluster createCluster(List<ServerName> servers,
1225       Collection<HRegionInfo> regions) {
1226     // Get the snapshot of the current assignments for the regions in question, and then create
1227     // a cluster out of it. Note that we might have replicas already assigned to some servers
1228     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
1229     // replicas of the regions that are passed (for performance).
1230     Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1231 
1232     for (ServerName server : servers) {
1233       if (!clusterState.containsKey(server)) {
1234         clusterState.put(server, EMPTY_REGION_LIST);
1235       }
1236     }
1237     return new Cluster(regions, clusterState, null, this.regionFinder,
1238       rackManager);
1239   }
1240 
1241   /**
1242    * Generates an immediate assignment plan to be used by a new master for
1243    * regions in transition that do not have an already known destination.
1244    *
1245    * Takes a list of regions that need immediate assignment and a list of all
1246    * available servers. Returns a map of regions to the server they should be
1247    * assigned to.
1248    *
1249    * This method will return quickly and does not do any intelligent balancing.
1250    * The goal is to make a fast decision not the best decision possible.
1251    *
1252    * Currently this is random.
1253    *
1254    * @param regions
1255    * @param servers
1256    * @return map of regions to the server it should be assigned to
1257    */
1258   @Override
1259   public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
1260       List<ServerName> servers) {
1261     metricsBalancer.incrMiscInvocations();
1262     if (servers == null || servers.isEmpty()) {
1263       LOG.warn("Wanted to do random assignment but no servers to assign to");
1264       return null;
1265     }
1266 
1267     Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
1268     for (HRegionInfo region : regions) {
1269       assignments.put(region, randomAssignment(region, servers));
1270     }
1271     return assignments;
1272   }
1273 
1274   /**
1275    * Used to assign a single region to a random server.
1276    */
1277   @Override
1278   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1279     metricsBalancer.incrMiscInvocations();
1280     if (servers != null && servers.contains(masterServerName)) {
1281       if (shouldBeOnMaster(regionInfo)) {
1282         return masterServerName;
1283       }
1284       servers = new ArrayList<ServerName>(servers);
1285       // Guarantee not to put other regions on master
1286       servers.remove(masterServerName);
1287     }
1288 
1289     int numServers = servers == null ? 0 : servers.size();
1290     if (numServers == 0) {
1291       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1292       return null;
1293     }
1294     if (numServers == 1) { // Only one server, nothing fancy we can do here
1295       return servers.get(0);
1296     }
1297 
1298     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1299     Cluster cluster = createCluster(servers, regions);
1300     return randomAssignment(cluster, regionInfo, servers);
1301   }
1302 
1303   /**
1304    * Generates a bulk assignment startup plan, attempting to reuse the existing
1305    * assignment information from META, but adjusting for the specified list of
1306    * available/online servers available for assignment.
1307    * <p>
1308    * Takes a map of all regions to their existing assignment from META. Also
1309    * takes a list of online servers for regions to be assigned to. Attempts to
1310    * retain all assignment, so in some instances initial assignment will not be
1311    * completely balanced.
1312    * <p>
1313    * Any leftover regions without an existing server to be assigned to will be
1314    * assigned randomly to available servers.
1315    *
1316    * @param regions regions and existing assignment from meta
1317    * @param servers available servers
1318    * @return map of servers and regions to be assigned to them
1319    */
1320   @Override
1321   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1322       List<ServerName> servers) {
1323     // Update metrics
1324     metricsBalancer.incrMiscInvocations();
1325     Map<ServerName, List<HRegionInfo>> assignments
1326       = assignMasterRegions(regions.keySet(), servers);
1327     if (assignments != null && !assignments.isEmpty()) {
1328       servers = new ArrayList<ServerName>(servers);
1329       // Guarantee not to put other regions on master
1330       servers.remove(masterServerName);
1331       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1332       if (!masterRegions.isEmpty()) {
1333         regions = new HashMap<HRegionInfo, ServerName>(regions);
1334         for (HRegionInfo region: masterRegions) {
1335           regions.remove(region);
1336         }
1337       }
1338     }
1339     if (regions == null || regions.isEmpty()) {
1340       return assignments;
1341     }
1342 
1343     int numServers = servers == null ? 0 : servers.size();
1344     if (numServers == 0) {
1345       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1346       return null;
1347     }
1348     if (numServers == 1) { // Only one server, nothing fancy we can do here
1349       ServerName server = servers.get(0);
1350       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1351       return assignments;
1352     }
1353 
1354     // Group all of the old assignments by their hostname.
1355     // We can't group directly by ServerName since the servers all have
1356     // new start-codes.
1357 
1358     // Group the servers by their hostname. It's possible we have multiple
1359     // servers on the same host on different ports.
1360     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1361     for (ServerName server : servers) {
1362       assignments.put(server, new ArrayList<HRegionInfo>());
1363       serversByHostname.put(server.getHostname(), server);
1364     }
1365 
1366     // Collection of the hostnames that used to have regions
1367     // assigned, but for which we no longer have any RS running
1368     // after the cluster restart.
1369     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1370 
1371     int numRandomAssignments = 0;
1372     int numRetainedAssigments = 0;
1373 
1374     Cluster cluster = createCluster(servers, regions.keySet());
1375 
1376     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1377       HRegionInfo region = entry.getKey();
1378       ServerName oldServerName = entry.getValue();
1379       List<ServerName> localServers = new ArrayList<ServerName>();
1380       if (oldServerName != null) {
1381         localServers = serversByHostname.get(oldServerName.getHostname());
1382       }
1383       if (localServers.isEmpty()) {
1384         // No servers on the new cluster match up with this hostname,
1385         // assign randomly.
1386         ServerName randomServer = randomAssignment(cluster, region, servers);
1387         assignments.get(randomServer).add(region);
1388         numRandomAssignments++;
1389         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1390       } else if (localServers.size() == 1) {
1391         // the usual case - one new server on same host
1392         ServerName target = localServers.get(0);
1393         assignments.get(target).add(region);
1394         cluster.doAssignRegion(region, target);
1395         numRetainedAssigments++;
1396       } else {
1397         // multiple new servers in the cluster on this same host
1398         if (localServers.contains(oldServerName)) {
1399           assignments.get(oldServerName).add(region);
1400           cluster.doAssignRegion(region, oldServerName);
1401         } else {
1402           ServerName target = null;
1403           for (ServerName tmp: localServers) {
1404             if (tmp.getPort() == oldServerName.getPort()) {
1405               target = tmp;
1406               break;
1407             }
1408           }
1409           if (target == null) {
1410             target = randomAssignment(cluster, region, localServers);
1411           }
1412           assignments.get(target).add(region);
1413         }
1414         numRetainedAssigments++;
1415       }
1416     }
1417 
1418     String randomAssignMsg = "";
1419     if (numRandomAssignments > 0) {
1420       randomAssignMsg =
1421           numRandomAssignments + " regions were assigned "
1422               + "to random hosts, since the old hosts for these regions are no "
1423               + "longer present in the cluster. These hosts were:\n  "
1424               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1425     }
1426 
1427     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1428         + " retained the pre-restart assignment. " + randomAssignMsg);
1429     return assignments;
1430   }
1431 
1432   @Override
1433   public void initialize() throws HBaseIOException{
1434   }
1435 
1436   @Override
1437   public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1438   }
1439 
1440   @Override
1441   public void regionOffline(HRegionInfo regionInfo) {
1442   }
1443 
1444   @Override
1445   public boolean isStopped() {
1446     return stopped;
1447   }
1448 
1449   @Override
1450   public void stop(String why) {
1451     LOG.info("Load Balancer stop requested: "+why);
1452     stopped = true;
1453   }
1454 
1455   /**
1456    * Used to assign a single region to a random server.
1457    */
1458   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1459       List<ServerName> servers) {
1460     int numServers = servers.size(); // servers is not null, numServers > 1
1461     ServerName sn = null;
1462     final int maxIterations = numServers * 4;
1463     int iterations = 0;
1464 
1465     do {
1466       int i = RANDOM.nextInt(numServers);
1467       sn = servers.get(i);
1468     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1469         && iterations++ < maxIterations);
1470     cluster.doAssignRegion(regionInfo, sn);
1471     return sn;
1472   }
1473 
1474   /**
1475    * Round robin a list of regions to a list of servers
1476    */
1477   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1478       List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1479       Map<ServerName, List<HRegionInfo>> assignments) {
1480 
1481     int numServers = servers.size();
1482     int numRegions = regions.size();
1483     int max = (int) Math.ceil((float) numRegions / numServers);
1484     int serverIdx = 0;
1485     if (numServers > 1) {
1486       serverIdx = RANDOM.nextInt(numServers);
1487     }
1488     int regionIdx = 0;
1489 
1490     for (int j = 0; j < numServers; j++) {
1491       ServerName server = servers.get((j + serverIdx) % numServers);
1492       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1493       for (int i = regionIdx; i < numRegions; i += numServers) {
1494         HRegionInfo region = regions.get(i % numRegions);
1495         if (cluster.wouldLowerAvailability(region, server)) {
1496           unassignedRegions.add(region);
1497         } else {
1498           serverRegions.add(region);
1499           cluster.doAssignRegion(region, server);
1500         }
1501       }
1502       assignments.put(server, serverRegions);
1503       regionIdx++;
1504     }
1505   }
1506 
1507   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1508     Collection<HRegionInfo> regions) {
1509     if (this.services != null && this.services.getAssignmentManager() != null) {
1510       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1511     } else {
1512       return new HashMap<ServerName, List<HRegionInfo>>();
1513     }
1514   }
1515 
1516   @Override
1517   public void onConfigurationChange(Configuration conf) {
1518   }
1519 }