View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.Deque;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.NavigableMap;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeMap;
36  
37  import org.apache.commons.lang.NotImplementedException;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.ClusterStatus;
42  import org.apache.hadoop.hbase.HBaseIOException;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.RegionLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
47  import org.apache.hadoop.hbase.conf.ConfigurationObserver;
48  import org.apache.hadoop.hbase.master.LoadBalancer;
49  import org.apache.hadoop.hbase.master.MasterServices;
50  import org.apache.hadoop.hbase.master.RackManager;
51  import org.apache.hadoop.hbase.master.RegionPlan;
52  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
53  import org.apache.hadoop.util.StringUtils;
54  
55  import com.google.common.base.Joiner;
56  import com.google.common.collect.ArrayListMultimap;
57  import com.google.common.collect.Lists;
58  import com.google.common.collect.Sets;
59  
60  /**
61   * The base class for load balancers. It provides the the functions used to by
62   * {@link org.apache.hadoop.hbase.master.AssignmentManager} to assign regions
63   * in the edge cases. It doesn't provide an implementation of the actual
64   * balancing algorithm.
65   *
66   */
67  public abstract class BaseLoadBalancer implements LoadBalancer {
68    private static final int MIN_SERVER_BALANCE = 2;
69    private volatile boolean stopped = false;
70  
71    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
72  
73    protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
74  
75    private static class DefaultRackManager extends RackManager {
76      @Override
77      public String getRack(ServerName server) {
78        return UNKNOWN_RACK;
79      }
80    }
81  
82    /**
83     * An efficient array based implementation similar to ClusterState for keeping
84     * the status of the cluster in terms of region assignment and distribution.
85     * LoadBalancers, such as StochasticLoadBalancer uses this Cluster object because of
86     * hundreds of thousands of hashmap manipulations are very costly, which is why this
87     * class uses mostly indexes and arrays.
88     *
89     * Cluster tracks a list of unassigned regions, region assignments, and the server
90     * topology in terms of server names, hostnames and racks.
91     */
92    protected static class Cluster {
93      ServerName[] servers;
94      String[] hosts; // ServerName uniquely identifies a region server. multiple RS can run on the same host
95      String[] racks;
96      boolean multiServersPerHost = false; // whether or not any host has more than one server
97  
98      ArrayList<String> tables;
99      HRegionInfo[] regions;
100     Deque<RegionLoad>[] regionLoads;
101 
102     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
103 
104     int[]   serverIndexToHostIndex;      //serverIndex -> host index
105     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
106 
107     int[][] regionsPerServer;            //serverIndex -> region list
108     int[][] regionsPerHost;              //hostIndex -> list of regions
109     int[][] regionsPerRack;              //rackIndex -> region list
110     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
111     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
112     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
113 
114     int[][] serversPerHost;              //hostIndex -> list of server indexes
115     int[][] serversPerRack;              //rackIndex -> list of server indexes
116     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
117     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
118     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
119     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
120     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
121     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
122     boolean hasRegionReplicas = false;   //whether there is regions with replicas
123 
124     Integer[] serverIndicesSortedByRegionCount;
125 
126     Map<String, Integer> serversToIndex;
127     Map<String, Integer> hostsToIndex;
128     Map<String, Integer> racksToIndex;
129     Map<String, Integer> tablesToIndex;
130     Map<HRegionInfo, Integer> regionsToIndex;
131 
132     int numServers;
133     int numHosts;
134     int numRacks;
135     int numTables;
136     int numRegions;
137 
138     int numMovedRegions = 0; //num moved regions from the initial configuration
139     Map<ServerName, List<HRegionInfo>> clusterState;
140 
141     protected final RackManager rackManager;
142 
143     protected Cluster(
144         Map<ServerName, List<HRegionInfo>> clusterState,
145         Map<String, Deque<RegionLoad>> loads,
146         RegionLocationFinder regionFinder,
147         RackManager rackManager) {
148       this(null, clusterState, loads, regionFinder,
149         rackManager);
150     }
151 
152     @SuppressWarnings("unchecked")
153     protected Cluster(
154         Collection<HRegionInfo> unassignedRegions,
155         Map<ServerName, List<HRegionInfo>> clusterState,
156         Map<String, Deque<RegionLoad>> loads,
157         RegionLocationFinder regionFinder,
158         RackManager rackManager) {
159 
160       if (unassignedRegions == null) {
161         unassignedRegions = EMPTY_REGION_LIST;
162       }
163 
164       serversToIndex = new HashMap<String, Integer>();
165       hostsToIndex = new HashMap<String, Integer>();
166       racksToIndex = new HashMap<String, Integer>();
167       tablesToIndex = new HashMap<String, Integer>();
168 
169       //TODO: We should get the list of tables from master
170       tables = new ArrayList<String>();
171       this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
172 
173       numRegions = 0;
174 
175       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
176       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
177       this.clusterState = clusterState;
178 
179       // Use servername and port as there can be dead servers in this list. We want everything with
180       // a matching hostname and port to have the same index.
181       for (ServerName sn : clusterState.keySet()) {
182         if (serversToIndex.get(sn.getHostAndPort()) == null) {
183           serversToIndex.put(sn.getHostAndPort(), numServers++);
184         }
185         if (!hostsToIndex.containsKey(sn.getHostname())) {
186           hostsToIndex.put(sn.getHostname(), numHosts++);
187           serversPerHostList.add(new ArrayList<Integer>(1));
188         }
189 
190         int serverIndex = serversToIndex.get(sn.getHostAndPort());
191         int hostIndex = hostsToIndex.get(sn.getHostname());
192         serversPerHostList.get(hostIndex).add(serverIndex);
193 
194         String rack = this.rackManager.getRack(sn);
195         if (!racksToIndex.containsKey(rack)) {
196           racksToIndex.put(rack, numRacks++);
197           serversPerRackList.add(new ArrayList<Integer>());
198         }
199         int rackIndex = racksToIndex.get(rack);
200         serversPerRackList.get(rackIndex).add(serverIndex);
201       }
202 
203       // Count how many regions there are.
204       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
205         numRegions += entry.getValue().size();
206       }
207       numRegions += unassignedRegions.size();
208 
209       regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
210       servers = new ServerName[numServers];
211       serversPerHost = new int[numHosts][];
212       serversPerRack = new int[numRacks][];
213       regions = new HRegionInfo[numRegions];
214       regionIndexToServerIndex = new int[numRegions];
215       initialRegionIndexToServerIndex = new int[numRegions];
216       regionIndexToTableIndex = new int[numRegions];
217       regionIndexToPrimaryIndex = new int[numRegions];
218       regionLoads = new Deque[numRegions];
219       regionLocations = new int[numRegions][];
220       serverIndicesSortedByRegionCount = new Integer[numServers];
221 
222       serverIndexToHostIndex = new int[numServers];
223       serverIndexToRackIndex = new int[numServers];
224       regionsPerServer = new int[numServers][];
225       regionsPerHost = new int[numHosts][];
226       regionsPerRack = new int[numRacks][];
227       primariesOfRegionsPerServer = new int[numServers][];
228       primariesOfRegionsPerHost = new int[numHosts][];
229       primariesOfRegionsPerRack = new int[numRacks][];
230 
231       int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
232 
233       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
234         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
235 
236         // keep the servername if this is the first server name for this hostname
237         // or this servername has the newest startcode.
238         if (servers[serverIndex] == null ||
239             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
240           servers[serverIndex] = entry.getKey();
241         }
242 
243         if (regionsPerServer[serverIndex] != null) {
244           // there is another server with the same hostAndPort in ClusterState.
245           // allocate the array for the total size
246           regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
247         } else {
248           regionsPerServer[serverIndex] = new int[entry.getValue().size()];
249         }
250         primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
251         serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
252       }
253 
254       hosts = new String[numHosts];
255       for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
256         hosts[entry.getValue()] = entry.getKey();
257       }
258       racks = new String[numRacks];
259       for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
260         racks[entry.getValue()] = entry.getKey();
261       }
262 
263       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
264         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
265         regionPerServerIndex = 0;
266 
267         int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
268         serverIndexToHostIndex[serverIndex] = hostIndex;
269 
270         int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
271         serverIndexToRackIndex[serverIndex] = rackIndex;
272 
273         for (HRegionInfo region : entry.getValue()) {
274           registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
275 
276           regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
277           regionIndex++;
278         }
279       }
280       for (HRegionInfo region : unassignedRegions) {
281         registerRegion(region, regionIndex, -1, loads, regionFinder);
282         regionIndex++;
283       }
284 
285       for (int i = 0; i < serversPerHostList.size(); i++) {
286         serversPerHost[i] = new int[serversPerHostList.get(i).size()];
287         for (int j = 0; j < serversPerHost[i].length; j++) {
288           serversPerHost[i][j] = serversPerHostList.get(i).get(j);
289         }
290         if (serversPerHost[i].length > 1) {
291           multiServersPerHost = true;
292         }
293       }
294 
295       for (int i = 0; i < serversPerRackList.size(); i++) {
296         serversPerRack[i] = new int[serversPerRackList.get(i).size()];
297         for (int j = 0; j < serversPerRack[i].length; j++) {
298           serversPerRack[i][j] = serversPerRackList.get(i).get(j);
299         }
300       }
301 
302       numTables = tables.size();
303       numRegionsPerServerPerTable = new int[numServers][numTables];
304 
305       for (int i = 0; i < numServers; i++) {
306         for (int j = 0; j < numTables; j++) {
307           numRegionsPerServerPerTable[i][j] = 0;
308         }
309       }
310 
311       for (int i=0; i < regionIndexToServerIndex.length; i++) {
312         if (regionIndexToServerIndex[i] >= 0) {
313           numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
314         }
315       }
316 
317       numMaxRegionsPerTable = new int[numTables];
318       for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
319         for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
320           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
321             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
322           }
323         }
324       }
325 
326       for (int i = 0; i < regions.length; i ++) {
327         HRegionInfo info = regions[i];
328         if (RegionReplicaUtil.isDefaultReplica(info)) {
329           regionIndexToPrimaryIndex[i] = i;
330         } else {
331           hasRegionReplicas = true;
332           HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
333           regionIndexToPrimaryIndex[i] =
334               regionsToIndex.containsKey(primaryInfo) ?
335               regionsToIndex.get(primaryInfo):
336               -1;
337         }
338       }
339 
340       for (int i = 0; i < regionsPerServer.length; i++) {
341         primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
342         for (int j = 0; j < regionsPerServer[i].length; j++) {
343           int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
344           primariesOfRegionsPerServer[i][j] = primaryIndex;
345         }
346         // sort the regions by primaries.
347         Arrays.sort(primariesOfRegionsPerServer[i]);
348       }
349 
350       // compute regionsPerHost
351       if (multiServersPerHost) {
352         for (int i = 0 ; i < serversPerHost.length; i++) {
353           int numRegionsPerHost = 0;
354           for (int j = 0; j < serversPerHost[i].length; j++) {
355             numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
356           }
357           regionsPerHost[i] = new int[numRegionsPerHost];
358           primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
359         }
360         for (int i = 0 ; i < serversPerHost.length; i++) {
361           int numRegionPerHostIndex = 0;
362           for (int j = 0; j < serversPerHost[i].length; j++) {
363             for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
364               int region = regionsPerServer[serversPerHost[i][j]][k];
365               regionsPerHost[i][numRegionPerHostIndex] = region;
366               int primaryIndex = regionIndexToPrimaryIndex[region];
367               primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
368               numRegionPerHostIndex++;
369             }
370           }
371           // sort the regions by primaries.
372           Arrays.sort(primariesOfRegionsPerHost[i]);
373         }
374       }
375 
376       // compute regionsPerRack
377       if (numRacks > 1) {
378         for (int i = 0 ; i < serversPerRack.length; i++) {
379           int numRegionsPerRack = 0;
380           for (int j = 0; j < serversPerRack[i].length; j++) {
381             numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
382           }
383           regionsPerRack[i] = new int[numRegionsPerRack];
384           primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
385         }
386 
387         for (int i = 0 ; i < serversPerRack.length; i++) {
388           int numRegionPerRackIndex = 0;
389           for (int j = 0; j < serversPerRack[i].length; j++) {
390             for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
391               int region = regionsPerServer[serversPerRack[i][j]][k];
392               regionsPerRack[i][numRegionPerRackIndex] = region;
393               int primaryIndex = regionIndexToPrimaryIndex[region];
394               primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
395               numRegionPerRackIndex++;
396             }
397           }
398           // sort the regions by primaries.
399           Arrays.sort(primariesOfRegionsPerRack[i]);
400         }
401       }
402     }
403 
404     /** Helper for Cluster constructor to handle a region */
405     private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
406         Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
407       String tableName = region.getTable().getNameAsString();
408       if (!tablesToIndex.containsKey(tableName)) {
409         tables.add(tableName);
410         tablesToIndex.put(tableName, tablesToIndex.size());
411       }
412       int tableIndex = tablesToIndex.get(tableName);
413 
414       regionsToIndex.put(region, regionIndex);
415       regions[regionIndex] = region;
416       regionIndexToServerIndex[regionIndex] = serverIndex;
417       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
418       regionIndexToTableIndex[regionIndex] = tableIndex;
419 
420       // region load
421       if (loads != null) {
422         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
423         // That could have failed if the RegionLoad is using the other regionName
424         if (rl == null) {
425           // Try getting the region load using encoded name.
426           rl = loads.get(region.getEncodedName());
427         }
428         regionLoads[regionIndex] = rl;
429       }
430 
431       if (regionFinder != null) {
432         //region location
433         List<ServerName> loc = regionFinder.getTopBlockLocations(region);
434         regionLocations[regionIndex] = new int[loc.size()];
435         for (int i=0; i < loc.size(); i++) {
436           regionLocations[regionIndex][i] =
437               loc.get(i) == null ? -1 :
438                 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
439                     : serversToIndex.get(loc.get(i).getHostAndPort()));
440         }
441       }
442     }
443 
444     /** An action to move or swap a region */
445     public static class Action {
446       public static enum Type {
447         ASSIGN_REGION,
448         MOVE_REGION,
449         SWAP_REGIONS,
450         NULL,
451       }
452 
453       public Type type;
454       public Action (Type type) {this.type = type;}
455       /** Returns an Action which would undo this action */
456       public Action undoAction() { return this; }
457       @Override
458       public String toString() { return type + ":";}
459     }
460 
461     public static class AssignRegionAction extends Action {
462       public int region;
463       public int server;
464       public AssignRegionAction(int region, int server) {
465         super(Type.ASSIGN_REGION);
466         this.region = region;
467         this.server = server;
468       }
469       @Override
470       public Action undoAction() {
471         // TODO implement this. This action is not being used by the StochasticLB for now
472         // in case it uses it, we should implement this function.
473         throw new NotImplementedException();
474       }
475       @Override
476       public String toString() {
477         return type + ": " + region + ":" + server;
478       }
479     }
480 
481     public static class MoveRegionAction extends Action {
482       public int region;
483       public int fromServer;
484       public int toServer;
485 
486       public MoveRegionAction(int region, int fromServer, int toServer) {
487         super(Type.MOVE_REGION);
488         this.fromServer = fromServer;
489         this.region = region;
490         this.toServer = toServer;
491       }
492       @Override
493       public Action undoAction() {
494         return new MoveRegionAction (region, toServer, fromServer);
495       }
496       @Override
497       public String toString() {
498         return type + ": " + region + ":" + fromServer + " -> " + toServer;
499       }
500     }
501 
502     public static class SwapRegionsAction extends Action {
503       public int fromServer;
504       public int fromRegion;
505       public int toServer;
506       public int toRegion;
507       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
508         super(Type.SWAP_REGIONS);
509         this.fromServer = fromServer;
510         this.fromRegion = fromRegion;
511         this.toServer = toServer;
512         this.toRegion = toRegion;
513       }
514       @Override
515       public Action undoAction() {
516         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
517       }
518       @Override
519       public String toString() {
520         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
521       }
522     }
523 
524     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
525         justification="Mistake. Too disruptive to change now")
526     public static final Action NullAction = new Action(Type.NULL);
527 
528     public void doAction(Action action) {
529       switch (action.type) {
530       case NULL: break;
531       case ASSIGN_REGION:
532         AssignRegionAction ar = (AssignRegionAction) action;
533         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
534         regionMoved(ar.region, -1, ar.server);
535         break;
536       case MOVE_REGION:
537         MoveRegionAction mra = (MoveRegionAction) action;
538         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
539         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
540         regionMoved(mra.region, mra.fromServer, mra.toServer);
541         break;
542       case SWAP_REGIONS:
543         SwapRegionsAction a = (SwapRegionsAction) action;
544         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
545         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
546         regionMoved(a.fromRegion, a.fromServer, a.toServer);
547         regionMoved(a.toRegion, a.toServer, a.fromServer);
548         break;
549       default:
550         throw new RuntimeException("Uknown action:" + action.type);
551       }
552     }
553 
554     /**
555      * Return true if the placement of region on server would lower the availability
556      * of the region in question
557      * @param server
558      * @param region
559      * @return true or false
560      */
561     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
562       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
563         return false; // safeguard against race between cluster.servers and servers from LB method args
564       }
565       int server = serversToIndex.get(serverName.getHostAndPort());
566       int region = regionsToIndex.get(regionInfo);
567 
568       int primary = regionIndexToPrimaryIndex[region];
569 
570       // there is a subset relation for server < host < rack
571       // check server first
572 
573       if (contains(primariesOfRegionsPerServer[server], primary)) {
574         // check for whether there are other servers that we can place this region
575         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
576           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
577             return true; // meaning there is a better server
578           }
579         }
580         return false; // there is not a better server to place this
581       }
582 
583       // check host
584       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
585         int host = serverIndexToHostIndex[server];
586         if (contains(primariesOfRegionsPerHost[host], primary)) {
587           // check for whether there are other hosts that we can place this region
588           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
589             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
590               return true; // meaning there is a better host
591             }
592           }
593           return false; // there is not a better host to place this
594         }
595       }
596 
597       // check rack
598       if (numRacks > 1) {
599         int rack = serverIndexToRackIndex[server];
600         if (contains(primariesOfRegionsPerRack[rack], primary)) {
601           // check for whether there are other racks that we can place this region
602           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
603             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
604               return true; // meaning there is a better rack
605             }
606           }
607           return false; // there is not a better rack to place this
608         }
609       }
610       return false;
611     }
612 
613     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
614       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
615         return;
616       }
617       int server = serversToIndex.get(serverName.getHostAndPort());
618       int region = regionsToIndex.get(regionInfo);
619       doAction(new AssignRegionAction(region, server));
620     }
621 
622     void regionMoved(int region, int oldServer, int newServer) {
623       regionIndexToServerIndex[region] = newServer;
624       if (initialRegionIndexToServerIndex[region] == newServer) {
625         numMovedRegions--; //region moved back to original location
626       } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
627         numMovedRegions++; //region moved from original location
628       }
629       int tableIndex = regionIndexToTableIndex[region];
630       if (oldServer >= 0) {
631         numRegionsPerServerPerTable[oldServer][tableIndex]--;
632       }
633       numRegionsPerServerPerTable[newServer][tableIndex]++;
634 
635       //check whether this caused maxRegionsPerTable in the new Server to be updated
636       if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
637         numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
638       } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
639           == numMaxRegionsPerTable[tableIndex]) {
640         //recompute maxRegionsPerTable since the previous value was coming from the old server
641         for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
642           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
643             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
644           }
645         }
646       }
647 
648       // update for servers
649       int primary = regionIndexToPrimaryIndex[region];
650       if (oldServer >= 0) {
651         primariesOfRegionsPerServer[oldServer] = removeRegion(
652           primariesOfRegionsPerServer[oldServer], primary);
653       }
654       primariesOfRegionsPerServer[newServer] = addRegionSorted(
655         primariesOfRegionsPerServer[newServer], primary);
656 
657       // update for hosts
658       if (multiServersPerHost) {
659         int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
660         int newHost = serverIndexToHostIndex[newServer];
661         if (newHost != oldHost) {
662           regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
663           primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
664           if (oldHost >= 0) {
665             regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
666             primariesOfRegionsPerHost[oldHost] = removeRegion(
667               primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
668           }
669         }
670       }
671 
672       // update for racks
673       if (numRacks > 1) {
674         int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
675         int newRack = serverIndexToRackIndex[newServer];
676         if (newRack != oldRack) {
677           regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
678           primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
679           if (oldRack >= 0) {
680             regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
681             primariesOfRegionsPerRack[oldRack] = removeRegion(
682               primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
683           }
684         }
685       }
686     }
687 
688     int[] removeRegion(int[] regions, int regionIndex) {
689       //TODO: this maybe costly. Consider using linked lists
690       int[] newRegions = new int[regions.length - 1];
691       int i = 0;
692       for (i = 0; i < regions.length; i++) {
693         if (regions[i] == regionIndex) {
694           break;
695         }
696         newRegions[i] = regions[i];
697       }
698       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
699       return newRegions;
700     }
701 
702     int[] addRegion(int[] regions, int regionIndex) {
703       int[] newRegions = new int[regions.length + 1];
704       System.arraycopy(regions, 0, newRegions, 0, regions.length);
705       newRegions[newRegions.length - 1] = regionIndex;
706       return newRegions;
707     }
708 
709     int[] addRegionSorted(int[] regions, int regionIndex) {
710       int[] newRegions = new int[regions.length + 1];
711       int i = 0;
712       for (i = 0; i < regions.length; i++) { // find the index to insert
713         if (regions[i] > regionIndex) {
714           break;
715         }
716       }
717       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
718       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
719       newRegions[i] = regionIndex;
720 
721       return newRegions;
722     }
723 
724     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
725       int i = 0;
726       for (i = 0; i < regions.length; i++) {
727         if (regions[i] == regionIndex) {
728           regions[i] = newRegionIndex;
729           break;
730         }
731       }
732       return regions;
733     }
734 
735     void sortServersByRegionCount() {
736       Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
737     }
738 
739     int getNumRegions(int server) {
740       return regionsPerServer[server].length;
741     }
742 
743     boolean contains(int[] arr, int val) {
744       return Arrays.binarySearch(arr, val) >= 0;
745     }
746 
747     private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
748       @Override
749       public int compare(Integer integer, Integer integer2) {
750         return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
751       }
752     };
753 
754     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
755         justification="Not important but should be fixed")
756     @Override
757     public String toString() {
758       String desc = "Cluster{" +
759           "servers=[";
760           for(ServerName sn:servers) {
761              desc += sn.getHostAndPort() + ", ";
762           }
763           desc +=
764           ", serverIndicesSortedByRegionCount="+
765           Arrays.toString(serverIndicesSortedByRegionCount) +
766           ", regionsPerServer=[";
767 
768           for (int[]r:regionsPerServer) {
769             desc += Arrays.toString(r);
770           }
771           desc += "]" +
772           ", numMaxRegionsPerTable=" +
773           Arrays.toString(numMaxRegionsPerTable) +
774           ", numRegions=" +
775           numRegions +
776           ", numServers=" +
777           numServers +
778           ", numTables=" +
779           numTables +
780           ", numMovedRegions=" +
781           numMovedRegions +
782           '}';
783       return desc;
784     }
785   }
786 
787   // slop for regions
788   protected float slop;
789   protected Configuration config;
790   protected RackManager rackManager;
791   private static final Random RANDOM = new Random(System.currentTimeMillis());
792   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
793 
794   public static final String TABLES_ON_MASTER =
795     "hbase.balancer.tablesOnMaster";
796 
797   protected final Set<String> tablesOnMaster = new HashSet<String>();
798   protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
799   protected ClusterStatus clusterStatus = null;
800   protected ServerName masterServerName;
801   protected MasterServices services;
802 
803   protected static String[] getTablesOnMaster(Configuration conf) {
804     String valueString = conf.get(TABLES_ON_MASTER);
805     if (valueString != null) {
806       valueString = valueString.trim();
807     }
808     if (valueString == null || valueString.equalsIgnoreCase("none")) {
809       return null;
810     }
811     return StringUtils.getStrings(valueString);
812   }
813 
814   /**
815    * Check if configured to put any tables on the active master
816    */
817   public static boolean tablesOnMaster(Configuration conf) {
818     String[] tables = getTablesOnMaster(conf);
819     return tables != null && tables.length > 0;
820   }
821 
822   @Override
823   public void setConf(Configuration conf) {
824     setSlop(conf);
825     if (slop < 0) slop = 0;
826     else if (slop > 1) slop = 1;
827 
828     this.config = conf;
829     String[] tables = getTablesOnMaster(conf);
830     if (tables != null && tables.length > 0) {
831       Collections.addAll(tablesOnMaster, tables);
832     }
833     this.rackManager = new RackManager(getConf());
834     regionFinder.setConf(conf);
835   }
836 
837   protected void setSlop(Configuration conf) {
838     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
839   }
840 
841   /**
842    * Check if a region belongs to some small system table.
843    * If so, the primary replica may be expected to be put on the master regionserver.
844    */
845   public boolean shouldBeOnMaster(HRegionInfo region) {
846     return tablesOnMaster.contains(region.getTable().getNameAsString())
847         && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
848   }
849 
850   /**
851    * Balance the regions that should be on master regionserver.
852    */
853   protected List<RegionPlan> balanceMasterRegions(
854       Map<ServerName, List<HRegionInfo>> clusterMap) {
855     if (masterServerName == null
856         || clusterMap == null || clusterMap.size() <= 1) return null;
857     List<RegionPlan> plans = null;
858     List<HRegionInfo> regions = clusterMap.get(masterServerName);
859     if (regions != null) {
860       Iterator<ServerName> keyIt = null;
861       for (HRegionInfo region: regions) {
862         if (shouldBeOnMaster(region)) continue;
863 
864         // Find a non-master regionserver to host the region
865         if (keyIt == null || !keyIt.hasNext()) {
866           keyIt = clusterMap.keySet().iterator();
867         }
868         ServerName dest = keyIt.next();
869         if (masterServerName.equals(dest)) {
870           if (!keyIt.hasNext()) {
871             keyIt = clusterMap.keySet().iterator();
872           }
873           dest = keyIt.next();
874         }
875 
876         // Move this region away from the master regionserver
877         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
878         if (plans == null) {
879           plans = new ArrayList<RegionPlan>();
880         }
881         plans.add(plan);
882       }
883     }
884     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
885       if (masterServerName.equals(server.getKey())) continue;
886       for (HRegionInfo region: server.getValue()) {
887         if (!shouldBeOnMaster(region)) continue;
888 
889         // Move this region to the master regionserver
890         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
891         if (plans == null) {
892           plans = new ArrayList<RegionPlan>();
893         }
894         plans.add(plan);
895       }
896     }
897     return plans;
898   }
899 
900   /**
901    * Assign the regions that should be on master regionserver.
902    */
903   protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
904       Collection<HRegionInfo> regions, List<ServerName> servers) {
905     if (servers == null || regions == null || regions.isEmpty()) {
906       return null;
907     }
908     Map<ServerName, List<HRegionInfo>> assignments
909       = new TreeMap<ServerName, List<HRegionInfo>>();
910     if (masterServerName != null && servers.contains(masterServerName)) {
911       assignments.put(masterServerName, new ArrayList<HRegionInfo>());
912       for (HRegionInfo region: regions) {
913         if (shouldBeOnMaster(region)) {
914           assignments.get(masterServerName).add(region);
915         }
916       }
917     }
918     return assignments;
919   }
920 
921   @Override
922   public Configuration getConf() {
923     return this.config;
924   }
925 
926   @Override
927   public void setClusterStatus(ClusterStatus st) {
928     this.clusterStatus = st;
929     regionFinder.setClusterStatus(st);
930   }
931 
932   @Override
933   public void setMasterServices(MasterServices masterServices) {
934     masterServerName = masterServices.getServerName();
935     this.services = masterServices;
936     this.regionFinder.setServices(masterServices);
937   }
938 
939   public void setRackManager(RackManager rackManager) {
940     this.rackManager = rackManager;
941   }
942 
943   protected boolean needsBalance(Cluster c) {
944     ClusterLoadState cs = new ClusterLoadState(c.clusterState);
945     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
946       if (LOG.isDebugEnabled()) {
947         LOG.debug("Not running balancer because only " + cs.getNumServers()
948             + " active regionserver(s)");
949       }
950       return false;
951     }
952     if(areSomeRegionReplicasColocated(c)) return true;
953     // Check if we even need to do any load balancing
954     // HBASE-3681 check sloppiness first
955     float average = cs.getLoadAverage(); // for logging
956     int floor = (int) Math.floor(average * (1 - slop));
957     int ceiling = (int) Math.ceil(average * (1 + slop));
958     if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
959       NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
960       if (LOG.isTraceEnabled()) {
961         // If nothing to balance, then don't say anything unless trace-level logging.
962         LOG.trace("Skipping load balancing because balanced cluster; " +
963           "servers=" + cs.getNumServers() +
964           " regions=" + cs.getNumRegions() + " average=" + average +
965           " mostloaded=" + serversByLoad.lastKey().getLoad() +
966           " leastloaded=" + serversByLoad.firstKey().getLoad());
967       }
968       return false;
969     }
970     return true;
971   }
972 
973   /**
974    * Subclasses should implement this to return true if the cluster has nodes that hosts
975    * multiple replicas for the same region, or, if there are multiple racks and the same
976    * rack hosts replicas of the same region
977    * @param c Cluster information
978    * @return whether region replicas are currently co-located
979    */
980   protected boolean areSomeRegionReplicasColocated(Cluster c) {
981     return false;
982   }
983 
984   /**
985    * Generates a bulk assignment plan to be used on cluster startup using a
986    * simple round-robin assignment.
987    * <p>
988    * Takes a list of all the regions and all the servers in the cluster and
989    * returns a map of each server to the regions that it should be assigned.
990    * <p>
991    * Currently implemented as a round-robin assignment. Same invariant as load
992    * balancing, all servers holding floor(avg) or ceiling(avg).
993    *
994    * TODO: Use block locations from HDFS to place regions with their blocks
995    *
996    * @param regions all regions
997    * @param servers all servers
998    * @return map of server to the regions it should take, or null if no
999    *         assignment is possible (ie. no regions or no servers)
1000    */
1001   @Override
1002   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1003       List<ServerName> servers) {
1004     metricsBalancer.incrMiscInvocations();
1005     Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1006     if (assignments != null && !assignments.isEmpty()) {
1007       servers = new ArrayList<ServerName>(servers);
1008       // Guarantee not to put other regions on master
1009       servers.remove(masterServerName);
1010       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1011       if (!masterRegions.isEmpty()) {
1012         regions = new ArrayList<HRegionInfo>(regions);
1013         for (HRegionInfo region: masterRegions) {
1014           regions.remove(region);
1015         }
1016       }
1017     }
1018     if (regions == null || regions.isEmpty()) {
1019       return assignments;
1020     }
1021 
1022     int numServers = servers == null ? 0 : servers.size();
1023     if (numServers == 0) {
1024       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1025       return null;
1026     }
1027 
1028     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1029     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1030     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1031     // and balanced. This should also run fast with fewer number of iterations.
1032 
1033     if (numServers == 1) { // Only one server, nothing fancy we can do here
1034       ServerName server = servers.get(0);
1035       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1036       return assignments;
1037     }
1038 
1039     Cluster cluster = createCluster(servers, regions);
1040     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1041 
1042     roundRobinAssignment(cluster, regions, unassignedRegions,
1043       servers, assignments);
1044 
1045     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1046     // assign the remaining by going through the list and try to assign to servers one-by-one
1047     int serverIdx = RANDOM.nextInt(numServers);
1048     for (HRegionInfo region : unassignedRegions) {
1049       boolean assigned = false;
1050       for (int j = 0; j < numServers; j++) { // try all servers one by one
1051         ServerName serverName = servers.get((j + serverIdx) % numServers);
1052         if (!cluster.wouldLowerAvailability(region, serverName)) {
1053           List<HRegionInfo> serverRegions = assignments.get(serverName);
1054           if (serverRegions == null) {
1055             serverRegions = new ArrayList<HRegionInfo>();
1056             assignments.put(serverName, serverRegions);
1057           }
1058           serverRegions.add(region);
1059           cluster.doAssignRegion(region, serverName);
1060           serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
1061           assigned = true;
1062           break;
1063         }
1064       }
1065       if (!assigned) {
1066         lastFewRegions.add(region);
1067       }
1068     }
1069     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1070     // make it optimal later. we can end up with this if numReplicas > numServers.
1071     for (HRegionInfo region : lastFewRegions) {
1072       int i = RANDOM.nextInt(numServers);
1073       ServerName server = servers.get(i);
1074       List<HRegionInfo> serverRegions = assignments.get(server);
1075       if (serverRegions == null) {
1076         serverRegions = new ArrayList<HRegionInfo>();
1077         assignments.put(server, serverRegions);
1078       }
1079       serverRegions.add(region);
1080       cluster.doAssignRegion(region, server);
1081     }
1082     return assignments;
1083   }
1084 
1085   protected Cluster createCluster(List<ServerName> servers,
1086       Collection<HRegionInfo> regions) {
1087     // Get the snapshot of the current assignments for the regions in question, and then create
1088     // a cluster out of it. Note that we might have replicas already assigned to some servers
1089     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
1090     // replicas of the regions that are passed (for performance).
1091     Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1092 
1093     for (ServerName server : servers) {
1094       if (!clusterState.containsKey(server)) {
1095         clusterState.put(server, EMPTY_REGION_LIST);
1096       }
1097     }
1098     return new Cluster(regions, clusterState, null, this.regionFinder,
1099       rackManager);
1100   }
1101 
1102   /**
1103    * Generates an immediate assignment plan to be used by a new master for
1104    * regions in transition that do not have an already known destination.
1105    *
1106    * Takes a list of regions that need immediate assignment and a list of all
1107    * available servers. Returns a map of regions to the server they should be
1108    * assigned to.
1109    *
1110    * This method will return quickly and does not do any intelligent balancing.
1111    * The goal is to make a fast decision not the best decision possible.
1112    *
1113    * Currently this is random.
1114    *
1115    * @param regions
1116    * @param servers
1117    * @return map of regions to the server it should be assigned to
1118    */
1119   @Override
1120   public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
1121       List<ServerName> servers) {
1122     metricsBalancer.incrMiscInvocations();
1123     if (servers == null || servers.isEmpty()) {
1124       LOG.warn("Wanted to do random assignment but no servers to assign to");
1125       return null;
1126     }
1127 
1128     Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
1129     for (HRegionInfo region : regions) {
1130       assignments.put(region, randomAssignment(region, servers));
1131     }
1132     return assignments;
1133   }
1134 
1135   /**
1136    * Used to assign a single region to a random server.
1137    */
1138   @Override
1139   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1140     metricsBalancer.incrMiscInvocations();
1141     if (servers != null && servers.contains(masterServerName)) {
1142       if (shouldBeOnMaster(regionInfo)) {
1143         return masterServerName;
1144       }
1145       servers = new ArrayList<ServerName>(servers);
1146       // Guarantee not to put other regions on master
1147       servers.remove(masterServerName);
1148     }
1149 
1150     int numServers = servers == null ? 0 : servers.size();
1151     if (numServers == 0) {
1152       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1153       return null;
1154     }
1155     if (numServers == 1) { // Only one server, nothing fancy we can do here
1156       return servers.get(0);
1157     }
1158 
1159     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1160     Cluster cluster = createCluster(servers, regions);
1161     return randomAssignment(cluster, regionInfo, servers);
1162   }
1163 
1164   /**
1165    * Generates a bulk assignment startup plan, attempting to reuse the existing
1166    * assignment information from META, but adjusting for the specified list of
1167    * available/online servers available for assignment.
1168    * <p>
1169    * Takes a map of all regions to their existing assignment from META. Also
1170    * takes a list of online servers for regions to be assigned to. Attempts to
1171    * retain all assignment, so in some instances initial assignment will not be
1172    * completely balanced.
1173    * <p>
1174    * Any leftover regions without an existing server to be assigned to will be
1175    * assigned randomly to available servers.
1176    *
1177    * @param regions regions and existing assignment from meta
1178    * @param servers available servers
1179    * @return map of servers and regions to be assigned to them
1180    */
1181   @Override
1182   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1183       List<ServerName> servers) {
1184     // Update metrics
1185     metricsBalancer.incrMiscInvocations();
1186     Map<ServerName, List<HRegionInfo>> assignments
1187       = assignMasterRegions(regions.keySet(), servers);
1188     if (assignments != null && !assignments.isEmpty()) {
1189       servers = new ArrayList<ServerName>(servers);
1190       // Guarantee not to put other regions on master
1191       servers.remove(masterServerName);
1192       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1193       if (!masterRegions.isEmpty()) {
1194         regions = new HashMap<HRegionInfo, ServerName>(regions);
1195         for (HRegionInfo region: masterRegions) {
1196           regions.remove(region);
1197         }
1198       }
1199     }
1200     if (regions == null || regions.isEmpty()) {
1201       return assignments;
1202     }
1203 
1204     int numServers = servers == null ? 0 : servers.size();
1205     if (numServers == 0) {
1206       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1207       return null;
1208     }
1209     if (numServers == 1) { // Only one server, nothing fancy we can do here
1210       ServerName server = servers.get(0);
1211       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1212       return assignments;
1213     }
1214 
1215     // Group all of the old assignments by their hostname.
1216     // We can't group directly by ServerName since the servers all have
1217     // new start-codes.
1218 
1219     // Group the servers by their hostname. It's possible we have multiple
1220     // servers on the same host on different ports.
1221     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1222     for (ServerName server : servers) {
1223       assignments.put(server, new ArrayList<HRegionInfo>());
1224       serversByHostname.put(server.getHostname(), server);
1225     }
1226 
1227     // Collection of the hostnames that used to have regions
1228     // assigned, but for which we no longer have any RS running
1229     // after the cluster restart.
1230     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1231 
1232     int numRandomAssignments = 0;
1233     int numRetainedAssigments = 0;
1234 
1235     Cluster cluster = createCluster(servers, regions.keySet());
1236 
1237     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1238       HRegionInfo region = entry.getKey();
1239       ServerName oldServerName = entry.getValue();
1240       List<ServerName> localServers = new ArrayList<ServerName>();
1241       if (oldServerName != null) {
1242         localServers = serversByHostname.get(oldServerName.getHostname());
1243       }
1244       if (localServers.isEmpty()) {
1245         // No servers on the new cluster match up with this hostname,
1246         // assign randomly.
1247         ServerName randomServer = randomAssignment(cluster, region, servers);
1248         assignments.get(randomServer).add(region);
1249         numRandomAssignments++;
1250         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1251       } else if (localServers.size() == 1) {
1252         // the usual case - one new server on same host
1253         ServerName target = localServers.get(0);
1254         assignments.get(target).add(region);
1255         cluster.doAssignRegion(region, target);
1256         numRetainedAssigments++;
1257       } else {
1258         // multiple new servers in the cluster on this same host
1259         if (localServers.contains(oldServerName)) {
1260           assignments.get(oldServerName).add(region);
1261           cluster.doAssignRegion(region, oldServerName);
1262         } else {
1263           ServerName target = null;
1264           for (ServerName tmp: localServers) {
1265             if (tmp.getPort() == oldServerName.getPort()) {
1266               target = tmp;
1267               break;
1268             }
1269           }
1270           if (target == null) {
1271             target = randomAssignment(cluster, region, localServers);
1272           }
1273           assignments.get(target).add(region);
1274         }
1275         numRetainedAssigments++;
1276       }
1277     }
1278 
1279     String randomAssignMsg = "";
1280     if (numRandomAssignments > 0) {
1281       randomAssignMsg =
1282           numRandomAssignments + " regions were assigned "
1283               + "to random hosts, since the old hosts for these regions are no "
1284               + "longer present in the cluster. These hosts were:\n  "
1285               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1286     }
1287 
1288     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1289         + " retained the pre-restart assignment. " + randomAssignMsg);
1290     return assignments;
1291   }
1292 
1293   @Override
1294   public void initialize() throws HBaseIOException{
1295   }
1296 
1297   @Override
1298   public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1299   }
1300 
1301   @Override
1302   public void regionOffline(HRegionInfo regionInfo) {
1303   }
1304 
1305   @Override
1306   public boolean isStopped() {
1307     return stopped;
1308   }
1309 
1310   @Override
1311   public void stop(String why) {
1312     LOG.info("Load Balancer stop requested: "+why);
1313     stopped = true;
1314   }
1315 
1316   /**
1317    * Used to assign a single region to a random server.
1318    */
1319   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1320       List<ServerName> servers) {
1321     int numServers = servers.size(); // servers is not null, numServers > 1
1322     ServerName sn = null;
1323     final int maxIterations = numServers * 4;
1324     int iterations = 0;
1325 
1326     do {
1327       int i = RANDOM.nextInt(numServers);
1328       sn = servers.get(i);
1329     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1330         && iterations++ < maxIterations);
1331     cluster.doAssignRegion(regionInfo, sn);
1332     return sn;
1333   }
1334 
1335   /**
1336    * Round robin a list of regions to a list of servers
1337    */
1338   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1339       List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1340       Map<ServerName, List<HRegionInfo>> assignments) {
1341 
1342     int numServers = servers.size();
1343     int numRegions = regions.size();
1344     int max = (int) Math.ceil((float) numRegions / numServers);
1345     int serverIdx = 0;
1346     if (numServers > 1) {
1347       serverIdx = RANDOM.nextInt(numServers);
1348     }
1349     int regionIdx = 0;
1350 
1351     for (int j = 0; j < numServers; j++) {
1352       ServerName server = servers.get((j + serverIdx) % numServers);
1353       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1354       for (int i = regionIdx; i < numRegions; i += numServers) {
1355         HRegionInfo region = regions.get(i % numRegions);
1356         if (cluster.wouldLowerAvailability(region, server)) {
1357           unassignedRegions.add(region);
1358         } else {
1359           serverRegions.add(region);
1360           cluster.doAssignRegion(region, server);
1361         }
1362       }
1363       assignments.put(server, serverRegions);
1364       regionIdx++;
1365     }
1366   }
1367 
1368   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1369     Collection<HRegionInfo> regions) {
1370     if (this.services != null && this.services.getAssignmentManager() != null) {
1371       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1372     } else {
1373       return new HashMap<ServerName, List<HRegionInfo>>();
1374     }
1375   }
1376 
1377   @Override
1378   public void onConfigurationChange(Configuration conf) {
1379   }
1380 }