View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.Deque;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.NavigableMap;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeMap;
36  
37  import org.apache.commons.lang.NotImplementedException;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.ClusterStatus;
42  import org.apache.hadoop.hbase.HBaseIOException;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.RegionLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.master.AssignmentManager;
48  import org.apache.hadoop.hbase.master.LoadBalancer;
49  import org.apache.hadoop.hbase.master.MasterServices;
50  import org.apache.hadoop.hbase.master.RegionPlan;
51  import org.apache.hadoop.hbase.security.access.AccessControlLists;
52  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
53  import org.apache.hadoop.hbase.master.RackManager;
54  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
55  
56  import com.google.common.base.Joiner;
57  import com.google.common.collect.ArrayListMultimap;
58  import com.google.common.collect.Lists;
59  import com.google.common.collect.Sets;
60  
61  /**
62   * The base class for load balancers. It provides the the functions used to by
63   * {@link AssignmentManager} to assign regions in the edge cases. It doesn't
64   * provide an implementation of the actual balancing algorithm.
65   *
66   */
67  public abstract class BaseLoadBalancer implements LoadBalancer {
68    private static final int MIN_SERVER_BALANCE = 2;
69    private volatile boolean stopped = false;
70  
71    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
72  
73    protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
74  
75    private static class DefaultRackManager extends RackManager {
76      @Override
77      public String getRack(ServerName server) {
78        return UNKNOWN_RACK;
79      }
80    }
81  
82    /**
83     * An efficient array based implementation similar to ClusterState for keeping
84     * the status of the cluster in terms of region assignment and distribution.
85     * LoadBalancers, such as StochasticLoadBalancer uses this Cluster object because of
86     * hundreds of thousands of hashmap manipulations are very costly, which is why this
87     * class uses mostly indexes and arrays.
88     *
89     * Cluster tracks a list of unassigned regions, region assignments, and the server
90     * topology in terms of server names, hostnames and racks.
91     */
92    protected static class Cluster {
93      ServerName masterServerName;
94      Set<String> tablesOnMaster;
95      ServerName[] servers;
96      String[] hosts; // ServerName uniquely identifies a region server. multiple RS can run on the same host
97      String[] racks;
98      boolean multiServersPerHost = false; // whether or not any host has more than one server
99  
100     ArrayList<String> tables;
101     HRegionInfo[] regions;
102     Deque<RegionLoad>[] regionLoads;
103     boolean[] backupMasterFlags;
104     int activeMasterIndex = -1;
105 
106     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
107 
108     int[]   serverIndexToHostIndex;      //serverIndex -> host index
109     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
110 
111     int[][] regionsPerServer;            //serverIndex -> region list
112     int[][] regionsPerHost;              //hostIndex -> list of regions
113     int[][] regionsPerRack;              //rackIndex -> region list
114     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
115     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
116     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
117 
118     int[][] serversPerHost;              //hostIndex -> list of server indexes
119     int[][] serversPerRack;              //rackIndex -> list of server indexes
120     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
121     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
122     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
123     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
124     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
125     int     numUserRegionsOnMaster;      //number of user regions on the active master
126     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
127     boolean hasRegionReplicas = false;   //whether there is regions with replicas
128 
129     Integer[] serverIndicesSortedByRegionCount;
130 
131     Map<String, Integer> serversToIndex;
132     Map<String, Integer> hostsToIndex;
133     Map<String, Integer> racksToIndex;
134     Map<String, Integer> tablesToIndex;
135     Map<HRegionInfo, Integer> regionsToIndex;
136 
137     int numServers;
138     int numHosts;
139     int numRacks;
140     int numTables;
141     int numRegions;
142 
143     int numMovedRegions = 0; //num moved regions from the initial configuration
144     // num of moved regions away from master that should be on the master
145     int numMovedMasterHostedRegions = 0;
146     int numMovedMetaRegions = 0;       //num of moved regions that are META
147     Map<ServerName, List<HRegionInfo>> clusterState;
148 
149     protected final RackManager rackManager;
150 
151     protected Cluster(
152         ServerName masterServerName,
153         Map<ServerName, List<HRegionInfo>> clusterState,
154         Map<String, Deque<RegionLoad>> loads,
155         RegionLocationFinder regionFinder,
156         Collection<ServerName> backupMasters,
157         Set<String> tablesOnMaster,
158         RackManager rackManager) {
159       this(masterServerName, null, clusterState, loads, regionFinder, backupMasters,
160         tablesOnMaster, rackManager);
161     }
162 
163     protected Cluster(
164         ServerName masterServerName,
165         Collection<HRegionInfo> unassignedRegions,
166         Map<ServerName, List<HRegionInfo>> clusterState,
167         Map<String, Deque<RegionLoad>> loads,
168         RegionLocationFinder regionFinder,
169         Collection<ServerName> backupMasters,
170         Set<String> tablesOnMaster,
171         RackManager rackManager) {
172 
173       if (unassignedRegions == null) {
174         unassignedRegions = EMPTY_REGION_LIST;
175       }
176 
177       this.masterServerName = masterServerName;
178       this.tablesOnMaster = tablesOnMaster;
179 
180       serversToIndex = new HashMap<String, Integer>();
181       hostsToIndex = new HashMap<String, Integer>();
182       racksToIndex = new HashMap<String, Integer>();
183       tablesToIndex = new HashMap<String, Integer>();
184 
185       //TODO: We should get the list of tables from master
186       tables = new ArrayList<String>();
187       this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
188 
189       numRegions = 0;
190 
191       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
192       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
193       this.clusterState = clusterState;
194 
195       // Use servername and port as there can be dead servers in this list. We want everything with
196       // a matching hostname and port to have the same index.
197       for (ServerName sn : clusterState.keySet()) {
198         if (serversToIndex.get(sn.getHostAndPort()) == null) {
199           serversToIndex.put(sn.getHostAndPort(), numServers++);
200         }
201         if (!hostsToIndex.containsKey(sn.getHostname())) {
202           hostsToIndex.put(sn.getHostname(), numHosts++);
203           serversPerHostList.add(new ArrayList<Integer>(1));
204         }
205 
206         int serverIndex = serversToIndex.get(sn.getHostAndPort());
207         int hostIndex = hostsToIndex.get(sn.getHostname());
208         serversPerHostList.get(hostIndex).add(serverIndex);
209 
210         String rack = this.rackManager.getRack(sn);
211         if (!racksToIndex.containsKey(rack)) {
212           racksToIndex.put(rack, numRacks++);
213           serversPerRackList.add(new ArrayList<Integer>());
214         }
215         int rackIndex = racksToIndex.get(rack);
216         serversPerRackList.get(rackIndex).add(serverIndex);
217       }
218 
219       // Count how many regions there are.
220       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
221         numRegions += entry.getValue().size();
222       }
223       numRegions += unassignedRegions.size();
224 
225       regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
226       servers = new ServerName[numServers];
227       serversPerHost = new int[numHosts][];
228       serversPerRack = new int[numRacks][];
229       regions = new HRegionInfo[numRegions];
230       regionIndexToServerIndex = new int[numRegions];
231       initialRegionIndexToServerIndex = new int[numRegions];
232       regionIndexToTableIndex = new int[numRegions];
233       regionIndexToPrimaryIndex = new int[numRegions];
234       regionLoads = new Deque[numRegions];
235       regionLocations = new int[numRegions][];
236       serverIndicesSortedByRegionCount = new Integer[numServers];
237       backupMasterFlags = new boolean[numServers];
238 
239       serverIndexToHostIndex = new int[numServers];
240       serverIndexToRackIndex = new int[numServers];
241       regionsPerServer = new int[numServers][];
242       regionsPerHost = new int[numHosts][];
243       regionsPerRack = new int[numRacks][];
244       primariesOfRegionsPerServer = new int[numServers][];
245       primariesOfRegionsPerHost = new int[numHosts][];
246       primariesOfRegionsPerRack = new int[numRacks][];
247 
248       int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
249 
250       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
251         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
252 
253         // keep the servername if this is the first server name for this hostname
254         // or this servername has the newest startcode.
255         if (servers[serverIndex] == null ||
256             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
257           servers[serverIndex] = entry.getKey();
258           backupMasterFlags[serverIndex] = backupMasters != null
259             && backupMasters.contains(servers[serverIndex]);
260         }
261 
262         if (regionsPerServer[serverIndex] != null) {
263           // there is another server with the same hostAndPort in ClusterState.
264           // allocate the array for the total size
265           regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
266         } else {
267           regionsPerServer[serverIndex] = new int[entry.getValue().size()];
268         }
269         primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
270         serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
271 
272         if (servers[serverIndex].equals(masterServerName)) {
273           activeMasterIndex = serverIndex;
274           for (HRegionInfo hri: entry.getValue()) {
275             if (!shouldBeOnMaster(hri)) {
276               numUserRegionsOnMaster++;
277             }
278           }
279         }
280       }
281 
282       hosts = new String[numHosts];
283       for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
284         hosts[entry.getValue()] = entry.getKey();
285       }
286       racks = new String[numRacks];
287       for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
288         racks[entry.getValue()] = entry.getKey();
289       }
290 
291       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
292         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
293         regionPerServerIndex = 0;
294 
295         int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
296         serverIndexToHostIndex[serverIndex] = hostIndex;
297 
298         int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
299         serverIndexToRackIndex[serverIndex] = rackIndex;
300 
301         for (HRegionInfo region : entry.getValue()) {
302           registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
303 
304           regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
305           regionIndex++;
306         }
307       }
308       for (HRegionInfo region : unassignedRegions) {
309         registerRegion(region, regionIndex, -1, loads, regionFinder);
310         regionIndex++;
311       }
312 
313       for (int i = 0; i < serversPerHostList.size(); i++) {
314         serversPerHost[i] = new int[serversPerHostList.get(i).size()];
315         for (int j = 0; j < serversPerHost[i].length; j++) {
316           serversPerHost[i][j] = serversPerHostList.get(i).get(j);
317         }
318         if (serversPerHost[i].length > 1) {
319           multiServersPerHost = true;
320         }
321       }
322 
323       for (int i = 0; i < serversPerRackList.size(); i++) {
324         serversPerRack[i] = new int[serversPerRackList.get(i).size()];
325         for (int j = 0; j < serversPerRack[i].length; j++) {
326           serversPerRack[i][j] = serversPerRackList.get(i).get(j);
327         }
328       }
329 
330       numTables = tables.size();
331       numRegionsPerServerPerTable = new int[numServers][numTables];
332 
333       for (int i = 0; i < numServers; i++) {
334         for (int j = 0; j < numTables; j++) {
335           numRegionsPerServerPerTable[i][j] = 0;
336         }
337       }
338 
339       for (int i=0; i < regionIndexToServerIndex.length; i++) {
340         if (regionIndexToServerIndex[i] >= 0) {
341           numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
342         }
343       }
344 
345       numMaxRegionsPerTable = new int[numTables];
346       for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
347         for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
348           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
349             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
350           }
351         }
352       }
353 
354       for (int i = 0; i < regions.length; i ++) {
355         HRegionInfo info = regions[i];
356         if (RegionReplicaUtil.isDefaultReplica(info)) {
357           regionIndexToPrimaryIndex[i] = i;
358         } else {
359           hasRegionReplicas = true;
360           HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
361           regionIndexToPrimaryIndex[i] =
362               regionsToIndex.containsKey(primaryInfo) ?
363               regionsToIndex.get(primaryInfo):
364               -1;
365         }
366       }
367 
368       for (int i = 0; i < regionsPerServer.length; i++) {
369         primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
370         for (int j = 0; j < regionsPerServer[i].length; j++) {
371           int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
372           primariesOfRegionsPerServer[i][j] = primaryIndex;
373         }
374         // sort the regions by primaries.
375         Arrays.sort(primariesOfRegionsPerServer[i]);
376       }
377 
378       // compute regionsPerHost
379       if (multiServersPerHost) {
380         for (int i = 0 ; i < serversPerHost.length; i++) {
381           int numRegionsPerHost = 0;
382           for (int j = 0; j < serversPerHost[i].length; j++) {
383             numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
384           }
385           regionsPerHost[i] = new int[numRegionsPerHost];
386           primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
387         }
388         for (int i = 0 ; i < serversPerHost.length; i++) {
389           int numRegionPerHostIndex = 0;
390           for (int j = 0; j < serversPerHost[i].length; j++) {
391             for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
392               int region = regionsPerServer[serversPerHost[i][j]][k];
393               regionsPerHost[i][numRegionPerHostIndex] = region;
394               int primaryIndex = regionIndexToPrimaryIndex[region];
395               primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
396               numRegionPerHostIndex++;
397             }
398           }
399           // sort the regions by primaries.
400           Arrays.sort(primariesOfRegionsPerHost[i]);
401         }
402       }
403 
404       // compute regionsPerRack
405       if (numRacks > 1) {
406         for (int i = 0 ; i < serversPerRack.length; i++) {
407           int numRegionsPerRack = 0;
408           for (int j = 0; j < serversPerRack[i].length; j++) {
409             numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
410           }
411           regionsPerRack[i] = new int[numRegionsPerRack];
412           primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
413         }
414 
415         for (int i = 0 ; i < serversPerRack.length; i++) {
416           int numRegionPerRackIndex = 0;
417           for (int j = 0; j < serversPerRack[i].length; j++) {
418             for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
419               int region = regionsPerServer[serversPerRack[i][j]][k];
420               regionsPerRack[i][numRegionPerRackIndex] = region;
421               int primaryIndex = regionIndexToPrimaryIndex[region];
422               primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
423               numRegionPerRackIndex++;
424             }
425           }
426           // sort the regions by primaries.
427           Arrays.sort(primariesOfRegionsPerRack[i]);
428         }
429       }
430     }
431 
432     /** Helper for Cluster constructor to handle a region */
433     private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
434         Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
435       String tableName = region.getTable().getNameAsString();
436       if (!tablesToIndex.containsKey(tableName)) {
437         tables.add(tableName);
438         tablesToIndex.put(tableName, tablesToIndex.size());
439       }
440       int tableIndex = tablesToIndex.get(tableName);
441 
442       regionsToIndex.put(region, regionIndex);
443       regions[regionIndex] = region;
444       regionIndexToServerIndex[regionIndex] = serverIndex;
445       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
446       regionIndexToTableIndex[regionIndex] = tableIndex;
447 
448       // region load
449       if (loads != null) {
450         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
451         // That could have failed if the RegionLoad is using the other regionName
452         if (rl == null) {
453           // Try getting the region load using encoded name.
454           rl = loads.get(region.getEncodedName());
455         }
456         regionLoads[regionIndex] = rl;
457       }
458 
459       if (regionFinder != null) {
460         //region location
461         List<ServerName> loc = regionFinder.getTopBlockLocations(region);
462         regionLocations[regionIndex] = new int[loc.size()];
463         for (int i=0; i < loc.size(); i++) {
464           regionLocations[regionIndex][i] =
465               loc.get(i) == null ? -1 :
466                 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
467                     : serversToIndex.get(loc.get(i).getHostAndPort()));
468         }
469       }
470     }
471 
472     /** An action to move or swap a region */
473     public static class Action {
474       public static enum Type {
475         ASSIGN_REGION,
476         MOVE_REGION,
477         SWAP_REGIONS,
478         NULL,
479       }
480 
481       public Type type;
482       public Action (Type type) {this.type = type;}
483       /** Returns an Action which would undo this action */
484       public Action undoAction() { return this; }
485       @Override
486       public String toString() { return type + ":";}
487     }
488 
489     public static class AssignRegionAction extends Action {
490       public int region;
491       public int server;
492       public AssignRegionAction(int region, int server) {
493         super(Type.ASSIGN_REGION);
494         this.region = region;
495         this.server = server;
496       }
497       @Override
498       public Action undoAction() {
499         // TODO implement this. This action is not being used by the StochasticLB for now
500         // in case it uses it, we should implement this function.
501         throw new NotImplementedException();
502       }
503       @Override
504       public String toString() {
505         return type + ": " + region + ":" + server;
506       }
507     }
508 
509     public static class MoveRegionAction extends Action {
510       public int region;
511       public int fromServer;
512       public int toServer;
513 
514       public MoveRegionAction(int region, int fromServer, int toServer) {
515         super(Type.MOVE_REGION);
516         this.fromServer = fromServer;
517         this.region = region;
518         this.toServer = toServer;
519       }
520       @Override
521       public Action undoAction() {
522         return new MoveRegionAction (region, toServer, fromServer);
523       }
524       @Override
525       public String toString() {
526         return type + ": " + region + ":" + fromServer + " -> " + toServer;
527       }
528     }
529 
530     public static class SwapRegionsAction extends Action {
531       public int fromServer;
532       public int fromRegion;
533       public int toServer;
534       public int toRegion;
535       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
536         super(Type.SWAP_REGIONS);
537         this.fromServer = fromServer;
538         this.fromRegion = fromRegion;
539         this.toServer = toServer;
540         this.toRegion = toRegion;
541       }
542       @Override
543       public Action undoAction() {
544         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
545       }
546       @Override
547       public String toString() {
548         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
549       }
550     }
551 
552     public static Action NullAction = new Action(Type.NULL);
553 
554     public void doAction(Action action) {
555       switch (action.type) {
556       case NULL: break;
557       case ASSIGN_REGION:
558         AssignRegionAction ar = (AssignRegionAction) action;
559         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
560         regionMoved(ar.region, -1, ar.server);
561         break;
562       case MOVE_REGION:
563         MoveRegionAction mra = (MoveRegionAction) action;
564         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
565         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
566         regionMoved(mra.region, mra.fromServer, mra.toServer);
567         break;
568       case SWAP_REGIONS:
569         SwapRegionsAction a = (SwapRegionsAction) action;
570         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
571         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
572         regionMoved(a.fromRegion, a.fromServer, a.toServer);
573         regionMoved(a.toRegion, a.toServer, a.fromServer);
574         break;
575       default:
576         throw new RuntimeException("Uknown action:" + action.type);
577       }
578     }
579 
580     /**
581      * Return true if the placement of region on server would lower the availability
582      * of the region in question
583      * @param server
584      * @param region
585      * @return true or false
586      */
587     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
588       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
589         return false; // safeguard against race between cluster.servers and servers from LB method args
590       }
591       int server = serversToIndex.get(serverName.getHostAndPort());
592       int region = regionsToIndex.get(regionInfo);
593 
594       int primary = regionIndexToPrimaryIndex[region];
595 
596       // there is a subset relation for server < host < rack
597       // check server first
598 
599       if (contains(primariesOfRegionsPerServer[server], primary)) {
600         // check for whether there are other servers that we can place this region
601         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
602           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
603             return true; // meaning there is a better server
604           }
605         }
606         return false; // there is not a better server to place this
607       }
608 
609       // check host
610       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
611         int host = serverIndexToHostIndex[server];
612         if (contains(primariesOfRegionsPerHost[host], primary)) {
613           // check for whether there are other hosts that we can place this region
614           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
615             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
616               return true; // meaning there is a better host
617             }
618           }
619           return false; // there is not a better host to place this
620         }
621       }
622 
623       // check rack
624       if (numRacks > 1) {
625         int rack = serverIndexToRackIndex[server];
626         if (contains(primariesOfRegionsPerRack[rack], primary)) {
627           // check for whether there are other racks that we can place this region
628           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
629             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
630               return true; // meaning there is a better rack
631             }
632           }
633           return false; // there is not a better rack to place this
634         }
635       }
636       return false;
637     }
638 
639     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
640       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
641         return;
642       }
643       int server = serversToIndex.get(serverName.getHostAndPort());
644       int region = regionsToIndex.get(regionInfo);
645       doAction(new AssignRegionAction(region, server));
646     }
647 
648     void regionMoved(int region, int oldServer, int newServer) {
649       regionIndexToServerIndex[region] = newServer;
650       if (initialRegionIndexToServerIndex[region] == newServer) {
651         numMovedRegions--; //region moved back to original location
652         if (shouldBeOnMaster(regions[region]) && isActiveMaster(newServer)) {
653           //Master hosted region moved back to the active master
654           numMovedMasterHostedRegions--;
655         }
656       } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
657         numMovedRegions++; //region moved from original location
658         if (shouldBeOnMaster(regions[region]) && isActiveMaster(oldServer)) {
659           // Master hosted region moved away from active the master
660           numMovedMasterHostedRegions++;
661         }
662       }
663       int tableIndex = regionIndexToTableIndex[region];
664       if (oldServer >= 0) {
665         numRegionsPerServerPerTable[oldServer][tableIndex]--;
666       }
667       numRegionsPerServerPerTable[newServer][tableIndex]++;
668 
669       //check whether this caused maxRegionsPerTable in the new Server to be updated
670       if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
671         numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
672       } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
673           == numMaxRegionsPerTable[tableIndex]) {
674         //recompute maxRegionsPerTable since the previous value was coming from the old server
675         for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
676           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
677             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
678           }
679         }
680       }
681 
682       // update for servers
683       int primary = regionIndexToPrimaryIndex[region];
684       if (oldServer >= 0) {
685         primariesOfRegionsPerServer[oldServer] = removeRegion(
686           primariesOfRegionsPerServer[oldServer], primary);
687       }
688       primariesOfRegionsPerServer[newServer] = addRegionSorted(
689         primariesOfRegionsPerServer[newServer], primary);
690 
691       // update for hosts
692       if (multiServersPerHost) {
693         int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
694         int newHost = serverIndexToHostIndex[newServer];
695         if (newHost != oldHost) {
696           regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
697           primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
698           if (oldHost >= 0) {
699             regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
700             primariesOfRegionsPerHost[oldHost] = removeRegion(
701               primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
702           }
703         }
704       }
705 
706       // update for racks
707       if (numRacks > 1) {
708         int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
709         int newRack = serverIndexToRackIndex[newServer];
710         if (newRack != oldRack) {
711           regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
712           primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
713           if (oldRack >= 0) {
714             regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
715             primariesOfRegionsPerRack[oldRack] = removeRegion(
716               primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
717           }
718         }
719       }
720       if (oldServer >= 0 && isActiveMaster(oldServer)) {
721         if (!shouldBeOnMaster(regions[region])) {
722           numUserRegionsOnMaster--;
723         }
724       } else if (isActiveMaster(newServer)) {
725         if (!shouldBeOnMaster(regions[region])) {
726           numUserRegionsOnMaster++;
727         }
728       }
729     }
730 
731     int[] removeRegion(int[] regions, int regionIndex) {
732       //TODO: this maybe costly. Consider using linked lists
733       int[] newRegions = new int[regions.length - 1];
734       int i = 0;
735       for (i = 0; i < regions.length; i++) {
736         if (regions[i] == regionIndex) {
737           break;
738         }
739         newRegions[i] = regions[i];
740       }
741       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
742       return newRegions;
743     }
744 
745     int[] addRegion(int[] regions, int regionIndex) {
746       int[] newRegions = new int[regions.length + 1];
747       System.arraycopy(regions, 0, newRegions, 0, regions.length);
748       newRegions[newRegions.length - 1] = regionIndex;
749       return newRegions;
750     }
751 
752     int[] addRegionSorted(int[] regions, int regionIndex) {
753       int[] newRegions = new int[regions.length + 1];
754       int i = 0;
755       for (i = 0; i < regions.length; i++) { // find the index to insert
756         if (regions[i] > regionIndex) {
757           break;
758         }
759       }
760       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
761       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
762       newRegions[i] = regionIndex;
763 
764       return newRegions;
765     }
766 
767     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
768       int i = 0;
769       for (i = 0; i < regions.length; i++) {
770         if (regions[i] == regionIndex) {
771           regions[i] = newRegionIndex;
772           break;
773         }
774       }
775       return regions;
776     }
777 
778     void sortServersByRegionCount() {
779       Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
780     }
781 
782     int getNumRegions(int server) {
783       return regionsPerServer[server].length;
784     }
785 
786     boolean isBackupMaster(int server) {
787       return backupMasterFlags[server];
788     }
789 
790     boolean isActiveMaster(int server) {
791       return activeMasterIndex == server;
792     }
793 
794     boolean shouldBeOnMaster(HRegionInfo region) {
795       return tablesOnMaster != null && tablesOnMaster.contains(
796         region.getTable().getNameAsString());
797     }
798 
799     boolean contains(int[] arr, int val) {
800       return Arrays.binarySearch(arr, val) >= 0;
801     }
802 
803     private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
804       @Override
805       public int compare(Integer integer, Integer integer2) {
806         return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
807       }
808     };
809 
810     @Override
811     public String toString() {
812       String desc = "Cluster{" +
813           "servers=[";
814           for(ServerName sn:servers) {
815              desc += sn.getHostAndPort() + ", ";
816           }
817           desc +=
818           ", serverIndicesSortedByRegionCount="+
819           Arrays.toString(serverIndicesSortedByRegionCount) +
820           ", regionsPerServer=[";
821 
822           for (int[]r:regionsPerServer) {
823             desc += Arrays.toString(r);
824           }
825           desc += "]" +
826           ", numMaxRegionsPerTable=" +
827           Arrays.toString(numMaxRegionsPerTable) +
828           ", numRegions=" +
829           numRegions +
830           ", numServers=" +
831           numServers +
832           ", numTables=" +
833           numTables +
834           ", numMovedRegions=" +
835           numMovedRegions +
836           ", numMovedMasterHostedRegions=" +
837           numMovedMasterHostedRegions +
838           '}';
839       return desc;
840     }
841   }
842 
843   // slop for regions
844   protected float slop;
845   protected Configuration config;
846   protected RackManager rackManager;
847   private static final Random RANDOM = new Random(System.currentTimeMillis());
848   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
849 
850   // The weight means that each region on the active/backup master is
851   // equal to that many regions on a normal regionserver, in calculating
852   // the region load by the load balancer. So that the active/backup master
853   // can host less (or equal if weight = 1) regions than normal regionservers.
854   //
855   // The weight can be used to control the number of regions on backup
856   // masters, which shouldn't host as many regions as normal regionservers.
857   // So that we don't need to move around too many regions when a
858   // backup master becomes the active one.
859   //
860   // Currently, the active master weight is used only by StockasticLoadBalancer.
861   // Generally, we don't put any user regions on the active master, which
862   // only hosts regions of tables defined in TABLES_ON_MASTER.
863   // That's why the default activeMasterWeight is high.
864   public static final String BACKUP_MASTER_WEIGHT_KEY =
865     "hbase.balancer.backupMasterWeight";
866   public static final int DEFAULT_BACKUP_MASTER_WEIGHT = 1;
867 
868   private static final String ACTIVE_MASTER_WEIGHT_KEY =
869     "hbase.balancer.activeMasterWeight";
870   private static final int DEFAULT_ACTIVE_MASTER_WEIGHT = 200;
871 
872   // Regions of these tables are put on the master by default.
873   private static final String[] DEFAULT_TABLES_ON_MASTER =
874     new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
875       TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
876       TableName.META_TABLE_NAME.getNameAsString()};
877 
878   protected int activeMasterWeight;
879   protected int backupMasterWeight;
880 
881   // a flag to indicate if assigning regions to backup masters
882   protected boolean usingBackupMasters = true;
883   protected final Set<ServerName> excludedServers =
884     Collections.synchronizedSet(new HashSet<ServerName>());
885 
886   protected final Set<String> tablesOnMaster = new HashSet<String>();
887   protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
888   protected ClusterStatus clusterStatus = null;
889   protected ServerName masterServerName;
890   protected MasterServices services;
891 
892   @Override
893   public void setConf(Configuration conf) {
894     setSlop(conf);
895     if (slop < 0) slop = 0;
896     else if (slop > 1) slop = 1;
897 
898     this.config = conf;
899     activeMasterWeight = conf.getInt(
900       ACTIVE_MASTER_WEIGHT_KEY, DEFAULT_ACTIVE_MASTER_WEIGHT);
901     backupMasterWeight = conf.getInt(
902       BACKUP_MASTER_WEIGHT_KEY, DEFAULT_BACKUP_MASTER_WEIGHT);
903     if (backupMasterWeight < 1) {
904       usingBackupMasters = false;
905       LOG.info("Backup master won't host any region since "
906         + BACKUP_MASTER_WEIGHT_KEY + " is " + backupMasterWeight
907         + "(<1)");
908     }
909     String[] tables = conf.getStrings(
910       "hbase.balancer.tablesOnMaster", DEFAULT_TABLES_ON_MASTER);
911     if (tables != null) {
912       for (String table: tables) {
913         tablesOnMaster.add(table);
914       }
915     }
916     this.rackManager = new RackManager(getConf());
917     regionFinder.setConf(conf);
918   }
919 
920   protected void setSlop(Configuration conf) {
921     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
922   }
923 
924   /**
925    * If there is any server excluded, filter it out from the cluster map so
926    * we won't assign any region to it, assuming none's already assigned there.
927    */
928   protected void filterExcludedServers(Map<ServerName, List<HRegionInfo>> clusterMap) {
929     if (excludedServers.isEmpty()) { // No server to filter out
930       return;
931     }
932     Iterator<Map.Entry<ServerName, List<HRegionInfo>>> it = clusterMap.entrySet().iterator();
933     while (it.hasNext()) {
934       Map.Entry<ServerName, List<HRegionInfo>> en = it.next();
935       if (excludedServers.contains(en.getKey()) && en.getValue().isEmpty()) {
936         it.remove();
937       }
938     }
939   }
940 
941   /**
942    * Check if a region belongs to some small system table.
943    * If so, it may be expected to be put on the master regionserver.
944    */
945   protected boolean shouldBeOnMaster(HRegionInfo region) {
946     return tablesOnMaster.contains(region.getTable().getNameAsString());
947   }
948 
949   /**
950    * Balance the regions that should be on master regionserver.
951    */
952   protected List<RegionPlan> balanceMasterRegions(
953       Map<ServerName, List<HRegionInfo>> clusterMap) {
954     if (services == null || clusterMap.size() <= 1) return null;
955     List<RegionPlan> plans = null;
956     List<HRegionInfo> regions = clusterMap.get(masterServerName);
957     if (regions != null) {
958       Iterator<ServerName> keyIt = null;
959       for (HRegionInfo region: regions) {
960         if (shouldBeOnMaster(region)) continue;
961 
962         // Find a non-master regionserver to host the region
963         if (keyIt == null || !keyIt.hasNext()) {
964           keyIt = clusterMap.keySet().iterator();
965         }
966         ServerName dest = keyIt.next();
967         if (masterServerName.equals(dest)) {
968           dest = keyIt.next();
969         }
970 
971         // Move this region away from the master regionserver
972         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
973         if (plans == null) {
974           plans = new ArrayList<RegionPlan>();
975         }
976         plans.add(plan);
977       }
978     }
979     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
980       if (masterServerName.equals(server.getKey())) continue;
981       for (HRegionInfo region: server.getValue()) {
982         if (!shouldBeOnMaster(region)) continue;
983 
984         // Move this region to the master regionserver
985         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
986         if (plans == null) {
987           plans = new ArrayList<RegionPlan>();
988         }
989         plans.add(plan);
990       }
991     }
992     return plans;
993   }
994 
995   public void excludeServer(ServerName serverName) {
996     if (!usingBackupMasters) excludedServers.add(serverName);
997   }
998 
999   public Set<ServerName> getExcludedServers() {
1000     return excludedServers;
1001   }
1002 
1003   @Override
1004   public Configuration getConf() {
1005     return this.config;
1006   }
1007 
1008   @Override
1009   public void setClusterStatus(ClusterStatus st) {
1010     this.clusterStatus = st;
1011     if (st == null || usingBackupMasters) return;
1012 
1013     // Not assign any region to backup masters.
1014     // Put them on the excluded server list.
1015     // Assume there won't be too much backup masters
1016     // re/starting, so this won't leak much memory.
1017     excludedServers.addAll(st.getBackupMasters());
1018     regionFinder.setClusterStatus(st);
1019   }
1020 
1021   @Override
1022   public void setMasterServices(MasterServices masterServices) {
1023     masterServerName = masterServices.getServerName();
1024     excludedServers.remove(masterServerName);
1025     this.services = masterServices;
1026     this.regionFinder.setServices(masterServices);
1027   }
1028 
1029   public void setRackManager(RackManager rackManager) {
1030     this.rackManager = rackManager;
1031   }
1032 
1033   protected Collection<ServerName> getBackupMasters() {
1034     return clusterStatus == null ? null : clusterStatus.getBackupMasters();
1035   }
1036 
1037   protected boolean needsBalance(Cluster c) {
1038     ClusterLoadState cs = new ClusterLoadState(
1039       masterServerName, getBackupMasters(), backupMasterWeight, c.clusterState);
1040     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
1041       if (LOG.isDebugEnabled()) {
1042         LOG.debug("Not running balancer because only " + cs.getNumServers()
1043             + " active regionserver(s)");
1044       }
1045       return false;
1046     }
1047     if(areSomeRegionReplicasColocated(c)) return true;
1048     // Check if we even need to do any load balancing
1049     // HBASE-3681 check sloppiness first
1050     float average = cs.getLoadAverage(); // for logging
1051     int floor = (int) Math.floor(average * (1 - slop));
1052     int ceiling = (int) Math.ceil(average * (1 + slop));
1053     if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
1054       NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
1055       if (LOG.isTraceEnabled()) {
1056         // If nothing to balance, then don't say anything unless trace-level logging.
1057         LOG.trace("Skipping load balancing because balanced cluster; " +
1058           "servers=" + cs.getNumServers() + "(backupMasters=" + cs.getNumBackupMasters() +
1059           ") regions=" + cs.getNumRegions() + " average=" + average + " " +
1060           "mostloaded=" + serversByLoad.lastKey().getLoad() +
1061           " leastloaded=" + serversByLoad.firstKey().getLoad());
1062       }
1063       return false;
1064     }
1065     return true;
1066   }
1067 
1068   /**
1069    * Subclasses should implement this to return true if the cluster has nodes that hosts
1070    * multiple replicas for the same region, or, if there are multiple racks and the same
1071    * rack hosts replicas of the same region
1072    * @param c Cluster information
1073    * @return whether region replicas are currently co-located
1074    */
1075   protected boolean areSomeRegionReplicasColocated(Cluster c) {
1076     return false;
1077   }
1078 
1079   /**
1080    * Generates a bulk assignment plan to be used on cluster startup using a
1081    * simple round-robin assignment.
1082    * <p>
1083    * Takes a list of all the regions and all the servers in the cluster and
1084    * returns a map of each server to the regions that it should be assigned.
1085    * <p>
1086    * Currently implemented as a round-robin assignment. Same invariant as load
1087    * balancing, all servers holding floor(avg) or ceiling(avg).
1088    *
1089    * TODO: Use block locations from HDFS to place regions with their blocks
1090    *
1091    * @param regions all regions
1092    * @param servers all servers
1093    * @return map of server to the regions it should take, or null if no
1094    *         assignment is possible (ie. no regions or no servers)
1095    */
1096   @Override
1097   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1098       List<ServerName> servers) {
1099     metricsBalancer.incrMiscInvocations();
1100     if (regions == null || regions.isEmpty()) {
1101       return null;
1102     }
1103 
1104     List<ServerName> backupMasters = normalizeServers(servers);
1105     int numServers = servers == null ? 0 : servers.size();
1106     int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
1107     if (numServers == 0 && numBackupMasters == 0) {
1108       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1109       return null;
1110     }
1111 
1112     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1113     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1114     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1115     // and balanced. This should also run fast with fewer number of iterations.
1116 
1117     Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
1118     if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here
1119       ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0);
1120       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1121       return assignments;
1122     }
1123     List<HRegionInfo> masterRegions = null;
1124     if (numServers > 0 && servers.contains(masterServerName)) {
1125       masterRegions = new ArrayList<HRegionInfo>();
1126       if (numServers == 1) {
1127         // The only server in servers is the master,
1128         // Assign all regions to backup masters
1129         numServers = 0;
1130       }
1131     }
1132 
1133     Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster);
1134     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1135 
1136     int total = regions.size();
1137     // Get the number of regions to be assigned
1138     // to backup masters based on the weight
1139     int numRegions = total * numBackupMasters
1140       / (numServers * backupMasterWeight + numBackupMasters);
1141     if (numRegions > 0) {
1142       // backupMasters can't be null, according to the formula, numBackupMasters != 0
1143       roundRobinAssignment(cluster, regions, unassignedRegions, 0,
1144         numRegions, backupMasters, masterRegions, assignments);
1145     }
1146     int remainder = total - numRegions;
1147     if (remainder > 0) {
1148       // servers can't be null, or contains the master only since numServers != 0
1149       roundRobinAssignment(cluster, regions, unassignedRegions, numRegions, remainder,
1150         servers, masterRegions, assignments);
1151     }
1152     if (masterRegions != null && !masterRegions.isEmpty()) {
1153       assignments.put(masterServerName, masterRegions);
1154       for (HRegionInfo r : masterRegions) {
1155         cluster.doAssignRegion(r, masterServerName);
1156       }
1157     }
1158     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1159     // assign the remaining by going through the list and try to assign to servers one-by-one
1160     int serverIdx = RANDOM.nextInt(numServers);
1161     for (HRegionInfo region : unassignedRegions) {
1162       boolean assigned = false;
1163       for (int j = 0; j < numServers; j++) { // try all servers one by one
1164         ServerName serverName = servers.get((j + serverIdx) % numServers);
1165         if (serverName.equals(masterServerName)) {
1166           continue;
1167         }
1168         if (!cluster.wouldLowerAvailability(region, serverName)) {
1169           List<HRegionInfo> serverRegions = assignments.get(serverName);
1170           if (serverRegions == null) {
1171             serverRegions = new ArrayList<HRegionInfo>();
1172             assignments.put(serverName, serverRegions);
1173           }
1174           serverRegions.add(region);
1175           cluster.doAssignRegion(region, serverName);
1176           serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
1177           assigned = true;
1178           break;
1179         }
1180       }
1181       if (!assigned) {
1182         lastFewRegions.add(region);
1183       }
1184     }
1185     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1186     // make it optimal later. we can end up with this if numReplicas > numServers.
1187     for (HRegionInfo region : lastFewRegions) {
1188       ServerName server = null;
1189       if (numServers == 0) {
1190         // select from backup masters
1191         int i = RANDOM.nextInt(backupMasters.size());
1192         server = backupMasters.get(i);
1193       } else {
1194         do {
1195           int i = RANDOM.nextInt(numServers);
1196           server = servers.get(i);
1197         } while (numServers > 1 && server.equals(masterServerName));
1198       }
1199       List<HRegionInfo> serverRegions = assignments.get(server);
1200       if (serverRegions == null) {
1201         serverRegions = new ArrayList<HRegionInfo>();
1202         assignments.put(server, serverRegions);
1203       }
1204       serverRegions.add(region);
1205       cluster.doAssignRegion(region, server);
1206     }
1207     return assignments;
1208   }
1209 
1210   protected Cluster createCluster(List<ServerName> servers,
1211       Collection<HRegionInfo> regions, List<ServerName> backupMasters, Set<String> tablesOnMaster) {
1212     // Get the snapshot of the current assignments for the regions in question, and then create
1213     // a cluster out of it. Note that we might have replicas already assigned to some servers
1214     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
1215     // replicas of the regions that are passed (for performance).
1216     Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1217 
1218     for (ServerName server : servers) {
1219       if (!clusterState.containsKey(server)) {
1220         clusterState.put(server, EMPTY_REGION_LIST);
1221       }
1222     }
1223     return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder, backupMasters,
1224       tablesOnMaster, rackManager);
1225   }
1226 
1227   /**
1228    * Generates an immediate assignment plan to be used by a new master for
1229    * regions in transition that do not have an already known destination.
1230    *
1231    * Takes a list of regions that need immediate assignment and a list of all
1232    * available servers. Returns a map of regions to the server they should be
1233    * assigned to.
1234    *
1235    * This method will return quickly and does not do any intelligent balancing.
1236    * The goal is to make a fast decision not the best decision possible.
1237    *
1238    * Currently this is random.
1239    *
1240    * @param regions
1241    * @param servers
1242    * @return map of regions to the server it should be assigned to
1243    */
1244   @Override
1245   public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
1246       List<ServerName> servers) {
1247     metricsBalancer.incrMiscInvocations();
1248     if (servers == null || servers.isEmpty()) {
1249       LOG.warn("Wanted to do random assignment but no servers to assign to");
1250       return null;
1251     }
1252 
1253     Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
1254     for (HRegionInfo region : regions) {
1255       assignments.put(region, randomAssignment(region, servers));
1256     }
1257     return assignments;
1258   }
1259 
1260   /**
1261    * Used to assign a single region to a random server.
1262    */
1263   @Override
1264   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1265     metricsBalancer.incrMiscInvocations();
1266     if (servers == null || servers.isEmpty()) {
1267       LOG.warn("Wanted to do random assignment but no servers to assign to");
1268       return null;
1269     }
1270     List<ServerName> backupMasters = normalizeServers(servers);
1271     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1272     Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster);
1273 
1274     return randomAssignment(cluster, regionInfo, servers, backupMasters);
1275   }
1276 
1277   /**
1278    * Generates a bulk assignment startup plan, attempting to reuse the existing
1279    * assignment information from META, but adjusting for the specified list of
1280    * available/online servers available for assignment.
1281    * <p>
1282    * Takes a map of all regions to their existing assignment from META. Also
1283    * takes a list of online servers for regions to be assigned to. Attempts to
1284    * retain all assignment, so in some instances initial assignment will not be
1285    * completely balanced.
1286    * <p>
1287    * Any leftover regions without an existing server to be assigned to will be
1288    * assigned randomly to available servers.
1289    *
1290    * @param regions regions and existing assignment from meta
1291    * @param servers available servers
1292    * @return map of servers and regions to be assigned to them
1293    */
1294   @Override
1295   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1296       List<ServerName> servers) {
1297     // Update metrics
1298     metricsBalancer.incrMiscInvocations();
1299     if (regions == null || regions.isEmpty()) {
1300       return null;
1301     }
1302 
1303     List<ServerName> backupMasters = normalizeServers(servers);
1304     int numServers = servers == null ? 0 : servers.size();
1305     int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
1306     if (numServers == 0 && numBackupMasters == 0) {
1307       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1308       return null;
1309     }
1310     Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
1311     if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here
1312       ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0);
1313       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1314       return assignments;
1315     }
1316 
1317     // Group all of the old assignments by their hostname.
1318     // We can't group directly by ServerName since the servers all have
1319     // new start-codes.
1320 
1321     // Group the servers by their hostname. It's possible we have multiple
1322     // servers on the same host on different ports.
1323     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1324     for (ServerName server : servers) {
1325       assignments.put(server, new ArrayList<HRegionInfo>());
1326       if (!server.equals(masterServerName)) {
1327         serversByHostname.put(server.getHostname(), server);
1328       }
1329     }
1330     if (numBackupMasters > 0) {
1331       for (ServerName server : backupMasters) {
1332         assignments.put(server, new ArrayList<HRegionInfo>());
1333       }
1334     }
1335 
1336     // Collection of the hostnames that used to have regions
1337     // assigned, but for which we no longer have any RS running
1338     // after the cluster restart.
1339     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1340 
1341     // Master regionserver is in the server list.
1342     boolean masterIncluded = servers.contains(masterServerName);
1343 
1344     int numRandomAssignments = 0;
1345     int numRetainedAssigments = 0;
1346 
1347     Cluster cluster = createCluster(servers, regions.keySet(), backupMasters, tablesOnMaster);
1348 
1349     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1350       HRegionInfo region = entry.getKey();
1351       ServerName oldServerName = entry.getValue();
1352       List<ServerName> localServers = new ArrayList<ServerName>();
1353       if (oldServerName != null) {
1354         localServers = serversByHostname.get(oldServerName.getHostname());
1355       }
1356       if (masterIncluded && shouldBeOnMaster(region)) {
1357         assignments.get(masterServerName).add(region);
1358         if (localServers.contains(masterServerName)) {
1359           numRetainedAssigments++;
1360         } else {
1361           numRandomAssignments++;
1362         }
1363       } else if (localServers.isEmpty()) {
1364         // No servers on the new cluster match up with this hostname,
1365         // assign randomly.
1366         ServerName randomServer = randomAssignment(cluster, region, servers, backupMasters);
1367         assignments.get(randomServer).add(region);
1368         numRandomAssignments++;
1369         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1370       } else if (localServers.size() == 1) {
1371         // the usual case - one new server on same host
1372         ServerName target = localServers.get(0);
1373         assignments.get(target).add(region);
1374         cluster.doAssignRegion(region, target);
1375         numRetainedAssigments++;
1376       } else {
1377         // multiple new servers in the cluster on this same host
1378         if (localServers.contains(oldServerName)) {
1379           assignments.get(oldServerName).add(region);
1380           cluster.doAssignRegion(region, oldServerName);
1381         } else {
1382           ServerName target = null;
1383           for (ServerName tmp: localServers) {
1384             if (tmp.getPort() == oldServerName.getPort()) {
1385               target = tmp;
1386               break;
1387             }
1388           }
1389           if (target == null) {
1390             target = randomAssignment(cluster, region, localServers, backupMasters);
1391           }
1392           assignments.get(target).add(region);
1393         }
1394         numRetainedAssigments++;
1395       }
1396     }
1397 
1398     String randomAssignMsg = "";
1399     if (numRandomAssignments > 0) {
1400       randomAssignMsg =
1401           numRandomAssignments + " regions were assigned "
1402               + "to random hosts, since the old hosts for these regions are no "
1403               + "longer present in the cluster. These hosts were:\n  "
1404               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1405     }
1406 
1407     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1408         + " retained the pre-restart assignment. " + randomAssignMsg);
1409     return assignments;
1410   }
1411 
1412   @Override
1413   public void initialize() throws HBaseIOException{
1414   }
1415 
1416   @Override
1417   public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1418   }
1419 
1420   @Override
1421   public void regionOffline(HRegionInfo regionInfo) {
1422   }
1423 
1424   @Override
1425   public boolean isStopped() {
1426     return stopped;
1427   }
1428 
1429   @Override
1430   public void stop(String why) {
1431     LOG.info("Load Balancer stop requested: "+why);
1432     stopped = true;
1433   }
1434 
1435   /**
1436    * Prepare the list of target regionservers so that it doesn't
1437    * contain any excluded server, or backup master. Those backup masters
1438    * used to be in the original list are returned.
1439    */
1440   private List<ServerName> normalizeServers(List<ServerName> servers) {
1441     if (servers == null) {
1442       return null;
1443     }
1444     if (!excludedServers.isEmpty()) {
1445       servers.removeAll(excludedServers);
1446     }
1447     Collection<ServerName> allBackupMasters = getBackupMasters();
1448     List<ServerName> backupMasters = null;
1449     if (allBackupMasters != null && !allBackupMasters.isEmpty()) {
1450       for (ServerName server: allBackupMasters) {
1451         if (!servers.contains(server)) {
1452           // Ignore backup masters not included
1453           continue;
1454         }
1455         servers.remove(server);
1456         if (backupMasters == null) {
1457           backupMasters = new ArrayList<ServerName>();
1458         }
1459         backupMasters.add(server);
1460       }
1461     }
1462     return backupMasters;
1463   }
1464 
1465   /**
1466    * Used to assign a single region to a random server. The input should
1467    * have been already normalized: 1) servers doesn't include any exclude sever,
1468    * 2) servers doesn't include any backup master, 3) backupMasters contains
1469    * only backup masters that are intended to host this region, i.e, it
1470    * may not have all the backup masters.
1471    */
1472   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1473       List<ServerName> servers, List<ServerName> backupMasters) {
1474     int numServers = servers == null ? 0 : servers.size();
1475     int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
1476     if (numServers == 0 && numBackupMasters == 0) {
1477       LOG.warn("Wanted to do random assignment but no servers to assign to");
1478       return null;
1479     }
1480     if (servers != null && shouldBeOnMaster(regionInfo)
1481         && servers.contains(masterServerName)) {
1482       return masterServerName;
1483     }
1484     ServerName sn = null;
1485     final int maxIterations = servers.size() * 4;
1486     int iterations = 0;
1487 
1488     do {
1489       // Generate a random number weighted more towards
1490       // regular regionservers instead of backup masters.
1491       // This formula is chosen for simplicity.
1492       int i = RANDOM.nextInt(
1493         numBackupMasters + numServers * backupMasterWeight);
1494       if (i < numBackupMasters) {
1495         sn = backupMasters.get(i);
1496         continue;
1497       }
1498       i = (i - numBackupMasters)/backupMasterWeight;
1499       sn = servers.get(i);
1500       if (sn.equals(masterServerName)) {
1501         // Try to avoid master for a user region
1502         if (numServers > 1) {
1503           i = (i == 0 ? 1 : i - 1);
1504           sn = servers.get(i);
1505         } else if (numBackupMasters > 0) {
1506           sn = backupMasters.get(0);
1507         }
1508       }
1509     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1510         && iterations++ < maxIterations);
1511     cluster.doAssignRegion(regionInfo, sn);
1512     return sn;
1513   }
1514 
1515   /**
1516    * Round robin a chunk of a list of regions to a list of servers
1517    */
1518   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1519       List<HRegionInfo> unassignedRegions, int offset,
1520       int numRegions, List<ServerName> servers, List<HRegionInfo> masterRegions,
1521       Map<ServerName, List<HRegionInfo>> assignments) {
1522 
1523     boolean masterIncluded = servers.contains(masterServerName);
1524     int numServers = servers.size();
1525     int skipServers = numServers;
1526     if (masterIncluded) {
1527       skipServers--;
1528     }
1529     int max = (int) Math.ceil((float) numRegions / skipServers);
1530     int serverIdx = 0;
1531     if (numServers > 1) {
1532       serverIdx = RANDOM.nextInt(numServers);
1533     }
1534     int regionIdx = 0;
1535 
1536     for (int j = 0; j < numServers; j++) {
1537       ServerName server = servers.get((j + serverIdx) % numServers);
1538       if (masterIncluded && server.equals(masterServerName)) {
1539         // Don't put non-special region on the master regionserver,
1540         // So that it is not overloaded.
1541         continue;
1542       }
1543       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1544       for (int i = regionIdx; i < numRegions; i += skipServers) {
1545         HRegionInfo region = regions.get(offset + i % numRegions);
1546         if (masterRegions == null || !shouldBeOnMaster(region)) {
1547           if (cluster.wouldLowerAvailability(region, server)) {
1548             unassignedRegions.add(region);
1549           } else {
1550             serverRegions.add(region);
1551             cluster.doAssignRegion(region, server);
1552           }
1553           continue;
1554         }
1555         // Master is in the list and this is a special region
1556         masterRegions.add(region);
1557       }
1558       assignments.put(server, serverRegions);
1559       regionIdx++;
1560     }
1561   }
1562 
1563   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1564     Collection<HRegionInfo> regions) {
1565     if (this.services != null && this.services.getAssignmentManager() != null) {
1566       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1567     } else {
1568       return new HashMap<ServerName, List<HRegionInfo>>();
1569     }
1570   }
1571 }