1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.Deque;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.NavigableMap;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeMap;
36  
37  import org.apache.commons.lang.NotImplementedException;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.ClusterStatus;
42  import org.apache.hadoop.hbase.HBaseIOException;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.RegionLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.master.AssignmentManager;
48  import org.apache.hadoop.hbase.master.LoadBalancer;
49  import org.apache.hadoop.hbase.master.MasterServices;
50  import org.apache.hadoop.hbase.master.RegionPlan;
51  import org.apache.hadoop.hbase.security.access.AccessControlLists;
52  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
53  import org.apache.hadoop.hbase.master.RackManager;
54  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
55  
56  import com.google.common.base.Joiner;
57  import com.google.common.collect.ArrayListMultimap;
58  import com.google.common.collect.Lists;
59  import com.google.common.collect.Sets;
60  
61  /**
62   * The base class for load balancers. It provides functions used by
63   * {@link AssignmentManager} to assign regions in edge cases. It doesn't
64   * provide an implementation of the actual balancing algorithm.
65   *
66   */
67  public abstract class BaseLoadBalancer implements LoadBalancer {
68    private static final int MIN_SERVER_BALANCE = 2;
69    private volatile boolean stopped = false;
70  
71    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
72  
73    protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
74  
75    private static class DefaultRackManager extends RackManager {
76      @Override
77      public String getRack(ServerName server) {
78        return UNKNOWN_RACK;
79      }
80    }
81  
82    /**
83   * An efficient array-based implementation, similar to ClusterState, for keeping
84   * the status of the cluster in terms of region assignment and distribution.
85   * LoadBalancers such as StochasticLoadBalancer use this Cluster object because
86   * hundreds of thousands of hashmap manipulations would be very costly, which is
87   * why this class mostly uses indexes and arrays.
88     *
89     * Cluster tracks a list of unassigned regions, region assignments, and the server
90     * topology in terms of server names, hostnames and racks.
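     *
     * A minimal sketch (hypothetical indexes) of how an assignment is encoded in
     * these arrays:
     * <pre>
     *   // region 0 hosted on server 2, which lives on host 1 in rack 0
     *   regionIndexToServerIndex[0] = 2;
     *   serverIndexToHostIndex[2]   = 1;
     *   serverIndexToRackIndex[2]   = 0;
     *   regionsPerServer[2]         = new int[] {0};
     * </pre>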
91     */
92    protected static class Cluster {
93      ServerName masterServerName;
94      Set<String> tablesOnMaster;
95      ServerName[] servers;
96      String[] hosts; // ServerName uniquely identifies a region server; multiple RS can run on the same host
97      String[] racks;
98      boolean multiServersPerHost = false; // whether or not any host has more than one server
99  
100     ArrayList<String> tables;
101     HRegionInfo[] regions;
102     Deque<RegionLoad>[] regionLoads;
103     boolean[] backupMasterFlags;
104     int activeMasterIndex = -1;
105 
106     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
107 
108     int[]   serverIndexToHostIndex;      //serverIndex -> host index
109     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
110 
111     int[][] regionsPerServer;            //serverIndex -> region list
112     int[][] regionsPerHost;              //hostIndex -> list of regions
113     int[][] regionsPerRack;              //rackIndex -> region list
114     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
115     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
116     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
117 
118     int[][] serversPerHost;              //hostIndex -> list of server indexes
119     int[][] serversPerRack;              //rackIndex -> list of server indexes
120     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
121     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
122     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
123     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
124     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
125     int     numUserRegionsOnMaster;      //number of user regions on the active master
126     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
127     boolean hasRegionReplicas = false;   //whether there are regions with replicas
128 
129     Integer[] serverIndicesSortedByRegionCount;
130 
131     Map<String, Integer> serversToIndex;
132     Map<String, Integer> hostsToIndex;
133     Map<String, Integer> racksToIndex;
134     Map<String, Integer> tablesToIndex;
135     Map<HRegionInfo, Integer> regionsToIndex;
136 
137     int numServers;
138     int numHosts;
139     int numRacks;
140     int numTables;
141     int numRegions;
142 
143     int numMovedRegions = 0; //num moved regions from the initial configuration
144     // num of regions that should be on the master but were moved away from it
145     int numMovedMasterHostedRegions = 0;
146     int numMovedMetaRegions = 0;       //num of moved regions that are META
147     Map<ServerName, List<HRegionInfo>> clusterState;
148 
149     protected final RackManager rackManager;
150 
151     protected Cluster(
152         ServerName masterServerName,
153         Map<ServerName, List<HRegionInfo>> clusterState,
154         Map<String, Deque<RegionLoad>> loads,
155         RegionLocationFinder regionFinder,
156         Collection<ServerName> backupMasters,
157         Set<String> tablesOnMaster,
158         RackManager rackManager) {
159       this(masterServerName, null, clusterState, loads, regionFinder, backupMasters,
160         tablesOnMaster, rackManager);
161     }
162 
163     protected Cluster(
164         ServerName masterServerName,
165         Collection<HRegionInfo> unassignedRegions,
166         Map<ServerName, List<HRegionInfo>> clusterState,
167         Map<String, Deque<RegionLoad>> loads,
168         RegionLocationFinder regionFinder,
169         Collection<ServerName> backupMasters,
170         Set<String> tablesOnMaster,
171         RackManager rackManager) {
172 
173       if (unassignedRegions == null) {
174         unassignedRegions = EMPTY_REGION_LIST;
175       }
176 
177       this.masterServerName = masterServerName;
178       this.tablesOnMaster = tablesOnMaster;
179 
180       serversToIndex = new HashMap<String, Integer>();
181       hostsToIndex = new HashMap<String, Integer>();
182       racksToIndex = new HashMap<String, Integer>();
183       tablesToIndex = new HashMap<String, Integer>();
184 
185       //TODO: We should get the list of tables from master
186       tables = new ArrayList<String>();
187       this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
188 
189       numRegions = 0;
190 
191       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
192       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
193       this.clusterState = clusterState;
194 
195       // Use servername and port as there can be dead servers in this list. We want everything with
196       // a matching hostname and port to have the same index.
197       for (ServerName sn : clusterState.keySet()) {
198         if (serversToIndex.get(sn.getHostAndPort()) == null) {
199           serversToIndex.put(sn.getHostAndPort(), numServers++);
200         }
201         if (!hostsToIndex.containsKey(sn.getHostname())) {
202           hostsToIndex.put(sn.getHostname(), numHosts++);
203           serversPerHostList.add(new ArrayList<Integer>(1));
204         }
205 
206         int serverIndex = serversToIndex.get(sn.getHostAndPort());
207         int hostIndex = hostsToIndex.get(sn.getHostname());
208         serversPerHostList.get(hostIndex).add(serverIndex);
209 
210         String rack = this.rackManager.getRack(sn);
211         if (!racksToIndex.containsKey(rack)) {
212           racksToIndex.put(rack, numRacks++);
213           serversPerRackList.add(new ArrayList<Integer>());
214         }
215         int rackIndex = racksToIndex.get(rack);
216         serversPerRackList.get(rackIndex).add(serverIndex);
217       }
218 
219       // Count how many regions there are.
220       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
221         numRegions += entry.getValue().size();
222       }
223       numRegions += unassignedRegions.size();
224 
225       regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
226       servers = new ServerName[numServers];
227       serversPerHost = new int[numHosts][];
228       serversPerRack = new int[numRacks][];
229       regions = new HRegionInfo[numRegions];
230       regionIndexToServerIndex = new int[numRegions];
231       initialRegionIndexToServerIndex = new int[numRegions];
232       regionIndexToTableIndex = new int[numRegions];
233       regionIndexToPrimaryIndex = new int[numRegions];
234       regionLoads = new Deque[numRegions];
235       regionLocations = new int[numRegions][];
236       serverIndicesSortedByRegionCount = new Integer[numServers];
237       backupMasterFlags = new boolean[numServers];
238 
239       serverIndexToHostIndex = new int[numServers];
240       serverIndexToRackIndex = new int[numServers];
241       regionsPerServer = new int[numServers][];
242       regionsPerHost = new int[numHosts][];
243       regionsPerRack = new int[numRacks][];
244       primariesOfRegionsPerServer = new int[numServers][];
245       primariesOfRegionsPerHost = new int[numHosts][];
246       primariesOfRegionsPerRack = new int[numRacks][];
247 
248       int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
249 
250       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
251         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
252 
253         // keep the servername if this is the first server name for this hostname
254         // or this servername has the newest startcode.
255         if (servers[serverIndex] == null ||
256             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
257           servers[serverIndex] = entry.getKey();
258           backupMasterFlags[serverIndex] = backupMasters != null
259             && backupMasters.contains(servers[serverIndex]);
260         }
261 
262         if (regionsPerServer[serverIndex] != null) {
263           // there is another server with the same hostAndPort in ClusterState.
264           // allocate the array for the total size
265           regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
266         } else {
267           regionsPerServer[serverIndex] = new int[entry.getValue().size()];
268         }
269         primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
270         serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
271 
272         if (servers[serverIndex].equals(masterServerName)) {
273           activeMasterIndex = serverIndex;
274           for (HRegionInfo hri: entry.getValue()) {
275             if (!shouldBeOnMaster(hri)) {
276               numUserRegionsOnMaster++;
277             }
278           }
279         }
280       }
281 
282       hosts = new String[numHosts];
283       for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
284         hosts[entry.getValue()] = entry.getKey();
285       }
286       racks = new String[numRacks];
287       for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
288         racks[entry.getValue()] = entry.getKey();
289       }
290 
291       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
292         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
293         regionPerServerIndex = 0;
294 
295         int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
296         serverIndexToHostIndex[serverIndex] = hostIndex;
297 
298         int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
299         serverIndexToRackIndex[serverIndex] = rackIndex;
300 
301         for (HRegionInfo region : entry.getValue()) {
302           registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
303 
304           regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
305           regionIndex++;
306         }
307       }
308       for (HRegionInfo region : unassignedRegions) {
309         registerRegion(region, regionIndex, -1, loads, regionFinder);
310         regionIndex++;
311       }
312 
313       for (int i = 0; i < serversPerHostList.size(); i++) {
314         serversPerHost[i] = new int[serversPerHostList.get(i).size()];
315         for (int j = 0; j < serversPerHost[i].length; j++) {
316           serversPerHost[i][j] = serversPerHostList.get(i).get(j);
317         }
318         if (serversPerHost[i].length > 1) {
319           multiServersPerHost = true;
320         }
321       }
322 
323       for (int i = 0; i < serversPerRackList.size(); i++) {
324         serversPerRack[i] = new int[serversPerRackList.get(i).size()];
325         for (int j = 0; j < serversPerRack[i].length; j++) {
326           serversPerRack[i][j] = serversPerRackList.get(i).get(j);
327         }
328       }
329 
330       numTables = tables.size();
331       numRegionsPerServerPerTable = new int[numServers][numTables];
332 
333       for (int i = 0; i < numServers; i++) {
334         for (int j = 0; j < numTables; j++) {
335           numRegionsPerServerPerTable[i][j] = 0;
336         }
337       }
338 
339       for (int i=0; i < regionIndexToServerIndex.length; i++) {
340         if (regionIndexToServerIndex[i] >= 0) {
341           numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
342         }
343       }
344 
345       numMaxRegionsPerTable = new int[numTables];
346       for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
347         for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
348           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
349             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
350           }
351         }
352       }
353 
354       for (int i = 0; i < regions.length; i ++) {
355         HRegionInfo info = regions[i];
356         if (RegionReplicaUtil.isDefaultReplica(info)) {
357           regionIndexToPrimaryIndex[i] = i;
358         } else {
359           hasRegionReplicas = true;
360           HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
361           regionIndexToPrimaryIndex[i] =
362               regionsToIndex.containsKey(primaryInfo) ?
363               regionsToIndex.get(primaryInfo):
364               -1;
365         }
366       }
367 
368       for (int i = 0; i < regionsPerServer.length; i++) {
369         primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
370         for (int j = 0; j < regionsPerServer[i].length; j++) {
371           int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
372           primariesOfRegionsPerServer[i][j] = primaryIndex;
373         }
374         // sort the regions by primaries.
375         Arrays.sort(primariesOfRegionsPerServer[i]);
376       }
377 
378       // compute regionsPerHost
379       if (multiServersPerHost) {
380         for (int i = 0 ; i < serversPerHost.length; i++) {
381           int numRegionsPerHost = 0;
382           for (int j = 0; j < serversPerHost[i].length; j++) {
383             numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
384           }
385           regionsPerHost[i] = new int[numRegionsPerHost];
386           primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
387         }
388         for (int i = 0 ; i < serversPerHost.length; i++) {
389           int numRegionPerHostIndex = 0;
390           for (int j = 0; j < serversPerHost[i].length; j++) {
391             for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
392               int region = regionsPerServer[serversPerHost[i][j]][k];
393               regionsPerHost[i][numRegionPerHostIndex] = region;
394               int primaryIndex = regionIndexToPrimaryIndex[region];
395               primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
396               numRegionPerHostIndex++;
397             }
398           }
399           // sort the regions by primaries.
400           Arrays.sort(primariesOfRegionsPerHost[i]);
401         }
402       }
403 
404       // compute regionsPerRack
405       if (numRacks > 1) {
406         for (int i = 0 ; i < serversPerRack.length; i++) {
407           int numRegionsPerRack = 0;
408           for (int j = 0; j < serversPerRack[i].length; j++) {
409             numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
410           }
411           regionsPerRack[i] = new int[numRegionsPerRack];
412           primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
413         }
414 
415         for (int i = 0 ; i < serversPerRack.length; i++) {
416           int numRegionPerRackIndex = 0;
417           for (int j = 0; j < serversPerRack[i].length; j++) {
418             for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
419               int region = regionsPerServer[serversPerRack[i][j]][k];
420               regionsPerRack[i][numRegionPerRackIndex] = region;
421               int primaryIndex = regionIndexToPrimaryIndex[region];
422               primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
423               numRegionPerRackIndex++;
424             }
425           }
426           // sort the regions by primaries.
427           Arrays.sort(primariesOfRegionsPerRack[i]);
428         }
429       }
430     }
431 
432     /** Helper for Cluster constructor to handle a region */
433     private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
434         Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
435       String tableName = region.getTable().getNameAsString();
436       if (!tablesToIndex.containsKey(tableName)) {
437         tables.add(tableName);
438         tablesToIndex.put(tableName, tablesToIndex.size());
439       }
440       int tableIndex = tablesToIndex.get(tableName);
441 
442       regionsToIndex.put(region, regionIndex);
443       regions[regionIndex] = region;
444       regionIndexToServerIndex[regionIndex] = serverIndex;
445       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
446       regionIndexToTableIndex[regionIndex] = tableIndex;
447 
448       // region load
449       if (loads != null) {
450         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
451         // That could have failed if the RegionLoad is using the other regionName
452         if (rl == null) {
453           // Try getting the region load using encoded name.
454           rl = loads.get(region.getEncodedName());
455         }
456         regionLoads[regionIndex] = rl;
457       }
458 
459       if (regionFinder != null) {
460         //region location
461         List<ServerName> loc = regionFinder.getTopBlockLocations(region);
462         regionLocations[regionIndex] = new int[loc.size()];
463         for (int i=0; i < loc.size(); i++) {
464           regionLocations[regionIndex][i] =
465               loc.get(i) == null ? -1 :
466                 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
467                     : serversToIndex.get(loc.get(i).getHostAndPort()));
468         }
469       }
470     }
471 
472     /** An action to move or swap a region */
473     public static class Action {
474       public static enum Type {
475         ASSIGN_REGION,
476         MOVE_REGION,
477         SWAP_REGIONS,
478         NULL,
479       }
480 
481       public Type type;
482       public Action (Type type) {this.type = type;}
483       /** Returns an Action which would undo this action */
484       public Action undoAction() { return this; }
485       @Override
486       public String toString() { return type + ":";}
487     }
488 
489     public static class AssignRegionAction extends Action {
490       public int region;
491       public int server;
492       public AssignRegionAction(int region, int server) {
493         super(Type.ASSIGN_REGION);
494         this.region = region;
495         this.server = server;
496       }
497       @Override
498       public Action undoAction() {
499         // TODO: implement this. This action is not currently used by the StochasticLB;
500         // if it ever is, we should implement this function.
501         throw new NotImplementedException();
502       }
503       @Override
504       public String toString() {
505         return type + ": " + region + ":" + server;
506       }
507     }
508 
509     public static class MoveRegionAction extends Action {
510       public int region;
511       public int fromServer;
512       public int toServer;
513 
514       public MoveRegionAction(int region, int fromServer, int toServer) {
515         super(Type.MOVE_REGION);
516         this.fromServer = fromServer;
517         this.region = region;
518         this.toServer = toServer;
519       }
520       @Override
521       public Action undoAction() {
522         return new MoveRegionAction (region, toServer, fromServer);
523       }
524       @Override
525       public String toString() {
526         return type + ": " + region + ":" + fromServer + " -> " + toServer;
527       }
528     }
529 
530     public static class SwapRegionsAction extends Action {
531       public int fromServer;
532       public int fromRegion;
533       public int toServer;
534       public int toRegion;
535       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
536         super(Type.SWAP_REGIONS);
537         this.fromServer = fromServer;
538         this.fromRegion = fromRegion;
539         this.toServer = toServer;
540         this.toRegion = toRegion;
541       }
542       @Override
543       public Action undoAction() {
544         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
545       }
546       @Override
547       public String toString() {
548         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
549       }
550     }
551 
552     public static Action NullAction = new Action(Type.NULL);
553 
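        /**
         * Applies the given action to this in-memory cluster model, updating the
         * bookkeeping arrays accordingly. A minimal usage sketch (indexes are
         * hypothetical):
         * <pre>
         *   Action move = new MoveRegionAction(0, 1, 2); // region 0: server 1 -> server 2
         *   cluster.doAction(move);
         *   cluster.doAction(move.undoAction()); // puts region 0 back on server 1
         * </pre>
         */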
554     public void doAction(Action action) {
555       switch (action.type) {
556       case NULL: break;
557       case ASSIGN_REGION:
558         AssignRegionAction ar = (AssignRegionAction) action;
559         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
560         regionMoved(ar.region, -1, ar.server);
561         break;
562       case MOVE_REGION:
563         MoveRegionAction mra = (MoveRegionAction) action;
564         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
565         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
566         regionMoved(mra.region, mra.fromServer, mra.toServer);
567         break;
568       case SWAP_REGIONS:
569         SwapRegionsAction a = (SwapRegionsAction) action;
570         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
571         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
572         regionMoved(a.fromRegion, a.fromServer, a.toServer);
573         regionMoved(a.toRegion, a.toServer, a.fromServer);
574         break;
575       default:
576         throw new RuntimeException("Unknown action: " + action.type);
577       }
578     }
579 
580     /**
581      * Return true if the placement of region on server would lower the availability
582      * of the region in question
583      * @param regionInfo the region in question
584      * @param serverName the server being considered for placement
585      * @return true if placing the region on the server would lower its availability
586      */
587     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
588       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
589         return false; // safeguard against race between cluster.servers and servers from LB method args
590       }
591       int server = serversToIndex.get(serverName.getHostAndPort());
592       int region = regionsToIndex.get(regionInfo);
593 
594       int primary = regionIndexToPrimaryIndex[region];
595 
596       // there is a subset relation for server < host < rack
597       // check server first
598 
599       if (contains(primariesOfRegionsPerServer[server], primary)) {
600         // check for whether there are other servers that we can place this region
601         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
602           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
603             return true; // meaning there is a better server
604           }
605         }
606         return false; // there is not a better server to place this
607       }
608 
609       // check host
610       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
611         int host = serverIndexToHostIndex[server];
612         if (contains(primariesOfRegionsPerHost[host], primary)) {
613           // check for whether there are other hosts that we can place this region
614           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
615             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
616               return true; // meaning there is a better host
617             }
618           }
619           return false; // there is not a better host to place this
620         }
621       }
622 
623       // check rack
624       if (numRacks > 1) {
625         int rack = serverIndexToRackIndex[server];
626         if (contains(primariesOfRegionsPerRack[rack], primary)) {
627           // check for whether there are other racks that we can place this region
628           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
629             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
630               return true; // meaning there is a better rack
631             }
632           }
633           return false; // there is not a better rack to place this
634         }
635       }
636       return false;
637     }
638 
639     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
640       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
641         return;
642       }
643       int server = serversToIndex.get(serverName.getHostAndPort());
644       int region = regionsToIndex.get(regionInfo);
645       doAction(new AssignRegionAction(region, server));
646     }
647 
648     void regionMoved(int region, int oldServer, int newServer) {
649       regionIndexToServerIndex[region] = newServer;
650       if (initialRegionIndexToServerIndex[region] == newServer) {
651         numMovedRegions--; //region moved back to original location
652         if (shouldBeOnMaster(regions[region]) && isActiveMaster(newServer)) {
653           //Master hosted region moved back to the active master
654           numMovedMasterHostedRegions--;
655         }
656       } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
657         numMovedRegions++; //region moved from original location
658         if (shouldBeOnMaster(regions[region]) && isActiveMaster(oldServer)) {
659           // Master hosted region moved away from the active master
660           numMovedMasterHostedRegions++;
661         }
662       }
663       int tableIndex = regionIndexToTableIndex[region];
664       if (oldServer >= 0) {
665         numRegionsPerServerPerTable[oldServer][tableIndex]--;
666       }
667       numRegionsPerServerPerTable[newServer][tableIndex]++;
668 
669       //check whether this caused maxRegionsPerTable in the new Server to be updated
670       if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
671         numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[newServer][tableIndex];
672       } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
673           == numMaxRegionsPerTable[tableIndex]) {
674         //recompute maxRegionsPerTable since the previous value was coming from the old server
675         for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
676           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
677             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
678           }
679         }
680       }
681 
682       // update for servers
683       int primary = regionIndexToPrimaryIndex[region];
684       if (oldServer >= 0) {
685         primariesOfRegionsPerServer[oldServer] = removeRegion(
686           primariesOfRegionsPerServer[oldServer], primary);
687       }
688       primariesOfRegionsPerServer[newServer] = addRegionSorted(
689         primariesOfRegionsPerServer[newServer], primary);
690 
691       // update for hosts
692       if (multiServersPerHost) {
693         int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
694         int newHost = serverIndexToHostIndex[newServer];
695         if (newHost != oldHost) {
696           regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
697           primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
698           if (oldHost >= 0) {
699             regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
700             primariesOfRegionsPerHost[oldHost] = removeRegion(
701               primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
702           }
703         }
704       }
705 
706       // update for racks
707       if (numRacks > 1) {
708         int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
709         int newRack = serverIndexToRackIndex[newServer];
710         if (newRack != oldRack) {
711           regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
712           primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
713           if (oldRack >= 0) {
714             regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
715             primariesOfRegionsPerRack[oldRack] = removeRegion(
716               primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
717           }
718         }
719       }
720       if (oldServer >= 0 && isActiveMaster(oldServer)) {
721         if (!shouldBeOnMaster(regions[region])) {
722           numUserRegionsOnMaster--;
723         }
724       } else if (isActiveMaster(newServer)) {
725         if (!shouldBeOnMaster(regions[region])) {
726           numUserRegionsOnMaster++;
727         }
728       }
729     }
730 
731     int[] removeRegion(int[] regions, int regionIndex) {
732       //TODO: this may be costly. Consider using linked lists
733       int[] newRegions = new int[regions.length - 1];
734       int i = 0;
735       for (i = 0; i < regions.length; i++) {
736         if (regions[i] == regionIndex) {
737           break;
738         }
739         newRegions[i] = regions[i];
740       }
741       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
742       return newRegions;
743     }
744 
745     int[] addRegion(int[] regions, int regionIndex) {
746       int[] newRegions = new int[regions.length + 1];
747       System.arraycopy(regions, 0, newRegions, 0, regions.length);
748       newRegions[newRegions.length - 1] = regionIndex;
749       return newRegions;
750     }
751 
752     int[] addRegionSorted(int[] regions, int regionIndex) {
753       int[] newRegions = new int[regions.length + 1];
754       int i = 0;
755       for (i = 0; i < regions.length; i++) { // find the index to insert
756         if (regions[i] > regionIndex) {
757           break;
758         }
759       }
760       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
761       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
762       newRegions[i] = regionIndex;
763 
764       return newRegions;
765     }
766 
767     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
768       int i = 0;
769       for (i = 0; i < regions.length; i++) {
770         if (regions[i] == regionIndex) {
771           regions[i] = newRegionIndex;
772           break;
773         }
774       }
775       return regions;
776     }
777 
778     void sortServersByRegionCount() {
779       Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
780     }
781 
782     int getNumRegions(int server) {
783       return regionsPerServer[server].length;
784     }
785 
786     boolean isBackupMaster(int server) {
787       return backupMasterFlags[server];
788     }
789 
790     boolean isActiveMaster(int server) {
791       return activeMasterIndex == server;
792     }
793 
794     boolean shouldBeOnMaster(HRegionInfo region) {
795       return tablesOnMaster != null && tablesOnMaster.contains(
796         region.getTable().getNameAsString());
797     }
798 
799     boolean contains(int[] arr, int val) {
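        // Note: relies on binary search, so the given array must be sorted; the
        // primariesOfRegionsPer* arrays passed here are kept sorted for this reason.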
800       return Arrays.binarySearch(arr, val) >= 0;
801     }
802 
803     private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
804       @Override
805       public int compare(Integer integer, Integer integer2) {
806         return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
807       }
808     };
809 
810     @Override
811     public String toString() {
812       String desc = "Cluster{" +
813           "servers=[";
814           for(ServerName sn:servers) {
815              desc += sn.getHostAndPort() + ", ";
816           }
817           desc +=
818           ", serverIndicesSortedByRegionCount="+
819           Arrays.toString(serverIndicesSortedByRegionCount) +
820           ", regionsPerServer=[";
821 
822           for (int[]r:regionsPerServer) {
823             desc += Arrays.toString(r);
824           }
825           desc += "]" +
826           ", numMaxRegionsPerTable=" +
827           Arrays.toString(numMaxRegionsPerTable) +
828           ", numRegions=" +
829           numRegions +
830           ", numServers=" +
831           numServers +
832           ", numTables=" +
833           numTables +
834           ", numMovedRegions=" +
835           numMovedRegions +
836           ", numMovedMasterHostedRegions=" +
837           numMovedMasterHostedRegions +
838           '}';
839       return desc;
840     }
841   }
842 
843   // slop for regions
844   protected float slop;
845   protected Configuration config;
846   protected RackManager rackManager;
847   private static final Random RANDOM = new Random(System.currentTimeMillis());
848   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
849 
850   // The weight means that, when the load balancer calculates region load,
851   // each region on the active/backup master counts as that many regions on a
852   // normal regionserver. As a result, the active/backup master can host fewer
853   // regions (or the same number, if the weight is 1) than normal regionservers.
854   //
855   // The weight can be used to control the number of regions on backup
856   // masters, which shouldn't host as many regions as normal regionservers,
857   // so that we don't need to move around too many regions when a
858   // backup master becomes the active one.
859   //
860   // Currently, the active master weight is used only by StochasticLoadBalancer.
861   // Generally, we don't put any user regions on the active master, which
862   // only hosts regions of tables defined in TABLES_ON_MASTER.
863   // That's why the default activeMasterWeight is high.
864   public static final String BACKUP_MASTER_WEIGHT_KEY =
865     "hbase.balancer.backupMasterWeight";
866   public static final int DEFAULT_BACKUP_MASTER_WEIGHT = 1;
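      // A rough illustration of the weighting (values are hypothetical): with
      // backupMasterWeight = 5, a backup master is expected to carry roughly 1/5
      // of the regions a normal regionserver carries, e.g. configured via
      //   conf.setInt(BACKUP_MASTER_WEIGHT_KEY, 5);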
867 
868   private static final String ACTIVE_MASTER_WEIGHT_KEY =
869     "hbase.balancer.activeMasterWeight";
870   private static final int DEFAULT_ACTIVE_MASTER_WEIGHT = 200;
871 
872   // Regions of these tables are put on the master by default.
873   private static final String[] DEFAULT_TABLES_ON_MASTER =
874     new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
875       TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
876       TableName.META_TABLE_NAME.getNameAsString()};
877 
878   protected int activeMasterWeight;
879   protected int backupMasterWeight;
880 
881   // a flag to indicate whether regions should be assigned to backup masters
882   protected boolean usingBackupMasters = true;
883   protected final Set<ServerName> excludedServers =
884     Collections.synchronizedSet(new HashSet<ServerName>());
885 
886   protected final Set<String> tablesOnMaster = new HashSet<String>();
887   protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
888   protected ClusterStatus clusterStatus = null;
889   protected ServerName masterServerName;
890   protected MasterServices services;
891 
892   @Override
893   public void setConf(Configuration conf) {
894     setSlop(conf);
895     if (slop < 0) slop = 0;
896     else if (slop > 1) slop = 1;
897 
898     this.config = conf;
899     activeMasterWeight = conf.getInt(
900       ACTIVE_MASTER_WEIGHT_KEY, DEFAULT_ACTIVE_MASTER_WEIGHT);
901     backupMasterWeight = conf.getInt(
902       BACKUP_MASTER_WEIGHT_KEY, DEFAULT_BACKUP_MASTER_WEIGHT);
903     if (backupMasterWeight < 1) {
904       usingBackupMasters = false;
905       LOG.info("Backup master won't host any region since "
906         + BACKUP_MASTER_WEIGHT_KEY + " is " + backupMasterWeight
907         + "(<1)");
908     }
909     String[] tables = conf.getStrings(
910       "hbase.balancer.tablesOnMaster", DEFAULT_TABLES_ON_MASTER);
911     if (tables != null) {
912       for (String table: tables) {
913         tablesOnMaster.add(table);
914       }
915     }
916     this.rackManager = new RackManager(getConf());
917     regionFinder.setConf(conf);
918   }
919 
920   protected void setSlop(Configuration conf) {
921     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
922   }
923 
924   /**
925    * If any servers are excluded, filter them out of the cluster map so we won't
926    * assign any regions to them, as long as no regions are already assigned there.
927    */
928   protected void filterExcludedServers(Map<ServerName, List<HRegionInfo>> clusterMap) {
929     if (excludedServers.isEmpty()) { // No server to filter out
930       return;
931     }
932     Iterator<Map.Entry<ServerName, List<HRegionInfo>>> it = clusterMap.entrySet().iterator();
933     while (it.hasNext()) {
934       Map.Entry<ServerName, List<HRegionInfo>> en = it.next();
935       if (excludedServers.contains(en.getKey()) && en.getValue().isEmpty()) {
936         it.remove();
937       }
938     }
939   }
940 
941   /**
942    * Check if a region belongs to some small system table.
943    * If so, it may be expected to be put on the master regionserver.
944    */
945   protected boolean shouldBeOnMaster(HRegionInfo region) {
946     return tablesOnMaster.contains(region.getTable().getNameAsString());
947   }
948 
949   /**
950    * Balance the regions that should be on master regionserver.
951    */
952   protected List<RegionPlan> balanceMasterRegions(
953       Map<ServerName, List<HRegionInfo>> clusterMap) {
954     if (services == null || clusterMap.size() <= 1) return null;
955     List<RegionPlan> plans = null;
956     List<HRegionInfo> regions = clusterMap.get(masterServerName);
957     if (regions != null) {
958       Iterator<ServerName> keyIt = null;
959       for (HRegionInfo region: regions) {
960         if (shouldBeOnMaster(region)) continue;
961 
962         // Find a non-master regionserver to host the region
963         if (keyIt == null || !keyIt.hasNext()) {
964           keyIt = clusterMap.keySet().iterator();
965         }
966         ServerName dest = keyIt.next();
967         if (masterServerName.equals(dest)) {
968           if (!keyIt.hasNext()) {
969             keyIt = clusterMap.keySet().iterator();
970           }
971           dest = keyIt.next();
972         }
973 
974         // Move this region away from the master regionserver
975         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
976         if (plans == null) {
977           plans = new ArrayList<RegionPlan>();
978         }
979         plans.add(plan);
980       }
981     }
982     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
983       if (masterServerName.equals(server.getKey())) continue;
984       for (HRegionInfo region: server.getValue()) {
985         if (!shouldBeOnMaster(region)) continue;
986 
987         // Move this region to the master regionserver
988         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
989         if (plans == null) {
990           plans = new ArrayList<RegionPlan>();
991         }
992         plans.add(plan);
993       }
994     }
995     return plans;
996   }
997 
998   public void excludeServer(ServerName serverName) {
999     if (!usingBackupMasters) excludedServers.add(serverName);
1000   }
1001 
1002   public Set<ServerName> getExcludedServers() {
1003     return excludedServers;
1004   }
1005 
1006   @Override
1007   public Configuration getConf() {
1008     return this.config;
1009   }
1010 
1011   @Override
1012   public void setClusterStatus(ClusterStatus st) {
1013     this.clusterStatus = st;
1014     if (st == null || usingBackupMasters) return;
1015 
1016     // Don't assign any regions to backup masters:
1017     // put them on the excluded server list.
1018     // Assume there won't be too many backup masters
1019     // re/starting, so this won't leak much memory.
1020     excludedServers.addAll(st.getBackupMasters());
1021     regionFinder.setClusterStatus(st);
1022   }
1023 
1024   @Override
1025   public void setMasterServices(MasterServices masterServices) {
1026     masterServerName = masterServices.getServerName();
1027     excludedServers.remove(masterServerName);
1028     this.services = masterServices;
1029     this.regionFinder.setServices(masterServices);
1030   }
1031 
1032   public void setRackManager(RackManager rackManager) {
1033     this.rackManager = rackManager;
1034   }
1035 
1036   protected Collection<ServerName> getBackupMasters() {
1037     return clusterStatus == null ? null : clusterStatus.getBackupMasters();
1038   }
1039 
1040   protected boolean needsBalance(Cluster c) {
1041     ClusterLoadState cs = new ClusterLoadState(
1042       masterServerName, getBackupMasters(), backupMasterWeight, c.clusterState);
1043     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
1044       if (LOG.isDebugEnabled()) {
1045         LOG.debug("Not running balancer because only " + cs.getNumServers()
1046             + " active regionserver(s)");
1047       }
1048       return false;
1049     }
1050     if(areSomeRegionReplicasColocated(c)) return true;
1051     // Check if we even need to do any load balancing
1052     // HBASE-3681 check sloppiness first
1053     float average = cs.getLoadAverage(); // for logging
1054     int floor = (int) Math.floor(average * (1 - slop));
1055     int ceiling = (int) Math.ceil(average * (1 + slop));
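          // e.g. (hypothetical numbers) average = 100 regions/server and slop = 0.2
          // give floor = 80 and ceiling = 120; balancing is skipped unless some
          // server is above 120 or below 80.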
1056     if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
1057       NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
1058       if (LOG.isTraceEnabled()) {
1059         // If nothing to balance, then don't say anything unless trace-level logging.
1060         LOG.trace("Skipping load balancing because balanced cluster; " +
1061           "servers=" + cs.getNumServers() + "(backupMasters=" + cs.getNumBackupMasters() +
1062           ") regions=" + cs.getNumRegions() + " average=" + average + " " +
1063           "mostloaded=" + serversByLoad.lastKey().getLoad() +
1064           " leastloaded=" + serversByLoad.firstKey().getLoad());
1065       }
1066       return false;
1067     }
1068     return true;
1069   }
1070 
1071   /**
1072   * Subclasses should implement this to return true if the cluster has nodes that host
1073   * multiple replicas of the same region, or if there are multiple racks and the same
1074   * rack hosts replicas of the same region.
1075    * @param c Cluster information
1076    * @return whether region replicas are currently co-located
1077    */
1078   protected boolean areSomeRegionReplicasColocated(Cluster c) {
1079     return false;
1080   }
1081 
1082   /**
1083    * Generates a bulk assignment plan to be used on cluster startup using a
1084    * simple round-robin assignment.
1085    * <p>
1086    * Takes a list of all the regions and all the servers in the cluster and
1087    * returns a map of each server to the regions that it should be assigned.
1088    * <p>
1089    * Currently implemented as a round-robin assignment. Same invariant as load
1090    * balancing, all servers holding floor(avg) or ceiling(avg).
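       * <p>
       * For example (hypothetical numbers): assigning 7 regions across 3 servers
       * yields counts of 3, 2 and 2, so every server holds either floor(7/3) = 2
       * or ceil(7/3) = 3 regions.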
1091    *
1092    * TODO: Use block locations from HDFS to place regions with their blocks
1093    *
1094    * @param regions all regions
1095    * @param servers all servers
1096    * @return map of server to the regions it should take, or null if no
1097    *         assignment is possible (ie. no regions or no servers)
1098    */
1099   @Override
1100   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1101       List<ServerName> servers) {
1102     metricsBalancer.incrMiscInvocations();
1103     if (regions == null || regions.isEmpty()) {
1104       return null;
1105     }
1106 
1107     List<ServerName> backupMasters = normalizeServers(servers);
1108     int numServers = servers == null ? 0 : servers.size();
1109     int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
1110     if (numServers == 0 && numBackupMasters == 0) {
1111       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1112       return null;
1113     }
1114 
1115     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1116     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1117     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1118     // and balanced. This should also run fast with fewer iterations.
1119 
1120     Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
1121     if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here
1122       ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0);
1123       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1124       return assignments;
1125     }
1126     List<HRegionInfo> masterRegions = null;
1127     if (numServers > 0 && servers.contains(masterServerName)) {
1128       masterRegions = new ArrayList<HRegionInfo>();
1129       if (numServers == 1) {
1130         // The only server in servers is the master,
1131         // Assign all regions to backup masters
1132         numServers = 0;
1133       }
1134     }
1135 
1136     Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster);
1137     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1138 
1139     int total = regions.size();
1140     // Get the number of regions to be assigned
1141     // to backup masters based on the weight
1142     int numRegions = total * numBackupMasters
1143       / (numServers * backupMasterWeight + numBackupMasters);
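         // e.g. (hypothetical numbers) total = 100 regions, 4 regionservers,
         // 1 backup master and backupMasterWeight = 5:
         // 100 * 1 / (4 * 5 + 1) = 4 regions go to the backup master.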
1144     if (numRegions > 0) {
1145       // backupMasters can't be null: numRegions > 0 implies numBackupMasters != 0
1146       roundRobinAssignment(cluster, regions, unassignedRegions, 0,
1147         numRegions, backupMasters, masterRegions, assignments);
1148     }
1149     int remainder = total - numRegions;
1150     if (remainder > 0) {
1151       // servers can't be null or contain only the master, since numServers != 0
1152       roundRobinAssignment(cluster, regions, unassignedRegions, numRegions, remainder,
1153         servers, masterRegions, assignments);
1154     }
1155     if (masterRegions != null && !masterRegions.isEmpty()) {
1156       assignments.put(masterServerName, masterRegions);
1157       for (HRegionInfo r : masterRegions) {
1158         cluster.doAssignRegion(r, masterServerName);
1159       }
1160     }
1161     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1162     // assign the remaining regions by going through the list and trying servers one by one
1163     int serverIdx = RANDOM.nextInt(numServers);
1164     for (HRegionInfo region : unassignedRegions) {
1165       boolean assigned = false;
1166       for (int j = 0; j < numServers; j++) { // try all servers one by one
1167         ServerName serverName = servers.get((j + serverIdx) % numServers);
1168         if (serverName.equals(masterServerName)) {
1169           continue;
1170         }
1171         if (!cluster.wouldLowerAvailability(region, serverName)) {
1172           List<HRegionInfo> serverRegions = assignments.get(serverName);
1173           if (serverRegions == null) {
1174             serverRegions = new ArrayList<HRegionInfo>();
1175             assignments.put(serverName, serverRegions);
1176           }
1177           serverRegions.add(region);
1178           cluster.doAssignRegion(region, serverName);
1179           serverIdx = (j + serverIdx + 1) % numServers; // start from the next server next time
1180           assigned = true;
1181           break;
1182         }
1183       }
1184       if (!assigned) {
1185         lastFewRegions.add(region);
1186       }
1187     }
1188     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1189     // make it optimal later. We can end up with this if numReplicas > numServers.
1190     for (HRegionInfo region : lastFewRegions) {
1191       ServerName server = null;
1192       if (numServers == 0) {
1193         // select from backup masters
1194         int i = RANDOM.nextInt(backupMasters.size());
1195         server = backupMasters.get(i);
1196       } else {
1197         do {
1198           int i = RANDOM.nextInt(numServers);
1199           server = servers.get(i);
1200         } while (numServers > 1 && server.equals(masterServerName));
1201       }
1202       List<HRegionInfo> serverRegions = assignments.get(server);
1203       if (serverRegions == null) {
1204         serverRegions = new ArrayList<HRegionInfo>();
1205         assignments.put(server, serverRegions);
1206       }
1207       serverRegions.add(region);
1208       cluster.doAssignRegion(region, server);
1209     }
1210     return assignments;
1211   }
1212 
1213   protected Cluster createCluster(List<ServerName> servers,
1214       Collection<HRegionInfo> regions, List<ServerName> backupMasters, Set<String> tablesOnMaster) {
1215     // Get the snapshot of the current assignments for the regions in question, and then create
1216     // a cluster out of it. Note that we might have replicas already assigned to some servers
1217     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
1218     // replicas of the regions that are passed (for performance).
1219     Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1220 
1221     for (ServerName server : servers) {
1222       if (!clusterState.containsKey(server)) {
1223         clusterState.put(server, EMPTY_REGION_LIST);
1224       }
1225     }
1226     return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder, backupMasters,
1227       tablesOnMaster, rackManager);
1228   }
1229 
1230   /**
1231    * Generates an immediate assignment plan to be used by a new master for
1232    * regions in transition that do not have an already known destination.
1233    *
1234    * Takes a list of regions that need immediate assignment and a list of all
1235    * available servers. Returns a map of regions to the server they should be
1236    * assigned to.
1237    *
1238    * This method will return quickly and does not do any intelligent balancing.
1239   * The goal is to make a fast decision, not the best decision possible.
1240    *
1241    * Currently this is random.
1242    *
1243   * @param regions the regions needing immediate assignment
1244   * @param servers the available servers
1245    * @return map of regions to the server it should be assigned to
1246    */
1247   @Override
1248   public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
1249       List<ServerName> servers) {
1250     metricsBalancer.incrMiscInvocations();
1251     if (servers == null || servers.isEmpty()) {
1252       LOG.warn("Wanted to do random assignment but no servers to assign to");
1253       return null;
1254     }
1255 
1256     Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
1257     for (HRegionInfo region : regions) {
1258       assignments.put(region, randomAssignment(region, servers));
1259     }
1260     return assignments;
1261   }
1262 
1263   /**
1264    * Used to assign a single region to a random server.
1265    */
1266   @Override
1267   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1268     metricsBalancer.incrMiscInvocations();
1269     if (servers == null || servers.isEmpty()) {
1270       LOG.warn("Wanted to do random assignment but no servers to assign to");
1271       return null;
1272     }
1273     List<ServerName> backupMasters = normalizeServers(servers);
1274     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1275     Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster);
1276 
1277     return randomAssignment(cluster, regionInfo, servers, backupMasters);
1278   }
1279 
1280   /**
1281    * Generates a bulk assignment startup plan, attempting to reuse the existing
1282    * assignment information from META, but adjusting for the specified list of
1283    * available/online servers available for assignment.
1284    * <p>
1285    * Takes a map of all regions to their existing assignment from META. Also
1286    * takes a list of online servers for regions to be assigned to. Attempts to
1287    * retain all assignments, so in some instances the initial assignment will not be
1288    * completely balanced.
1289    * <p>
1290    * Any leftover regions without an existing server to be assigned to will be
1291    * assigned randomly to available servers.
1292    *
1293    * @param regions regions and existing assignment from meta
1294    * @param servers available servers
1295    * @return map of servers and regions to be assigned to them
1296    */
1297   @Override
1298   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1299       List<ServerName> servers) {
1300     // Update metrics
1301     metricsBalancer.incrMiscInvocations();
1302     if (regions == null || regions.isEmpty()) {
1303       return null;
1304     }
1305 
1306     List<ServerName> backupMasters = normalizeServers(servers);
1307     int numServers = servers == null ? 0 : servers.size();
1308     int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
1309     if (numServers == 0 && numBackupMasters == 0) {
1310       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1311       return null;
1312     }
1313     Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
1314     if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here
1315       ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0);
1316       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1317       return assignments;
1318     }
1319 
1320     // Group all of the old assignments by their hostname.
1321     // We can't group directly by ServerName since the servers all have
1322     // new start-codes.
1323 
1324     // Group the servers by their hostname. It's possible we have multiple
1325     // servers on the same host on different ports.
1326     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1327     for (ServerName server : servers) {
1328       assignments.put(server, new ArrayList<HRegionInfo>());
1329       if (!server.equals(masterServerName)) {
1330         serversByHostname.put(server.getHostname(), server);
1331       }
1332     }
1333     if (numBackupMasters > 0) {
1334       for (ServerName server : backupMasters) {
1335         assignments.put(server, new ArrayList<HRegionInfo>());
1336       }
1337     }
1338 
1339     // Collection of the hostnames that used to have regions
1340     // assigned, but for which we no longer have any RS running
1341     // after the cluster restart.
1342     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1343 
1344     // Master regionserver is in the server list.
1345     boolean masterIncluded = servers.contains(masterServerName);
1346 
1347     int numRandomAssignments = 0;
1348     int numRetainedAssignments = 0;
1349 
1350     Cluster cluster = createCluster(servers, regions.keySet(), backupMasters, tablesOnMaster);
1351 
1352     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1353       HRegionInfo region = entry.getKey();
1354       ServerName oldServerName = entry.getValue();
1355       List<ServerName> localServers = new ArrayList<ServerName>();
1356       if (oldServerName != null) {
1357         localServers = serversByHostname.get(oldServerName.getHostname());
1358       }
1359       if (masterIncluded && shouldBeOnMaster(region)) {
1360         assignments.get(masterServerName).add(region);
1361         if (localServers.contains(masterServerName)) {
1362           numRetainedAssignments++;
1363         } else {
1364           numRandomAssignments++;
1365         }
1366       } else if (localServers.isEmpty()) {
1367         // No servers on the new cluster match up with this hostname,
1368         // assign randomly.
1369         ServerName randomServer = randomAssignment(cluster, region, servers, backupMasters);
1370         assignments.get(randomServer).add(region);
1371         numRandomAssignments++;
1372         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1373       } else if (localServers.size() == 1) {
1374         // the usual case - one new server on same host
1375         ServerName target = localServers.get(0);
1376         assignments.get(target).add(region);
1377         cluster.doAssignRegion(region, target);
1378         numRetainedAssignments++;
1379       } else {
1380         // multiple new servers in the cluster on this same host
1381         if (localServers.contains(oldServerName)) {
1382           assignments.get(oldServerName).add(region);
1383           cluster.doAssignRegion(region, oldServerName);
1384         } else {
1385           ServerName target = null;
1386           for (ServerName tmp: localServers) {
1387             if (tmp.getPort() == oldServerName.getPort()) {
1388               target = tmp;
1389               break;
1390             }
1391           }
1392           if (target == null) {
1393             target = randomAssignment(cluster, region, localServers, backupMasters);
1394           }
1395           assignments.get(target).add(region);
1396         }
1397         numRetainedAssignments++;
1398       }
1399     }
1400 
1401     String randomAssignMsg = "";
1402     if (numRandomAssignments > 0) {
1403       randomAssignMsg =
1404           numRandomAssignments + " regions were assigned "
1405               + "to random hosts, since the old hosts for these regions are no "
1406               + "longer present in the cluster. These hosts were:\n  "
1407               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1408     }
1409 
1410     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1411         + " retained the pre-restart assignment. " + randomAssignMsg);
1412     return assignments;
1413   }
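       // Illustrative walk-through of the branches above: if region R ran on host1 before the
       // restart and host1 now runs exactly one new regionserver, R is retained there; with
       // several new regionservers on host1, the one matching the old port is preferred; if
       // host1 no longer runs any regionserver, R is assigned randomly and host1 is reported
       // in the log message built from oldHostsNoLongerPresent.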
1414 
1415   @Override
1416   public void initialize() throws HBaseIOException{
1417   }
1418 
1419   @Override
1420   public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1421   }
1422 
1423   @Override
1424   public void regionOffline(HRegionInfo regionInfo) {
1425   }
1426 
1427   @Override
1428   public boolean isStopped() {
1429     return stopped;
1430   }
1431 
1432   @Override
1433   public void stop(String why) {
1434     LOG.info("Load Balancer stop requested: "+why);
1435     stopped = true;
1436   }
1437 
1438   /**
1439    * Prepares the list of target regionservers so that it doesn't
1440    * contain any excluded server or backup master. The backup masters
1441    * that were in the original list are returned.
1442    */
1443   private List<ServerName> normalizeServers(List<ServerName> servers) {
1444     if (servers == null) {
1445       return null;
1446     }
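         // Note: the caller's servers list is modified in place; excluded servers and any
         // backup masters are removed from it below.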
1447     if (!excludedServers.isEmpty()) {
1448       servers.removeAll(excludedServers);
1449     }
1450     Collection<ServerName> allBackupMasters = getBackupMasters();
1451     List<ServerName> backupMasters = null;
1452     if (allBackupMasters != null && !allBackupMasters.isEmpty()) {
1453       for (ServerName server: allBackupMasters) {
1454         if (!servers.contains(server)) {
1455           // Ignore backup masters not included
1456           continue;
1457         }
1458         servers.remove(server);
1459         if (backupMasters == null) {
1460           backupMasters = new ArrayList<ServerName>();
1461         }
1462         backupMasters.add(server);
1463       }
1464     }
1465     return backupMasters;
1466   }
1467 
1468   /**
1469    * Used to assign a single region to a random server. The input should
1470    * already have been normalized: 1) servers doesn't include any excluded server,
1471    * 2) servers doesn't include any backup master, 3) backupMasters contains
1472    * only the backup masters that are intended to host this region, i.e., it
1473    * may not include all the backup masters.
1474    */
1475   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1476       List<ServerName> servers, List<ServerName> backupMasters) {
1477     int numServers = servers == null ? 0 : servers.size();
1478     int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
1479     if (numServers == 0 && numBackupMasters == 0) {
1480       LOG.warn("Wanted to do random assignment but no servers to assign to");
1481       return null;
1482     }
1483     if (servers != null && shouldBeOnMaster(regionInfo)
1484         && servers.contains(masterServerName)) {
1485       return masterServerName;
1486     }
1487     ServerName sn = null;
1488     final int maxIterations = numServers * 4;
1489     int iterations = 0;
1490 
1491     do {
1492       // Generate a random number weighted more towards
1493       // regular regionservers instead of backup masters.
1494       // This formula is chosen for simplicity.
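           // Illustrative example (the weight value is assumed): with 2 backup masters,
           // 5 regular servers and backupMasterWeight = 4, nextInt(22) yields 0-21;
           // 0-1 pick a backup master, while 2-21 map to servers via (i - 2) / 4,
           // making each regular server about four times as likely to be chosen.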
1495       int i = RANDOM.nextInt(
1496         numBackupMasters + numServers * backupMasterWeight);
1497       if (i < numBackupMasters) {
1498         sn = backupMasters.get(i);
1499         continue;
1500       }
1501       i = (i - numBackupMasters)/backupMasterWeight;
1502       sn = servers.get(i);
1503       if (sn.equals(masterServerName)) {
1504         // Try to avoid master for a user region
1505         if (numServers > 1) {
1506           i = (i == 0 ? 1 : i - 1);
1507           sn = servers.get(i);
1508         } else if (numBackupMasters > 0) {
1509           sn = backupMasters.get(0);
1510         }
1511       }
1512     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1513         && iterations++ < maxIterations);
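         // Record the choice in the in-memory cluster model so that subsequent
         // wouldLowerAvailability checks see this region as already placed.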
1514     cluster.doAssignRegion(regionInfo, sn);
1515     return sn;
1516   }
1517 
1518   /**
1519    * Round-robins a chunk of a list of regions across a list of servers.
1520    */
1521   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1522       List<HRegionInfo> unassignedRegions, int offset,
1523       int numRegions, List<ServerName> servers, List<HRegionInfo> masterRegions,
1524       Map<ServerName, List<HRegionInfo>> assignments) {
1525 
1526     boolean masterIncluded = servers.contains(masterServerName);
1527     int numServers = servers.size();
1528     int skipServers = numServers;
1529     if (masterIncluded) {
1530       skipServers--;
1531     }
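         // Ceiling of regions per eligible (non-master) server; used to size each server's list.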
1532     int max = (int) Math.ceil((float) numRegions / skipServers);
1533     int serverIdx = 0;
1534     if (numServers > 1) {
1535       serverIdx = RANDOM.nextInt(numServers);
1536     }
1537     int regionIdx = 0;
1538 
1539     for (int j = 0; j < numServers; j++) {
1540       ServerName server = servers.get((j + serverIdx) % numServers);
1541       if (masterIncluded && server.equals(masterServerName)) {
1542         // Don't put non-special regions on the master regionserver,
1543         // so that it is not overloaded.
1544         continue;
1545       }
1546       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
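           // Hand this server every skipServers-th region of the chunk, starting at regionIdx,
           // so the chunk is spread round-robin across the eligible servers.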
1547       for (int i = regionIdx; i < numRegions; i += skipServers) {
1548         HRegionInfo region = regions.get(offset + i % numRegions);
1549         if (masterRegions == null || !shouldBeOnMaster(region)) {
1550           if (cluster.wouldLowerAvailability(region, server)) {
1551             unassignedRegions.add(region);
1552           } else {
1553             serverRegions.add(region);
1554             cluster.doAssignRegion(region, server);
1555           }
1556           continue;
1557         }
1558         // Master is in the list and this is a special region
1559         masterRegions.add(region);
1560       }
1561       assignments.put(server, serverRegions);
1562       regionIdx++;
1563     }
1564   }
1565 
1566   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1567     Collection<HRegionInfo> regions) {
1568     if (this.services != null && this.services.getAssignmentManager() != null) {
1569       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1570     } else {
1571       return new HashMap<ServerName, List<HRegionInfo>>();
1572     }
1573   }
1574 }