View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.Deque;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.NavigableMap;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.TreeMap;
36  import java.util.concurrent.ExecutionException;
37
38  import org.apache.commons.lang.NotImplementedException;
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  import org.apache.hadoop.conf.Configuration;
42  import org.apache.hadoop.hbase.ClusterStatus;
43  import org.apache.hadoop.hbase.HBaseIOException;
44  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.RegionLoad;
47  import org.apache.hadoop.hbase.ServerName;
48  import org.apache.hadoop.hbase.TableName;
49  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
50  import org.apache.hadoop.hbase.master.LoadBalancer;
51  import org.apache.hadoop.hbase.master.MasterServices;
52  import org.apache.hadoop.hbase.master.RackManager;
53  import org.apache.hadoop.hbase.master.RegionPlan;
54  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
55  import org.apache.hadoop.hbase.security.access.AccessControlLists;
56  import org.apache.hadoop.util.StringUtils;
57
58  import com.google.common.annotations.VisibleForTesting;
59  import com.google.common.base.Joiner;
60  import com.google.common.collect.ArrayListMultimap;
61  import com.google.common.collect.Lists;
62  import com.google.common.collect.Sets;
63  import com.google.common.util.concurrent.ListenableFuture;
64
65  /**
66   * The base class for load balancers. It provides the the functions used to by
67   * {@link org.apache.hadoop.hbase.master.AssignmentManager} to assign regions
68   * in the edge cases. It doesn't provide an implementation of the
69   * actual balancing algorithm.
70   *
71   */
72  public abstract class BaseLoadBalancer implements LoadBalancer {
73    protected static final int MIN_SERVER_BALANCE = 2;
74    private volatile boolean stopped = false;
75  
76    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
77  
78    protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
79
80    private static class DefaultRackManager extends RackManager {
81      @Override
82      public String getRack(ServerName server) {
83        return UNKNOWN_RACK;
84      }
85    }
86
87    /**
88     * The constructor that uses the basic MetricsBalancer
89     */
90    protected BaseLoadBalancer() {
91      metricsBalancer = new MetricsBalancer();
92    }
93
94    /**
95     * This Constructor accepts an instance of MetricsBalancer,
96     * which will be used instead of creating a new one
97     */
98    protected BaseLoadBalancer(MetricsBalancer metricsBalancer) {
99      this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer();
100   }
101
102   /**
103    * An efficient array based implementation similar to ClusterState for keeping
104    * the status of the cluster in terms of region assignment and distribution.
105    * LoadBalancers, such as StochasticLoadBalancer uses this Cluster object because of
106    * hundreds of thousands of hashmap manipulations are very costly, which is why this
107    * class uses mostly indexes and arrays.
108    *
109    * Cluster tracks a list of unassigned regions, region assignments, and the server
110    * topology in terms of server names, hostnames and racks.
111    */
112   protected static class Cluster {
113     ServerName[] servers;
114     String[] hosts; // ServerName uniquely identifies a region server. multiple RS can run on the same host
115     String[] racks;
116     boolean multiServersPerHost = false; // whether or not any host has more than one server
117
118     ArrayList<String> tables;
119     HRegionInfo[] regions;
120     Deque<RegionLoad>[] regionLoads;
121     private RegionLocationFinder regionFinder;
122     ArrayList<ListenableFuture<HDFSBlocksDistribution>> regionLocationFutures;
123
124     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
125 
126     int[]   serverIndexToHostIndex;      //serverIndex -> host index
127     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
128
129     int[][] regionsPerServer;            //serverIndex -> region list
130     int[][] regionsPerHost;              //hostIndex -> list of regions
131     int[][] regionsPerRack;              //rackIndex -> region list
132     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
133     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
134     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
135
136     int[][] serversPerHost;              //hostIndex -> list of server indexes
137     int[][] serversPerRack;              //rackIndex -> list of server indexes
138     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
139     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
140     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
141     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
142     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
143     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
144     boolean hasRegionReplicas = false;   //whether there is regions with replicas
145 
146     Integer[] serverIndicesSortedByRegionCount;
147     Integer[] serverIndicesSortedByLocality;
148
149     Map<String, Integer> serversToIndex;
150     Map<String, Integer> hostsToIndex;
151     Map<String, Integer> racksToIndex;
152     Map<String, Integer> tablesToIndex;
153     Map<HRegionInfo, Integer> regionsToIndex;
154     float[] localityPerServer;
155
156     int numServers;
157     int numHosts;
158     int numRacks;
159     int numTables;
160     int numRegions;
161 
162     int numMovedRegions = 0; //num moved regions from the initial configuration
163     Map<ServerName, List<HRegionInfo>> clusterState;
164
165     protected final RackManager rackManager;
166
167     protected Cluster(
168         Map<ServerName, List<HRegionInfo>> clusterState,
169         Map<String, Deque<RegionLoad>> loads,
170         RegionLocationFinder regionFinder,
171         RackManager rackManager) {
172       this(null, clusterState, loads, regionFinder,
173         rackManager);
174     }
175
176     @SuppressWarnings("unchecked")
177     protected Cluster(
178         Collection<HRegionInfo> unassignedRegions,
179         Map<ServerName, List<HRegionInfo>> clusterState,
180         Map<String, Deque<RegionLoad>> loads,
181         RegionLocationFinder regionFinder,
182         RackManager rackManager) {
183
184       if (unassignedRegions == null) {
185         unassignedRegions = EMPTY_REGION_LIST;
186       }
187
188       serversToIndex = new HashMap<String, Integer>();
189       hostsToIndex = new HashMap<String, Integer>();
190       racksToIndex = new HashMap<String, Integer>();
191       tablesToIndex = new HashMap<String, Integer>();
192
193       //TODO: We should get the list of tables from master
194       tables = new ArrayList<String>();
195       this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
196
197       numRegions = 0;
198
199       List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
200       List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
201       this.clusterState = clusterState;
202       this.regionFinder = regionFinder;
203
204       // Use servername and port as there can be dead servers in this list. We want everything with
205       // a matching hostname and port to have the same index.
206       for (ServerName sn : clusterState.keySet()) {
207         if (serversToIndex.get(sn.getHostAndPort()) == null) {
208           serversToIndex.put(sn.getHostAndPort(), numServers++);
209         }
210         if (!hostsToIndex.containsKey(sn.getHostname())) {
211           hostsToIndex.put(sn.getHostname(), numHosts++);
212           serversPerHostList.add(new ArrayList<Integer>(1));
213         }
214
215         int serverIndex = serversToIndex.get(sn.getHostAndPort());
216         int hostIndex = hostsToIndex.get(sn.getHostname());
217         serversPerHostList.get(hostIndex).add(serverIndex);
218
219         String rack = this.rackManager.getRack(sn);
220         if (!racksToIndex.containsKey(rack)) {
221           racksToIndex.put(rack, numRacks++);
222           serversPerRackList.add(new ArrayList<Integer>());
223         }
224         int rackIndex = racksToIndex.get(rack);
225         serversPerRackList.get(rackIndex).add(serverIndex);
226       }
227
228       // Count how many regions there are.
229       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
230         numRegions += entry.getValue().size();
231       }
232       numRegions += unassignedRegions.size();
233
234       regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
235       servers = new ServerName[numServers];
236       serversPerHost = new int[numHosts][];
237       serversPerRack = new int[numRacks][];
238       regions = new HRegionInfo[numRegions];
239       regionIndexToServerIndex = new int[numRegions];
240       initialRegionIndexToServerIndex = new int[numRegions];
241       regionIndexToTableIndex = new int[numRegions];
242       regionIndexToPrimaryIndex = new int[numRegions];
243       regionLoads = new Deque[numRegions];
244       regionLocationFutures = new ArrayList<ListenableFuture<HDFSBlocksDistribution>>(
245           numRegions);
246       if (regionFinder != null) {
247         for (int i = 0; i < numRegions; i++) {
248           regionLocationFutures.add(null);
249         }
250       }
251       regionLocations = new int[numRegions][];
252       serverIndicesSortedByRegionCount = new Integer[numServers];
253       serverIndicesSortedByLocality = new Integer[numServers];
254       localityPerServer = new float[numServers];
255
256       serverIndexToHostIndex = new int[numServers];
257       serverIndexToRackIndex = new int[numServers];
258       regionsPerServer = new int[numServers][];
259       regionsPerHost = new int[numHosts][];
260       regionsPerRack = new int[numRacks][];
261       primariesOfRegionsPerServer = new int[numServers][];
262       primariesOfRegionsPerHost = new int[numHosts][];
263       primariesOfRegionsPerRack = new int[numRacks][];
264
265       int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
266 
267       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
268         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
269
270         // keep the servername if this is the first server name for this hostname
271         // or this servername has the newest startcode.
272         if (servers[serverIndex] == null ||
273             servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
274           servers[serverIndex] = entry.getKey();
275         }
276
277         if (regionsPerServer[serverIndex] != null) {
278           // there is another server with the same hostAndPort in ClusterState.
279           // allocate the array for the total size
280           regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
281         } else {
282           regionsPerServer[serverIndex] = new int[entry.getValue().size()];
283         }
284         primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
285         serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
286         serverIndicesSortedByLocality[serverIndex] = serverIndex;
287       }
288
289       hosts = new String[numHosts];
290       for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
291         hosts[entry.getValue()] = entry.getKey();
292       }
293       racks = new String[numRacks];
294       for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
295         racks[entry.getValue()] = entry.getKey();
296       }
297 
298       for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
299         int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
300         regionPerServerIndex = 0;
301
302         int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
303         serverIndexToHostIndex[serverIndex] = hostIndex;
304
305         int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
306         serverIndexToRackIndex[serverIndex] = rackIndex;
307
308         for (HRegionInfo region : entry.getValue()) {
309           registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
310
311           regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
312           regionIndex++;
313         }
314       }
315       for (HRegionInfo region : unassignedRegions) {
316         registerRegion(region, regionIndex, -1, loads, regionFinder);
317         regionIndex++;
318       }
319 
320       if (regionFinder != null) {
321         for (int index = 0; index < regionLocationFutures.size(); index++) {
322           ListenableFuture<HDFSBlocksDistribution> future = regionLocationFutures
323               .get(index);
324           HDFSBlocksDistribution blockDistbn = null;
325           try {
326             blockDistbn = future.get();
327           } catch (InterruptedException ite) {
328           } catch (ExecutionException ee) {
329             LOG.debug(
330                 "IOException during HDFSBlocksDistribution computation. for region = "
331                     + regions[index].getEncodedName(), ee);
332           } finally {
333             if (blockDistbn == null) {
334               blockDistbn = new HDFSBlocksDistribution();
335             }
336           }
337           List<ServerName> loc = regionFinder.getTopBlockLocations(blockDistbn);
338           regionLocations[index] = new int[loc.size()];
339           for (int i = 0; i < loc.size(); i++) {
340             regionLocations[index][i] = loc.get(i) == null ? -1
341                 : (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
342                     : serversToIndex.get(loc.get(i).getHostAndPort()));
343           }
344         }
345       }
346
347       for (int i = 0; i < serversPerHostList.size(); i++) {
348         serversPerHost[i] = new int[serversPerHostList.get(i).size()];
349         for (int j = 0; j < serversPerHost[i].length; j++) {
350           serversPerHost[i][j] = serversPerHostList.get(i).get(j);
351         }
352         if (serversPerHost[i].length > 1) {
353           multiServersPerHost = true;
354         }
355       }
356
357       for (int i = 0; i < serversPerRackList.size(); i++) {
358         serversPerRack[i] = new int[serversPerRackList.get(i).size()];
359         for (int j = 0; j < serversPerRack[i].length; j++) {
360           serversPerRack[i][j] = serversPerRackList.get(i).get(j);
361         }
362       }
363
364       numTables = tables.size();
365       numRegionsPerServerPerTable = new int[numServers][numTables];
366
367       for (int i = 0; i < numServers; i++) {
368         for (int j = 0; j < numTables; j++) {
369           numRegionsPerServerPerTable[i][j] = 0;
370         }
371       }
372
373       for (int i=0; i < regionIndexToServerIndex.length; i++) {
374         if (regionIndexToServerIndex[i] >= 0) {
375           numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
376         }
377       }
378
379       numMaxRegionsPerTable = new int[numTables];
380       for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
381         for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
382           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
383             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
384           }
385         }
386       }
387
388       for (int i = 0; i < regions.length; i ++) {
389         HRegionInfo info = regions[i];
390         if (RegionReplicaUtil.isDefaultReplica(info)) {
391           regionIndexToPrimaryIndex[i] = i;
392         } else {
393           hasRegionReplicas = true;
394           HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
395           regionIndexToPrimaryIndex[i] =
396               regionsToIndex.containsKey(primaryInfo) ?
397               regionsToIndex.get(primaryInfo):
398               -1;
399         }
400       }
401
402       for (int i = 0; i < regionsPerServer.length; i++) {
403         primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
404         for (int j = 0; j < regionsPerServer[i].length; j++) {
405           int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
406           primariesOfRegionsPerServer[i][j] = primaryIndex;
407         }
408         // sort the regions by primaries.
409         Arrays.sort(primariesOfRegionsPerServer[i]);
410       }
411 
412       // compute regionsPerHost
413       if (multiServersPerHost) {
414         for (int i = 0 ; i < serversPerHost.length; i++) {
415           int numRegionsPerHost = 0;
416           for (int j = 0; j < serversPerHost[i].length; j++) {
417             numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
418           }
419           regionsPerHost[i] = new int[numRegionsPerHost];
420           primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
421         }
422         for (int i = 0 ; i < serversPerHost.length; i++) {
423           int numRegionPerHostIndex = 0;
424           for (int j = 0; j < serversPerHost[i].length; j++) {
425             for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
426               int region = regionsPerServer[serversPerHost[i][j]][k];
427               regionsPerHost[i][numRegionPerHostIndex] = region;
428               int primaryIndex = regionIndexToPrimaryIndex[region];
429               primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
430               numRegionPerHostIndex++;
431             }
432           }
433           // sort the regions by primaries.
434           Arrays.sort(primariesOfRegionsPerHost[i]);
435         }
436       }
437
438       // compute regionsPerRack
439       if (numRacks > 1) {
440         for (int i = 0 ; i < serversPerRack.length; i++) {
441           int numRegionsPerRack = 0;
442           for (int j = 0; j < serversPerRack[i].length; j++) {
443             numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
444           }
445           regionsPerRack[i] = new int[numRegionsPerRack];
446           primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
447         }
448
449         for (int i = 0 ; i < serversPerRack.length; i++) {
450           int numRegionPerRackIndex = 0;
451           for (int j = 0; j < serversPerRack[i].length; j++) {
452             for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
453               int region = regionsPerServer[serversPerRack[i][j]][k];
454               regionsPerRack[i][numRegionPerRackIndex] = region;
455               int primaryIndex = regionIndexToPrimaryIndex[region];
456               primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
457               numRegionPerRackIndex++;
458             }
459           }
460           // sort the regions by primaries.
461           Arrays.sort(primariesOfRegionsPerRack[i]);
462         }
463       }
464     }
465
466     /** Helper for Cluster constructor to handle a region */
467     private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
468         Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
469       String tableName = region.getTable().getNameAsString();
470       if (!tablesToIndex.containsKey(tableName)) {
471         tables.add(tableName);
472         tablesToIndex.put(tableName, tablesToIndex.size());
473       }
474       int tableIndex = tablesToIndex.get(tableName);
475
476       regionsToIndex.put(region, regionIndex);
477       regions[regionIndex] = region;
478       regionIndexToServerIndex[regionIndex] = serverIndex;
479       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
480       regionIndexToTableIndex[regionIndex] = tableIndex;
481
482       // region load
483       if (loads != null) {
484         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
485         // That could have failed if the RegionLoad is using the other regionName
486         if (rl == null) {
487           // Try getting the region load using encoded name.
488           rl = loads.get(region.getEncodedName());
489         }
490         regionLoads[regionIndex] = rl;
491       }
492
493       if (regionFinder != null) {
494         // region location
495         regionLocationFutures.set(regionIndex,
496             regionFinder.asyncGetBlockDistribution(region));
497       }
498     }
499
500     /** An action to move or swap a region */
501     public static class Action {
502       public static enum Type {
503         ASSIGN_REGION,
504         MOVE_REGION,
505         SWAP_REGIONS,
506         NULL,
507       }
508
509       public Type type;
510       public Action (Type type) {this.type = type;}
511       /** Returns an Action which would undo this action */
512       public Action undoAction() { return this; }
513       @Override
514       public String toString() { return type + ":";}
515     }
516
517     public static class AssignRegionAction extends Action {
518       public int region;
519       public int server;
520       public AssignRegionAction(int region, int server) {
521         super(Type.ASSIGN_REGION);
522         this.region = region;
523         this.server = server;
524       }
525       @Override
526       public Action undoAction() {
527         // TODO implement this. This action is not being used by the StochasticLB for now
528         // in case it uses it, we should implement this function.
529         throw new NotImplementedException();
530       }
531       @Override
532       public String toString() {
533         return type + ": " + region + ":" + server;
534       }
535     }
536
537     public static class MoveRegionAction extends Action {
538       public int region;
539       public int fromServer;
540       public int toServer;
541
542       public MoveRegionAction(int region, int fromServer, int toServer) {
543         super(Type.MOVE_REGION);
544         this.fromServer = fromServer;
545         this.region = region;
546         this.toServer = toServer;
547       }
548       @Override
549       public Action undoAction() {
550         return new MoveRegionAction (region, toServer, fromServer);
551       }
552       @Override
553       public String toString() {
554         return type + ": " + region + ":" + fromServer + " -> " + toServer;
555       }
556     }
557
558     public static class SwapRegionsAction extends Action {
559       public int fromServer;
560       public int fromRegion;
561       public int toServer;
562       public int toRegion;
563       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
564         super(Type.SWAP_REGIONS);
565         this.fromServer = fromServer;
566         this.fromRegion = fromRegion;
567         this.toServer = toServer;
568         this.toRegion = toRegion;
569       }
570       @Override
571       public Action undoAction() {
572         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
573       }
574       @Override
575       public String toString() {
576         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
577       }
578     }
579
580     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
581         justification="Mistake. Too disruptive to change now")
582     public static final Action NullAction = new Action(Type.NULL);
583
584     public void doAction(Action action) {
585       switch (action.type) {
586       case NULL: break;
587       case ASSIGN_REGION:
588         // FindBugs: Having the assert quietens FB BC_UNCONFIRMED_CAST warnings
589         assert action instanceof AssignRegionAction: action.getClass();
590         AssignRegionAction ar = (AssignRegionAction) action;
591         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
592         regionMoved(ar.region, -1, ar.server);
593         break;
594       case MOVE_REGION:
595         assert action instanceof MoveRegionAction: action.getClass();
596         MoveRegionAction mra = (MoveRegionAction) action;
597         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
598         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
599         regionMoved(mra.region, mra.fromServer, mra.toServer);
600         break;
601       case SWAP_REGIONS:
602         assert action instanceof SwapRegionsAction: action.getClass();
603         SwapRegionsAction a = (SwapRegionsAction) action;
604         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
605         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
606         regionMoved(a.fromRegion, a.fromServer, a.toServer);
607         regionMoved(a.toRegion, a.toServer, a.fromServer);
608         break;
609       default:
610         throw new RuntimeException("Uknown action:" + action.type);
611       }
612     }
613
614     /**
615      * Return true if the placement of region on server would lower the availability
616      * of the region in question
617      * @param server
618      * @param region
619      * @return true or false
620      */
621     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
622       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
623         return false; // safeguard against race between cluster.servers and servers from LB method args
624       }
625       int server = serversToIndex.get(serverName.getHostAndPort());
626       int region = regionsToIndex.get(regionInfo);
627
628       int primary = regionIndexToPrimaryIndex[region];
629
630       // there is a subset relation for server < host < rack
631       // check server first
632
633       if (contains(primariesOfRegionsPerServer[server], primary)) {
634         // check for whether there are other servers that we can place this region
635         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
636           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
637             return true; // meaning there is a better server
638           }
639         }
640         return false; // there is not a better server to place this
641       }
642
643       // check host
644       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
645         int host = serverIndexToHostIndex[server];
646         if (contains(primariesOfRegionsPerHost[host], primary)) {
647           // check for whether there are other hosts that we can place this region
648           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
649             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
650               return true; // meaning there is a better host
651             }
652           }
653           return false; // there is not a better host to place this
654         }
655       }
656
657       // check rack
658       if (numRacks > 1) {
659         int rack = serverIndexToRackIndex[server];
660         if (contains(primariesOfRegionsPerRack[rack], primary)) {
661           // check for whether there are other racks that we can place this region
662           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
663             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
664               return true; // meaning there is a better rack
665             }
666           }
667           return false; // there is not a better rack to place this
668         }
669       }
670       return false;
671     }
672
673     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
674       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
675         return;
676       }
677       int server = serversToIndex.get(serverName.getHostAndPort());
678       int region = regionsToIndex.get(regionInfo);
679       doAction(new AssignRegionAction(region, server));
680     }
681
682     void regionMoved(int region, int oldServer, int newServer) {
683       regionIndexToServerIndex[region] = newServer;
684       if (initialRegionIndexToServerIndex[region] == newServer) {
685         numMovedRegions--; //region moved back to original location
686       } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
687         numMovedRegions++; //region moved from original location
688       }
689       int tableIndex = regionIndexToTableIndex[region];
690       if (oldServer >= 0) {
691         numRegionsPerServerPerTable[oldServer][tableIndex]--;
692       }
693       numRegionsPerServerPerTable[newServer][tableIndex]++;
694
695       //check whether this caused maxRegionsPerTable in the new Server to be updated
696       if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
697         numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
698       } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
699           == numMaxRegionsPerTable[tableIndex]) {
700         //recompute maxRegionsPerTable since the previous value was coming from the old server
701         for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
702           if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
703             numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
704           }
705         }
706       }
707
708       // update for servers
709       int primary = regionIndexToPrimaryIndex[region];
710       if (oldServer >= 0) {
711         primariesOfRegionsPerServer[oldServer] = removeRegion(
712           primariesOfRegionsPerServer[oldServer], primary);
713       }
714       primariesOfRegionsPerServer[newServer] = addRegionSorted(
715         primariesOfRegionsPerServer[newServer], primary);
716 
717       // update for hosts
718       if (multiServersPerHost) {
719         int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
720         int newHost = serverIndexToHostIndex[newServer];
721         if (newHost != oldHost) {
722           regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
723           primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
724           if (oldHost >= 0) {
725             regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
726             primariesOfRegionsPerHost[oldHost] = removeRegion(
727               primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
728           }
729         }
730       }
731
732       // update for racks
733       if (numRacks > 1) {
734         int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
735         int newRack = serverIndexToRackIndex[newServer];
736         if (newRack != oldRack) {
737           regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
738           primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
739           if (oldRack >= 0) {
740             regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
741             primariesOfRegionsPerRack[oldRack] = removeRegion(
742               primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
743           }
744         }
745       }
746     }
747
748     int[] removeRegion(int[] regions, int regionIndex) {
749       //TODO: this maybe costly. Consider using linked lists
750       int[] newRegions = new int[regions.length - 1];
751       int i = 0;
752       for (i = 0; i < regions.length; i++) {
753         if (regions[i] == regionIndex) {
754           break;
755         }
756         newRegions[i] = regions[i];
757       }
758       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
759       return newRegions;
760     }
761
762     int[] addRegion(int[] regions, int regionIndex) {
763       int[] newRegions = new int[regions.length + 1];
764       System.arraycopy(regions, 0, newRegions, 0, regions.length);
765       newRegions[newRegions.length - 1] = regionIndex;
766       return newRegions;
767     }
768
769     int[] addRegionSorted(int[] regions, int regionIndex) {
770       int[] newRegions = new int[regions.length + 1];
771       int i = 0;
772       for (i = 0; i < regions.length; i++) { // find the index to insert
773         if (regions[i] > regionIndex) {
774           break;
775         }
776       }
777       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
778       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
779       newRegions[i] = regionIndex;
780
781       return newRegions;
782     }
783
784     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
785       int i = 0;
786       for (i = 0; i < regions.length; i++) {
787         if (regions[i] == regionIndex) {
788           regions[i] = newRegionIndex;
789           break;
790         }
791       }
792       return regions;
793     }
794
795     void sortServersByRegionCount() {
796       Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
797     }
798
799     int getNumRegions(int server) {
800       return regionsPerServer[server].length;
801     }
802
803     boolean contains(int[] arr, int val) {
804       return Arrays.binarySearch(arr, val) >= 0;
805     }
806
807     private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
808       @Override
809       public int compare(Integer integer, Integer integer2) {
810         return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
811       }
812     };
813
814     void sortServersByLocality() {
815       Arrays.sort(serverIndicesSortedByLocality, localityComparator);
816     }
817
818     float getLocality(int server) {
819       return localityPerServer[server];
820     }
821
822     private Comparator<Integer> localityComparator = new Comparator<Integer>() {
823       @Override
824       public int compare(Integer integer, Integer integer2) {
825         float locality1 = getLocality(integer);
826         float locality2 = getLocality(integer2);
827         if (locality1 < locality2) {
828           return -1;
829         } else if (locality1 > locality2) {
830           return 1;
831         } else {
832           return 0;
833         }
834       }
835     };
836
837     int getLowestLocalityRegionServer() {
838       if (regionFinder == null) {
839         return -1;
840       } else {
841         sortServersByLocality();
842         // We want to find server with non zero regions having lowest locality.
843         int i = 0;
844         int lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
845         while (localityPerServer[lowestLocalityServerIndex] == 0
846             && (regionsPerServer[lowestLocalityServerIndex].length == 0)) {
847           i++;
848           lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
849         }
850         if (LOG.isTraceEnabled()) {
851           LOG.trace("Lowest locality region server with non zero regions is "
852             + servers[lowestLocalityServerIndex].getHostname() + " with locality "
853             + localityPerServer[lowestLocalityServerIndex]);
854         }
855         return lowestLocalityServerIndex;
856       }
857     }
858
859     int getLowestLocalityRegionOnServer(int serverIndex) {
860       if (regionFinder != null) {
861         float lowestLocality = 1.0f;
862         int lowestLocalityRegionIndex = -1;
863         if (regionsPerServer[serverIndex].length == 0) {
864           // No regions on that region server
865           return -1;
866         }
867         for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
868           int regionIndex = regionsPerServer[serverIndex][j];
869           HDFSBlocksDistribution distribution = regionFinder
870               .getBlockDistribution(regions[regionIndex]);
871           float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
872           // skip empty region
873           if (distribution.getUniqueBlocksTotalWeight() == 0) {
874             continue;
875           }
876           if (locality < lowestLocality) {
877             lowestLocality = locality;
878             lowestLocalityRegionIndex = j;
879           }
880         }
881         if (lowestLocalityRegionIndex == -1) {
882           return -1;
883         }
884         if (LOG.isTraceEnabled()) {
885           LOG.trace("Lowest locality region is "
886               + regions[regionsPerServer[serverIndex][lowestLocalityRegionIndex]]
887                   .getRegionNameAsString() + " with locality " + lowestLocality
888               + " and its region server contains " + regionsPerServer[serverIndex].length
889               + " regions");
890         }
891         return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
892       } else {
893         return -1;
894       }
895     }
896
897     float getLocalityOfRegion(int region, int server) {
898       if (regionFinder != null) {
899         HDFSBlocksDistribution distribution = regionFinder.getBlockDistribution(regions[region]);
900         return distribution.getBlockLocalityIndex(servers[server].getHostname());
901       } else {
902         return 0f;
903       }
904     }
905
906     /**
907      * Returns a least loaded server which has better locality for this region
908      * than the current server.
909      */
910     int getLeastLoadedTopServerForRegion(int region, int currentServer) {
911       if (regionFinder != null) {
912         List<ServerName> topLocalServers = regionFinder.getTopBlockLocations(regions[region],
913           servers[currentServer].getHostname());
914         int leastLoadedServerIndex = -1;
915         int load = Integer.MAX_VALUE;
916         for (ServerName sn : topLocalServers) {
917           if (!serversToIndex.containsKey(sn.getHostAndPort())) {
918             continue;
919           }
920           int index = serversToIndex.get(sn.getHostAndPort());
921           if (regionsPerServer[index] == null) {
922             continue;
923           }
924           int tempLoad = regionsPerServer[index].length;
925           if (tempLoad <= load) {
926             leastLoadedServerIndex = index;
927             load = tempLoad;
928           }
929         }
930         if (leastLoadedServerIndex != -1) {
931           LOG.debug("Pick the least loaded server " + servers[leastLoadedServerIndex].getHostname()
932             + " with better locality for region " + regions[region]);
933         }
934         return leastLoadedServerIndex;
935       } else {
936         return -1;
937       }
938     }
939
940     void calculateRegionServerLocalities() {
941       if (regionFinder == null) {
942         LOG.warn("Region location finder found null, skipping locality calculations.");
943         return;
944       }
945       for (int i = 0; i < regionsPerServer.length; i++) {
946         HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
947         if (regionsPerServer[i].length > 0) {
948           for (int j = 0; j < regionsPerServer[i].length; j++) {
949             int regionIndex = regionsPerServer[i][j];
950             distribution.add(regionFinder.getBlockDistribution(regions[regionIndex]));
951           }
952         } else {
953           LOG.debug("Server " + servers[i].getHostname() + " had 0 regions.");
954         }
955         localityPerServer[i] = distribution.getBlockLocalityIndex(servers[i].getHostname());
956       }
957     }
958
959     @VisibleForTesting
960     protected void setNumRegions(int numRegions) {
961       this.numRegions = numRegions;
962     }
963
964     @VisibleForTesting
965     protected void setNumMovedRegions(int numMovedRegions) {
966       this.numMovedRegions = numMovedRegions;
967     }
968
969     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
970         justification="Not important but should be fixed")
971     @Override
972     public String toString() {
973       String desc = "Cluster{" +
974           "servers=[";
975           for(ServerName sn:servers) {
976              desc += sn.getHostAndPort() + ", ";
977           }
978           desc +=
979           ", serverIndicesSortedByRegionCount="+
980           Arrays.toString(serverIndicesSortedByRegionCount) +
981           ", regionsPerServer=[";
982
983           for (int[]r:regionsPerServer) {
984             desc += Arrays.toString(r);
985           }
986           desc += "]" +
987           ", numMaxRegionsPerTable=" +
988           Arrays.toString(numMaxRegionsPerTable) +
989           ", numRegions=" +
990           numRegions +
991           ", numServers=" +
992           numServers +
993           ", numTables=" +
994           numTables +
995           ", numMovedRegions=" +
996           numMovedRegions +
997           '}';
998       return desc;
999     }
1000   }
1001
1002   // slop for regions
1003   protected float slop;
1004   protected Configuration config;
1005   protected RackManager rackManager;
1006   private static final Random RANDOM = new Random(System.currentTimeMillis());
1007   private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
1008
1009   // Regions of these tables are put on the master by default.
1010   private static final String[] DEFAULT_TABLES_ON_MASTER =
1011     new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
1012       TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
1013       TableName.META_TABLE_NAME.getNameAsString()};
1014
1015   public static final String TABLES_ON_MASTER =
1016     "hbase.balancer.tablesOnMaster";
1017
1018   protected final Set<String> tablesOnMaster = new HashSet<String>();
1019   protected MetricsBalancer metricsBalancer = null;
1020   protected ClusterStatus clusterStatus = null;
1021   protected ServerName masterServerName;
1022   protected MasterServices services;
1023
1024   /**
1025    * By default, regions of some small system tables such as meta,
1026    * namespace, and acl are assigned to the active master. If you don't
1027    * want to assign any region to the active master, you need to
1028    * configure "hbase.balancer.tablesOnMaster" to "none".
1029    */
1030   protected static String[] getTablesOnMaster(Configuration conf) {
1031     String valueString = conf.get(TABLES_ON_MASTER);
1032     if (valueString == null) {
1033       return DEFAULT_TABLES_ON_MASTER;
1034     }
1035     valueString = valueString.trim();
1036     if (valueString.equalsIgnoreCase("none")) {
1037       return null;
1038     }
1039     return StringUtils.getStrings(valueString);
1040   }
1041
1042   /**
1043    * Check if configured to put any tables on the active master
1044    */
1045   public static boolean tablesOnMaster(Configuration conf) {
1046     String[] tables = getTablesOnMaster(conf);
1047     return tables != null && tables.length > 0;
1048   }
1049
1050   public static boolean userTablesOnMaster(Configuration conf) {
1051     String[] tables = getTablesOnMaster(conf);
1052     if (tables == null || tables.length == 0) {
1053       return false;
1054     }
1055     for (String tn:tables) {
1056       if (!tn.startsWith("hbase:")) {
1057         return true;
1058       }
1059     }
1060     return false;
1061   }
1062
1063   @Override
1064   public void setConf(Configuration conf) {
1065     setSlop(conf);
1066     if (slop < 0) slop = 0;
1067     else if (slop > 1) slop = 1;
1068
1069     this.config = conf;
1070     String[] tables = getTablesOnMaster(conf);
1071     if (tables != null && tables.length > 0) {
1072       Collections.addAll(tablesOnMaster, tables);
1073     }
1074     this.rackManager = new RackManager(getConf());
1075     regionFinder.setConf(conf);
1076   }
1077
1078   protected void setSlop(Configuration conf) {
1079     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
1080   }
1081
1082   /**
1083    * Check if a region belongs to some small system table.
1084    * If so, the primary replica may be expected to be put on the master regionserver.
1085    */
1086   public boolean shouldBeOnMaster(HRegionInfo region) {
1087     return tablesOnMaster.contains(region.getTable().getNameAsString())
1088         && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
1089   }
1090
1091   /**
1092    * Balance the regions that should be on master regionserver.
1093    */
1094   protected List<RegionPlan> balanceMasterRegions(
1095       Map<ServerName, List<HRegionInfo>> clusterMap) {
1096     if (masterServerName == null
1097         || clusterMap == null || clusterMap.size() <= 1) return null;
1098     List<RegionPlan> plans = null;
1099     List<HRegionInfo> regions = clusterMap.get(masterServerName);
1100     if (regions != null) {
1101       Iterator<ServerName> keyIt = null;
1102       for (HRegionInfo region: regions) {
1103         if (shouldBeOnMaster(region)) continue;
1104
1105         // Find a non-master regionserver to host the region
1106         if (keyIt == null || !keyIt.hasNext()) {
1107           keyIt = clusterMap.keySet().iterator();
1108         }
1109         ServerName dest = keyIt.next();
1110         if (masterServerName.equals(dest)) {
1111           if (!keyIt.hasNext()) {
1112             keyIt = clusterMap.keySet().iterator();
1113           }
1114           dest = keyIt.next();
1115         }
1116
1117         // Move this region away from the master regionserver
1118         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
1119         if (plans == null) {
1120           plans = new ArrayList<RegionPlan>();
1121         }
1122         plans.add(plan);
1123       }
1124     }
1125     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
1126       if (masterServerName.equals(server.getKey())) continue;
1127       for (HRegionInfo region: server.getValue()) {
1128         if (!shouldBeOnMaster(region)) continue;
1129
1130         // Move this region to the master regionserver
1131         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
1132         if (plans == null) {
1133           plans = new ArrayList<RegionPlan>();
1134         }
1135         plans.add(plan);
1136       }
1137     }
1138     return plans;
1139   }
1140
1141   /**
1142    * Assign the regions that should be on master regionserver.
1143    */
1144   protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
1145       Collection<HRegionInfo> regions, List<ServerName> servers) {
1146     if (servers == null || regions == null || regions.isEmpty()) {
1147       return null;
1148     }
1149     Map<ServerName, List<HRegionInfo>> assignments
1150       = new TreeMap<ServerName, List<HRegionInfo>>();
1151     if (masterServerName != null && servers.contains(masterServerName)) {
1152       assignments.put(masterServerName, new ArrayList<HRegionInfo>());
1153       for (HRegionInfo region: regions) {
1154         if (shouldBeOnMaster(region)) {
1155           assignments.get(masterServerName).add(region);
1156         }
1157       }
1158     }
1159     return assignments;
1160   }
1161
1162   @Override
1163   public Configuration getConf() {
1164     return this.config;
1165   }
1166
1167   @Override
1168   public synchronized void setClusterStatus(ClusterStatus st) {
1169     this.clusterStatus = st;
1170     regionFinder.setClusterStatus(st);
1171   }
1172
1173   @Override
1174   public void setMasterServices(MasterServices masterServices) {
1175     masterServerName = masterServices.getServerName();
1176     this.services = masterServices;
1177     this.regionFinder.setServices(masterServices);
1178   }
1179
1180   public void setRackManager(RackManager rackManager) {
1181     this.rackManager = rackManager;
1182   }
1183
1184   protected boolean needsBalance(Cluster c) {
1185     ClusterLoadState cs = new ClusterLoadState(c.clusterState);
1186     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
1187       if (LOG.isDebugEnabled()) {
1188         LOG.debug("Not running balancer because only " + cs.getNumServers()
1189             + " active regionserver(s)");
1190       }
1191       return false;
1192     }
1193     if(areSomeRegionReplicasColocated(c)) return true;
1194     // Check if we even need to do any load balancing
1195     // HBASE-3681 check sloppiness first
1196     float average = cs.getLoadAverage(); // for logging
1197     int floor = (int) Math.floor(average * (1 - slop));
1198     int ceiling = (int) Math.ceil(average * (1 + slop));
1199     if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
1200       NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
1201       if (LOG.isTraceEnabled()) {
1202         // If nothing to balance, then don't say anything unless trace-level logging.
1203         LOG.trace("Skipping load balancing because balanced cluster; " +
1204           "servers=" + cs.getNumServers() +
1205           " regions=" + cs.getNumRegions() + " average=" + average +
1206           " mostloaded=" + serversByLoad.lastKey().getLoad() +
1207           " leastloaded=" + serversByLoad.firstKey().getLoad());
1208       }
1209       return false;
1210     }
1211     return true;
1212   }
1213
1214   /**
1215    * Subclasses should implement this to return true if the cluster has nodes that hosts
1216    * multiple replicas for the same region, or, if there are multiple racks and the same
1217    * rack hosts replicas of the same region
1218    * @param c Cluster information
1219    * @return whether region replicas are currently co-located
1220    */
1221   protected boolean areSomeRegionReplicasColocated(Cluster c) {
1222     return false;
1223   }
1224
1225   /**
1226    * Generates a bulk assignment plan to be used on cluster startup using a
1227    * simple round-robin assignment.
1228    * <p>
1229    * Takes a list of all the regions and all the servers in the cluster and
1230    * returns a map of each server to the regions that it should be assigned.
1231    * <p>
1232    * Currently implemented as a round-robin assignment. Same invariant as load
1233    * balancing, all servers holding floor(avg) or ceiling(avg).
1234    *
1235    * TODO: Use block locations from HDFS to place regions with their blocks
1236    *
1237    * @param regions all regions
1238    * @param servers all servers
1239    * @return map of server to the regions it should take, or null if no
1240    *         assignment is possible (ie. no regions or no servers)
1241    */
1242   @Override
1243   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1244       List<ServerName> servers) {
1245     metricsBalancer.incrMiscInvocations();
1246     Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1247     if (assignments != null && !assignments.isEmpty()) {
1248       servers = new ArrayList<ServerName>(servers);
1249       // Guarantee not to put other regions on master
1250       servers.remove(masterServerName);
1251       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1252       if (!masterRegions.isEmpty()) {
1253         regions = new ArrayList<HRegionInfo>(regions);
1254         for (HRegionInfo region: masterRegions) {
1255           regions.remove(region);
1256         }
1257       }
1258     }
1259     if (regions == null || regions.isEmpty()) {
1260       return assignments;
1261     }
1262
1263     int numServers = servers == null ? 0 : servers.size();
1264     if (numServers == 0) {
1265       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1266       return null;
1267     }
1268
1269     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1270     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1271     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1272     // and balanced. This should also run fast with fewer number of iterations.
1273
1274     if (numServers == 1) { // Only one server, nothing fancy we can do here
1275       ServerName server = servers.get(0);
1276       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1277       return assignments;
1278     }
1279
1280     Cluster cluster = createCluster(servers, regions);
1281     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1282
1283     roundRobinAssignment(cluster, regions, unassignedRegions,
1284       servers, assignments);
1285
1286     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1287     // assign the remaining by going through the list and try to assign to servers one-by-one
1288     int serverIdx = RANDOM.nextInt(numServers);
1289     for (HRegionInfo region : unassignedRegions) {
1290       boolean assigned = false;
1291       for (int j = 0; j < numServers; j++) { // try all servers one by one
1292         ServerName serverName = servers.get((j + serverIdx) % numServers);
1293         if (!cluster.wouldLowerAvailability(region, serverName)) {
1294           List<HRegionInfo> serverRegions = assignments.get(serverName);
1295           if (serverRegions == null) {
1296             serverRegions = new ArrayList<HRegionInfo>();
1297             assignments.put(serverName, serverRegions);
1298           }
1299           serverRegions.add(region);
1300           cluster.doAssignRegion(region, serverName);
1301           serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
1302           assigned = true;
1303           break;
1304         }
1305       }
1306       if (!assigned) {
1307         lastFewRegions.add(region);
1308       }
1309     }
1310     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1311     // make it optimal later. we can end up with this if numReplicas > numServers.
1312     for (HRegionInfo region : lastFewRegions) {
1313       int i = RANDOM.nextInt(numServers);
1314       ServerName server = servers.get(i);
1315       List<HRegionInfo> serverRegions = assignments.get(server);
1316       if (serverRegions == null) {
1317         serverRegions = new ArrayList<HRegionInfo>();
1318         assignments.put(server, serverRegions);
1319       }
1320       serverRegions.add(region);
1321       cluster.doAssignRegion(region, server);
1322     }
1323     return assignments;
1324   }
1325
1326   protected Cluster createCluster(List<ServerName> servers,
1327       Collection<HRegionInfo> regions) {
1328     // Get the snapshot of the current assignments for the regions in question, and then create
1329     // a cluster out of it. Note that we might have replicas already assigned to some servers
1330     // earlier. So we want to get the snapshot to see those assignments, but this will only contain
1331     // replicas of the regions that are passed (for performance).
1332     Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1333
1334     for (ServerName server : servers) {
1335       if (!clusterState.containsKey(server)) {
1336         clusterState.put(server, EMPTY_REGION_LIST);
1337       }
1338     }
1339     return new Cluster(regions, clusterState, null, this.regionFinder,
1340       rackManager);
1341   }
1342
1343   /**
1344    * Used to assign a single region to a random server.
1345    */
1346   @Override
1347   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1348     metricsBalancer.incrMiscInvocations();
1349     if (servers != null && servers.contains(masterServerName)) {
1350       if (shouldBeOnMaster(regionInfo)) {
1351         return masterServerName;
1352       }
1353       servers = new ArrayList<ServerName>(servers);
1354       // Guarantee not to put other regions on master
1355       servers.remove(masterServerName);
1356     }
1357
1358     int numServers = servers == null ? 0 : servers.size();
1359     if (numServers == 0) {
1360       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1361       return null;
1362     }
1363     if (numServers == 1) { // Only one server, nothing fancy we can do here
1364       return servers.get(0);
1365     }
1366
1367     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1368     Cluster cluster = createCluster(servers, regions);
1369     return randomAssignment(cluster, regionInfo, servers);
1370   }
1371
1372   /**
1373    * Generates a bulk assignment startup plan, attempting to reuse the existing
1374    * assignment information from META, but adjusting for the specified list of
1375    * available/online servers available for assignment.
1376    * <p>
1377    * Takes a map of all regions to their existing assignment from META. Also
1378    * takes a list of online servers for regions to be assigned to. Attempts to
1379    * retain all assignment, so in some instances initial assignment will not be
1380    * completely balanced.
1381    * <p>
1382    * Any leftover regions without an existing server to be assigned to will be
1383    * assigned randomly to available servers.
1384    *
1385    * @param regions regions and existing assignment from meta
1386    * @param servers available servers
1387    * @return map of servers and regions to be assigned to them
1388    */
1389   @Override
1390   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1391       List<ServerName> servers) {
1392     // Update metrics
1393     metricsBalancer.incrMiscInvocations();
1394     Map<ServerName, List<HRegionInfo>> assignments
1395       = assignMasterRegions(regions.keySet(), servers);
1396     if (assignments != null && !assignments.isEmpty()) {
1397       servers = new ArrayList<ServerName>(servers);
1398       // Guarantee not to put other regions on master
1399       servers.remove(masterServerName);
1400       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1401       if (!masterRegions.isEmpty()) {
1402         regions = new HashMap<HRegionInfo, ServerName>(regions);
1403         for (HRegionInfo region: masterRegions) {
1404           regions.remove(region);
1405         }
1406       }
1407     }
1408     if (regions == null || regions.isEmpty()) {
1409       return assignments;
1410     }
1411
1412     int numServers = servers == null ? 0 : servers.size();
1413     if (numServers == 0) {
1414       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1415       return null;
1416     }
1417     if (numServers == 1) { // Only one server, nothing fancy we can do here
1418       ServerName server = servers.get(0);
1419       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1420       return assignments;
1421     }
1422
1423     // Group all of the old assignments by their hostname.
1424     // We can't group directly by ServerName since the servers all have
1425     // new start-codes.
1426
1427     // Group the servers by their hostname. It's possible we have multiple
1428     // servers on the same host on different ports.
1429     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1430     for (ServerName server : servers) {
1431       assignments.put(server, new ArrayList<HRegionInfo>());
1432       serversByHostname.put(server.getHostname(), server);
1433     }
1434
1435     // Collection of the hostnames that used to have regions
1436     // assigned, but for which we no longer have any RS running
1437     // after the cluster restart.
1438     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1439
1440     int numRandomAssignments = 0;
1441     int numRetainedAssigments = 0;
1442
1443     Cluster cluster = createCluster(servers, regions.keySet());
1444
1445     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1446       HRegionInfo region = entry.getKey();
1447       ServerName oldServerName = entry.getValue();
1448       List<ServerName> localServers = new ArrayList<ServerName>();
1449       if (oldServerName != null) {
1450         localServers = serversByHostname.get(oldServerName.getHostname());
1451       }
1452       if (localServers.isEmpty()) {
1453         // No servers on the new cluster match up with this hostname,
1454         // assign randomly.
1455         ServerName randomServer = randomAssignment(cluster, region, servers);
1456         assignments.get(randomServer).add(region);
1457         numRandomAssignments++;
1458         if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1459       } else if (localServers.size() == 1) {
1460         // the usual case - one new server on same host
1461         ServerName target = localServers.get(0);
1462         assignments.get(target).add(region);
1463         cluster.doAssignRegion(region, target);
1464         numRetainedAssigments++;
1465       } else {
1466         // multiple new servers in the cluster on this same host
1467         if (localServers.contains(oldServerName)) {
1468           assignments.get(oldServerName).add(region);
1469           cluster.doAssignRegion(region, oldServerName);
1470         } else {
1471           ServerName target = null;
1472           for (ServerName tmp: localServers) {
1473             if (tmp.getPort() == oldServerName.getPort()) {
1474               target = tmp;
1475               break;
1476             }
1477           }
1478           if (target == null) {
1479             target = randomAssignment(cluster, region, localServers);
1480           }
1481           assignments.get(target).add(region);
1482         }
1483         numRetainedAssigments++;
1484       }
1485     }
1486
1487     String randomAssignMsg = "";
1488     if (numRandomAssignments > 0) {
1489       randomAssignMsg =
1490           numRandomAssignments + " regions were assigned "
1491               + "to random hosts, since the old hosts for these regions are no "
1492               + "longer present in the cluster. These hosts were:\n  "
1493               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1494     }
1495
1496     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1497         + " retained the pre-restart assignment. " + randomAssignMsg);
1498     return assignments;
1499   }
1500
1501   @Override
1502   public void initialize() throws HBaseIOException{
1503   }
1504
1505   @Override
1506   public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1507   }
1508
1509   @Override
1510   public void regionOffline(HRegionInfo regionInfo) {
1511   }
1512
1513   @Override
1514   public boolean isStopped() {
1515     return stopped;
1516   }
1517
1518   @Override
1519   public void stop(String why) {
1520     LOG.info("Load Balancer stop requested: "+why);
1521     stopped = true;
1522   }
1523
1524   /**
1525    * Used to assign a single region to a random server.
1526    */
1527   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1528       List<ServerName> servers) {
1529     int numServers = servers.size(); // servers is not null, numServers > 1
1530     ServerName sn = null;
1531     final int maxIterations = numServers * 4;
1532     int iterations = 0;
1533
1534     do {
1535       int i = RANDOM.nextInt(numServers);
1536       sn = servers.get(i);
1537     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1538         && iterations++ < maxIterations);
1539     cluster.doAssignRegion(regionInfo, sn);
1540     return sn;
1541   }
1542
1543   /**
1544    * Round robin a list of regions to a list of servers
1545    */
1546   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1547       List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1548       Map<ServerName, List<HRegionInfo>> assignments) {
1549
1550     int numServers = servers.size();
1551     int numRegions = regions.size();
1552     int max = (int) Math.ceil((float) numRegions / numServers);
1553     int serverIdx = 0;
1554     if (numServers > 1) {
1555       serverIdx = RANDOM.nextInt(numServers);
1556     }
1557     int regionIdx = 0;
1558
1559     for (int j = 0; j < numServers; j++) {
1560       ServerName server = servers.get((j + serverIdx) % numServers);
1561       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1562       for (int i = regionIdx; i < numRegions; i += numServers) {
1563         HRegionInfo region = regions.get(i % numRegions);
1564         if (cluster.wouldLowerAvailability(region, server)) {
1565           unassignedRegions.add(region);
1566         } else {
1567           serverRegions.add(region);
1568           cluster.doAssignRegion(region, server);
1569         }
1570       }
1571       assignments.put(server, serverRegions);
1572       regionIdx++;
1573     }
1574   }
1575
1576   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1577     Collection<HRegionInfo> regions) {
1578     if (this.services != null && this.services.getAssignmentManager() != null) {
1579       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1580     } else {
1581       return new HashMap<ServerName, List<HRegionInfo>>();
1582     }
1583   }
1584
1585   @Override
1586   public void onConfigurationChange(Configuration conf) {
1587   }
1588 }