View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayDeque;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Deque;
24  import java.util.HashMap;
25  import java.util.LinkedList;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Map.Entry;
29  import java.util.Random;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
34  import org.apache.hadoop.classification.InterfaceAudience;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.hbase.ClusterStatus;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.RegionLoad;
39  import org.apache.hadoop.hbase.ServerLoad;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.master.MasterServices;
42  import org.apache.hadoop.hbase.master.RegionPlan;
43  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
44  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
45  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction;
46  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction;
47  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegionsAction;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
50  
51  /**
52   * <p>This is a best effort load balancer. Given a Cost function F(C) => x It will
53   * randomly try and mutate the cluster to Cprime. If F(Cprime) < F(C) then the
 * new cluster state becomes the plan. It includes cost functions to compute the cost of:</p>
55   * <ul>
56   * <li>Region Load</li>
57   * <li>Table Load</li>
58   * <li>Data Locality</li>
59   * <li>Memstore Sizes</li>
60   * <li>Storefile Sizes</li>
61   * </ul>
62   *
63   *
64   * <p>Every cost function returns a number between 0 and 1 inclusive; where 0 is the lowest cost
65   * best solution, and 1 is the highest possible cost and the worst solution.  The computed costs are
66   * scaled by their respective multipliers:</p>
67   *
68   * <ul>
69   *   <li>hbase.master.balancer.stochastic.regionLoadCost</li>
70   *   <li>hbase.master.balancer.stochastic.moveCost</li>
71   *   <li>hbase.master.balancer.stochastic.tableLoadCost</li>
72   *   <li>hbase.master.balancer.stochastic.localityCost</li>
73   *   <li>hbase.master.balancer.stochastic.memstoreSizeCost</li>
74   *   <li>hbase.master.balancer.stochastic.storefileSizeCost</li>
75   * </ul>
76   *
77   * <p>In addition to the above configurations, the balancer can be tuned by the following
78   * configuration values:</p>
79   * <ul>
 *   <li>hbase.master.balancer.stochastic.maxMoveRegions, which
 *   controls the maximum number of regions that can be moved in a single invocation of this
 *   balancer.</li>
83   *   <li>hbase.master.balancer.stochastic.stepsPerRegion is the coefficient by which the number of
84   *   regions is multiplied to try and get the number of times the balancer will
85   *   mutate all servers.</li>
86   *   <li>hbase.master.balancer.stochastic.maxSteps which controls the maximum number of times that
87   *   the balancer will try and mutate all the servers. The balancer will use the minimum of this
88   *   value and the above computation.</li>
89   * </ul>
90   *
91   * <p>This balancer is best used with hbase.master.loadbalance.bytable set to false
92   * so that the balancer gets the full picture of all loads on the cluster.</p>
93   */
94  @InterfaceAudience.Private
95  public class StochasticLoadBalancer extends BaseLoadBalancer {
96  
  /** Configuration key: coefficient multiplied by region count to size the mutation walk. */
  protected static final String STEPS_PER_REGION_KEY =
      "hbase.master.balancer.stochastic.stepsPerRegion";
  /** Configuration key: hard cap on the number of mutation steps per balancer run. */
  protected static final String MAX_STEPS_KEY =
      "hbase.master.balancer.stochastic.maxSteps";
  /** Configuration key: wall-clock budget (ms) for a single balancer run. */
  protected static final String MAX_RUNNING_TIME_KEY =
      "hbase.master.balancer.stochastic.maxRunningTime";
  /** Configuration key: how many RegionLoad samples to keep per region. */
  protected static final String KEEP_REGION_LOADS =
      "hbase.master.balancer.stochastic.numRegionLoadsToRemember";

  private static final Random RANDOM = new Random(System.currentTimeMillis());
  private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);

  // Rolling window of recent RegionLoad samples, keyed by region name string
  // (see updateRegionLoad, which builds keys via Bytes.toString(regionName)).
  Map<String, Deque<RegionLoad>> loads = new HashMap<String, Deque<RegionLoad>>();

  // values are defaults
  private int maxSteps = 1000000;
  private int stepsPerRegion = 800;
  private long maxRunningTime = 30 * 1000 * 1; // 30 seconds.
  private int numRegionLoadsToRemember = 15;

  private CandidateGenerator[] candidateGenerators;
  private CostFromRegionLoadFunction[] regionLoadFunctions;
  private CostFunction[] costFunctions;
  // Keep locality based picker and cost function to alert them
  // when new services are offered
  private LocalityBasedCandidateGenerator localityCandidateGenerator;
  private LocalityCostFunction localityCost;
  private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
  private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
126 
  /**
   * Read the tuning knobs from the configuration and (re)build the candidate
   * generators and cost functions used by the stochastic walk.
   */
  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);

    maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);

    stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
    maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);

    numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);

    // Kept in fields (not just in the arrays below) so setMasterServices can
    // re-wire them when the master services handle becomes available.
    localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
    localityCost = new LocalityCostFunction(conf, services);

    candidateGenerators = new CandidateGenerator[] {
      new RandomCandidateGenerator(),
      new LoadCandidateGenerator(),
      localityCandidateGenerator,
      new RegionReplicaRackCandidateGenerator(),
    };

    regionLoadFunctions = new CostFromRegionLoadFunction[] {
      new ReadRequestCostFunction(conf),
      new WriteRequestCostFunction(conf),
      new MemstoreSizeCostFunction(conf),
      new StoreFileCostFunction(conf)
    };

    // Kept in fields so areSomeRegionReplicasColocated can re-init them per cluster.
    regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
    regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);

    costFunctions = new CostFunction[]{
      new RegionCountSkewCostFunction(conf, activeMasterWeight, backupMasterWeight),
      new MoveCostFunction(conf),
      localityCost,
      new TableSkewCostFunction(conf),
      regionReplicaHostCostFunction,
      regionReplicaRackCostFunction,
      // All four entries of regionLoadFunctions, in the same order as above.
      regionLoadFunctions[0],
      regionLoadFunctions[1],
      regionLoadFunctions[2],
      regionLoadFunctions[3],
    };
  }
171 
172   @Override
173   protected void setSlop(Configuration conf) {
174     this.slop = conf.getFloat("hbase.regions.slop", 0.001F);
175   }
176 
177   @Override
178   public void setClusterStatus(ClusterStatus st) {
179     super.setClusterStatus(st);
180     updateRegionLoad();
181     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
182       cost.setClusterStatus(st);
183     }
184   }
185 
186   @Override
187   public void setMasterServices(MasterServices masterServices) {
188     super.setMasterServices(masterServices);
189     this.localityCost.setServices(masterServices);
190     this.localityCandidateGenerator.setServices(masterServices);
191 
192   }
193 
194   @Override
195   protected boolean areSomeRegionReplicasColocated(Cluster c) {
196     regionReplicaHostCostFunction.init(c);
197     if (regionReplicaHostCostFunction.cost() > 0) return true;
198     regionReplicaRackCostFunction.init(c);
199     if (regionReplicaRackCostFunction.cost() > 0) return true;
200     return false;
201   }
202 
  /**
   * Given the cluster state this will try and approach an optimal balance. This
   * should always approach the optimal state given enough steps.
   *
   * <p>Implementation: a random walk over candidate actions; an action is kept
   * only if it lowers the aggregate cost, until the step or time budget runs out.</p>
   */
  @Override
  public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    // Master-hosted regions are balanced separately; if that produced plans, stop here.
    List<RegionPlan> plans = balanceMasterRegions(clusterState);
    if (plans != null) {
      return plans;
    }
    filterExcludedServers(clusterState);
    //The clusterState that is given to this method contains the state
    //of all the regions in the table(s) (that's true today)
    // Keep track of servers to iterate through them.
    Cluster cluster = new Cluster(masterServerName,
      clusterState, loads, regionFinder, getBackupMasters(), tablesOnMaster, rackManager);
    if (!needsBalance(cluster)) {
      return null;
    }

    long startTime = EnvironmentEdgeManager.currentTimeMillis();

    initCosts(cluster);

    double currentCost = computeCost(cluster, Double.MAX_VALUE);

    double initCost = currentCost;
    double newCost = currentCost;

    // Step budget: min of the configured cap and regions * stepsPerRegion * servers.
    long computedMaxSteps = Math.min(this.maxSteps,
        ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
    // Perform a stochastic walk to see if we can get a good fit.
    long step;

    for (step = 0; step < computedMaxSteps; step++) {
      // Pick a generator uniformly at random and ask it for a candidate action.
      int generatorIdx = RANDOM.nextInt(candidateGenerators.length);
      CandidateGenerator p = candidateGenerators[generatorIdx];
      Cluster.Action action = p.generate(cluster);

      if (action.type == Type.NULL) {
        continue;
      }

      cluster.doAction(action);
      updateCostsWithAction(cluster, action);

      // currentCost doubles as the early-out bound inside computeCost.
      newCost = computeCost(cluster, currentCost);

      // Should this be kept?
      if (newCost < currentCost) {
        currentCost = newCost;
      } else {
        // Put things back the way they were before.
        // TODO: undo by remembering old values
        Action undoAction = action.undoAction();
        cluster.doAction(undoAction);
        updateCostsWithAction(cluster, undoAction);
      }

      // Respect the wall-clock budget even if steps remain.
      if (EnvironmentEdgeManager.currentTimeMillis() - startTime >
          maxRunningTime) {
        break;
      }
    }

    long endTime = EnvironmentEdgeManager.currentTimeMillis();

    metricsBalancer.balanceCluster(endTime - startTime);

    // Only emit plans if the walk actually found a strictly cheaper state.
    if (initCost > currentCost) {
      plans = createRegionPlans(cluster);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Finished computing new load balance plan.  Computation took "
            + (endTime - startTime) + "ms to try " + step
            + " different iterations.  Found a solution that moves "
            + plans.size() + " regions; Going from a computed cost of "
            + initCost + " to a new cost of " + currentCost);
      }
      return plans;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Could not find a better load balance plan.  Tried "
          + step + " different configurations in " + (endTime - startTime)
          + "ms, and did not find anything with a computed cost less than " + initCost);
    }
    return null;
  }
290 
291   /**
292    * Create all of the RegionPlan's needed to move from the initial cluster state to the desired
293    * state.
294    *
295    * @param cluster The state of the cluster
296    * @return List of RegionPlan's that represent the moves needed to get to desired final state.
297    */
298   private List<RegionPlan> createRegionPlans(Cluster cluster) {
299     List<RegionPlan> plans = new LinkedList<RegionPlan>();
300     for (int regionIndex = 0;
301          regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
302       int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
303       int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
304 
305       if (initialServerIndex != newServerIndex) {
306         HRegionInfo region = cluster.regions[regionIndex];
307         ServerName initialServer = cluster.servers[initialServerIndex];
308         ServerName newServer = cluster.servers[newServerIndex];
309 
310         if (LOG.isTraceEnabled()) {
311           LOG.trace("Moving Region " + region.getEncodedName() + " from server "
312               + initialServer.getHostname() + " to " + newServer.getHostname());
313         }
314         RegionPlan rp = new RegionPlan(region, initialServer, newServer);
315         plans.add(rp);
316       }
317     }
318     return plans;
319   }
320 
321   /**
322    * Store the current region loads.
323    */
324   private synchronized void updateRegionLoad() {
325     // We create a new hashmap so that regions that are no longer there are removed.
326     // However we temporarily need the old loads so we can use them to keep the rolling average.
327     Map<String, Deque<RegionLoad>> oldLoads = loads;
328     loads = new HashMap<String, Deque<RegionLoad>>();
329 
330     for (ServerName sn : clusterStatus.getServers()) {
331       ServerLoad sl = clusterStatus.getLoad(sn);
332       if (sl == null) {
333         continue;
334       }
335       for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
336         Deque<RegionLoad> rLoads = oldLoads.get(Bytes.toString(entry.getKey()));
337         if (rLoads == null) {
338           // There was nothing there
339           rLoads = new ArrayDeque<RegionLoad>();
340         } else if (rLoads.size() >= 15) {
341           rLoads.remove();
342         }
343         rLoads.add(entry.getValue());
344         loads.put(Bytes.toString(entry.getKey()), rLoads);
345 
346       }
347     }
348 
349     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
350       cost.setLoads(loads);
351     }
352   }
353 
354   protected void initCosts(Cluster cluster) {
355     for (CostFunction c:costFunctions) {
356       c.init(cluster);
357     }
358   }
359 
360   protected void updateCostsWithAction(Cluster cluster, Action action) {
361     for (CostFunction c : costFunctions) {
362       c.postAction(action);
363     }
364   }
365 
366   /**
367    * This is the main cost function.  It will compute a cost associated with a proposed cluster
368    * state.  All different costs will be combined with their multipliers to produce a double cost.
369    *
370    * @param cluster The state of the cluster
371    * @param previousCost the previous cost. This is used as an early out.
372    * @return a double of a cost associated with the proposed cluster state.  This cost is an
373    *         aggregate of all individual cost functions.
374    */
375   protected double computeCost(Cluster cluster, double previousCost) {
376     double total = 0;
377 
378     for (CostFunction c:costFunctions) {
379       if (c.getMultiplier() <= 0) {
380         continue;
381       }
382 
383       total += c.getMultiplier() * c.cost();
384 
385       if (total > previousCost) {
386         return total;
387       }
388     }
389     return total;
390   }
391 
392   /** Generates a candidate action to be applied to the cluster for cost function search */
393   abstract static class CandidateGenerator {
394     abstract Cluster.Action generate(Cluster cluster);
395 
396     /**
397      * From a list of regions pick a random one. Null can be returned which
398      * {@link StochasticLoadBalancer#balanceCluster(Map)} recognize as signal to try a region move
399      * rather than swap.
400      *
401      * @param cluster        The state of the cluster
402      * @param server         index of the server
403      * @param chanceOfNoSwap Chance that this will decide to try a move rather
404      *                       than a swap.
405      * @return a random {@link HRegionInfo} or null if an asymmetrical move is
406      *         suggested.
407      */
408     protected int pickRandomRegion(Cluster cluster, int server, double chanceOfNoSwap) {
409       // Check to see if this is just a move.
410       if (cluster.regionsPerServer[server].length == 0 || RANDOM.nextFloat() < chanceOfNoSwap) {
411         // signal a move only.
412         return -1;
413       }
414       int rand = RANDOM.nextInt(cluster.regionsPerServer[server].length);
415       return cluster.regionsPerServer[server][rand];
416 
417     }
418     protected int pickRandomServer(Cluster cluster) {
419       if (cluster.numServers < 1) {
420         return -1;
421       }
422 
423       return RANDOM.nextInt(cluster.numServers);
424     }
425 
426     protected int pickRandomRack(Cluster cluster) {
427       if (cluster.numRacks < 1) {
428         return -1;
429       }
430 
431       return RANDOM.nextInt(cluster.numRacks);
432     }
433 
434     protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
435       if (cluster.numServers < 2) {
436         return -1;
437       }
438       while (true) {
439         int otherServerIndex = pickRandomServer(cluster);
440         if (otherServerIndex != serverIndex) {
441           return otherServerIndex;
442         }
443       }
444     }
445 
446     protected int pickOtherRandomRack(Cluster cluster, int rackIndex) {
447       if (cluster.numRacks < 2) {
448         return -1;
449       }
450       while (true) {
451         int otherRackIndex = pickRandomRack(cluster);
452         if (otherRackIndex != rackIndex) {
453           return otherRackIndex;
454         }
455       }
456     }
457 
458     protected Cluster.Action pickRandomRegions(Cluster cluster,
459                                                        int thisServer,
460                                                        int otherServer) {
461       if (thisServer < 0 || otherServer < 0) {
462         return Cluster.NullAction;
463       }
464 
465       // Decide who is most likely to need another region
466       int thisRegionCount = cluster.getNumRegions(thisServer);
467       int otherRegionCount = cluster.getNumRegions(otherServer);
468 
469       // Assign the chance based upon the above
470       double thisChance = (thisRegionCount > otherRegionCount) ? 0 : 0.5;
471       double otherChance = (thisRegionCount <= otherRegionCount) ? 0 : 0.5;
472 
473       int thisRegion = pickRandomRegion(cluster, thisServer, thisChance);
474       int otherRegion = pickRandomRegion(cluster, otherServer, otherChance);
475 
476       return getAction(thisServer, thisRegion, otherServer, otherRegion);
477     }
478 
479     protected Cluster.Action getAction (int fromServer, int fromRegion,
480         int toServer, int toRegion) {
481       if (fromServer < 0 || toServer < 0) {
482         return Cluster.NullAction;
483       }
484       if (fromRegion > 0 && toRegion > 0) {
485         return new Cluster.SwapRegionsAction(fromServer, fromRegion,
486           toServer, toRegion);
487       } else if (fromRegion > 0) {
488         return new Cluster.MoveRegionAction(fromRegion, fromServer, toServer);
489       } else if (toRegion > 0) {
490         return new Cluster.MoveRegionAction(toRegion, toServer, fromServer);
491       } else {
492         return Cluster.NullAction;
493       }
494     }
495   }
496 
497   static class RandomCandidateGenerator extends CandidateGenerator {
498 
499     @Override
500     Cluster.Action generate(Cluster cluster) {
501 
502       int thisServer = pickRandomServer(cluster);
503 
504       // Pick the other server
505       int otherServer = pickOtherRandomServer(cluster, thisServer);
506 
507       return pickRandomRegions(cluster, thisServer, otherServer);
508     }
509   }
510 
511   static class LoadCandidateGenerator extends CandidateGenerator {
512 
513     @Override
514     Cluster.Action generate(Cluster cluster) {
515       cluster.sortServersByRegionCount();
516       int thisServer = pickMostLoadedServer(cluster, -1);
517       int otherServer = pickLeastLoadedServer(cluster, thisServer);
518 
519       return pickRandomRegions(cluster, thisServer, otherServer);
520     }
521 
522     private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
523       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
524 
525       int index = 0;
526       while (servers[index] == null || servers[index] == thisServer) {
527         index++;
528         if (index == servers.length) {
529           return -1;
530         }
531       }
532       return servers[index];
533     }
534 
535     private int pickMostLoadedServer(final Cluster cluster, int thisServer) {
536       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
537 
538       int index = servers.length - 1;
539       while (servers[index] == null || servers[index] == thisServer) {
540         index--;
541         if (index < 0) {
542           return -1;
543         }
544       }
545       return servers[index];
546     }
547   }
548 
549   static class LocalityBasedCandidateGenerator extends CandidateGenerator {
550 
551     private MasterServices masterServices;
552 
553     LocalityBasedCandidateGenerator(MasterServices masterServices) {
554       this.masterServices = masterServices;
555     }
556 
557     @Override
558     Cluster.Action generate(Cluster cluster) {
559       if (this.masterServices == null) {
560         return Cluster.NullAction;
561       }
562       // Pick a random region server
563       int thisServer = pickRandomServer(cluster);
564 
565       // Pick a random region on this server
566       int thisRegion = pickRandomRegion(cluster, thisServer, 0.0f);
567 
568       if (thisRegion == -1) {
569         return Cluster.NullAction;
570       }
571 
572       // Pick the server with the highest locality
573       int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
574 
575       // pick an region on the other server to potentially swap
576       int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
577 
578       return getAction(thisServer, thisRegion, otherServer, otherRegion);
579     }
580 
581     private int pickHighestLocalityServer(Cluster cluster, int thisServer, int thisRegion) {
582       int[] regionLocations = cluster.regionLocations[thisRegion];
583 
584       if (regionLocations == null || regionLocations.length <= 1) {
585         return pickOtherRandomServer(cluster, thisServer);
586       }
587 
588       for (int loc : regionLocations) {
589         if (loc >= 0 && loc != thisServer) { // find the first suitable server
590           return loc;
591         }
592       }
593 
594       // no location found
595       return pickOtherRandomServer(cluster, thisServer);
596     }
597 
598     void setServices(MasterServices services) {
599       this.masterServices = services;
600     }
601   }
602 
  /**
   * Generates candidates which moves the replicas out of the region server for
   * co-hosted region replicas
   */
  static class RegionReplicaCandidateGenerator extends CandidateGenerator {

    // Fallback used when the picked server/group has no co-located replicas.
    RandomCandidateGenerator randomGenerator = new RandomCandidateGenerator();

    /**
     * Randomly select one regionIndex out of all region replicas co-hosted in the same group
     * (a group is a server, host or rack)
     * @param primariesOfRegionsPerGroup either Cluster.primariesOfRegionsPerServer,
     * primariesOfRegionsPerHost or primariesOfRegionsPerRack
     * @param regionsPerGroup either Cluster.regionsPerServer, regionsPerHost or regionsPerRack
     * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
     * @return a regionIndex for the selected primary or -1 if there is no co-locating
     */
    int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] regionsPerGroup
        , int[] regionIndexToPrimaryIndex) {
      int currentPrimary = -1;       // primary id of the run currently being scanned
      int currentPrimaryIndex = -1;  // array position where that run started
      int selectedPrimaryIndex = -1; // primary id chosen by the sampling below
      double currentLargestRandom = -1;
      // primariesOfRegionsPerGroup is a sorted array. Since it contains the primary region
      // ids for the regions hosted in server, a consecutive repetition means that replicas
      // are co-hosted
      // Loop runs one slot past the end (j == length) so the trailing run is flushed too.
      for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
        int primary = j < primariesOfRegionsPerGroup.length
            ? primariesOfRegionsPerGroup[j] : -1;
        if (primary != currentPrimary) { // check for whether we see a new primary
          int numReplicas = j - currentPrimaryIndex;
          if (numReplicas > 1) { // means consecutive primaries, indicating co-location
            // decide to select this primary region id or not
            double currentRandom = RANDOM.nextDouble();
            // we don't know how many region replicas are co-hosted, we will randomly select one
            // using reservoir sampling (http://gregable.com/2007/10/reservoir-sampling.html)
            if (currentRandom > currentLargestRandom) {
              // NOTE(review): despite its name, selectedPrimaryIndex stores the
              // primary region *id* (currentPrimary) — which is what
              // regionIndexToPrimaryIndex maps to below, so the comparison is consistent.
              selectedPrimaryIndex = currentPrimary;
              currentLargestRandom = currentRandom;
            }
          }
          currentPrimary = primary;
          currentPrimaryIndex = j;
        }
      }

      // we have found the primary id for the region to move. Now find the actual regionIndex
      // with the given primary, prefer to move the secondary region.
      for (int j = 0; j < regionsPerGroup.length; j++) {
        int regionIndex = regionsPerGroup[j];
        if (selectedPrimaryIndex == regionIndexToPrimaryIndex[regionIndex]) {
          // always move the secondary, not the primary
          if (selectedPrimaryIndex != regionIndex) {
            return regionIndex;
          }
        }
      }
      return -1;
    }

    @Override
    Cluster.Action generate(Cluster cluster) {
      int serverIndex = pickRandomServer(cluster);
      if (cluster.numServers <= 1 || serverIndex == -1) {
        return Cluster.NullAction;
      }

      int regionIndex = selectCoHostedRegionPerGroup(
        cluster.primariesOfRegionsPerServer[serverIndex],
        cluster.regionsPerServer[serverIndex],
        cluster.regionIndexToPrimaryIndex);

      // if there are no pairs of region replicas co-hosted, default to random generator
      if (regionIndex == -1) {
        // default to randompicker
        return randomGenerator.generate(cluster);
      }

      // Move the co-hosted replica elsewhere; with chance 0.9 of "no swap" the
      // destination usually contributes no region, making this a plain move.
      int toServerIndex = pickOtherRandomServer(cluster, serverIndex);
      int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
      return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
    }
  }
686 
687   /**
688    * Generates candidates which moves the replicas out of the rack for
689    * co-hosted region replicas in the same rack
690    */
691   static class RegionReplicaRackCandidateGenerator extends RegionReplicaCandidateGenerator {
692     @Override
693     Cluster.Action generate(Cluster cluster) {
694       int rackIndex = pickRandomRack(cluster);
695       if (cluster.numRacks <= 1 || rackIndex == -1) {
696         return super.generate(cluster);
697       }
698 
699       int regionIndex = selectCoHostedRegionPerGroup(
700         cluster.primariesOfRegionsPerRack[rackIndex],
701         cluster.regionsPerRack[rackIndex],
702         cluster.regionIndexToPrimaryIndex);
703 
704       // if there are no pairs of region replicas co-hosted, default to random generator
705       if (regionIndex == -1) {
706         // default to randompicker
707         return randomGenerator.generate(cluster);
708       }
709 
710       int serverIndex = cluster.regionIndexToServerIndex[regionIndex];
711       int toRackIndex = pickOtherRandomRack(cluster, rackIndex);
712 
713       int rand = RANDOM.nextInt(cluster.serversPerRack[toRackIndex].length);
714       int toServerIndex = cluster.serversPerRack[toRackIndex][rand];
715       int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
716       return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
717     }
718   }
719 
720   /**
721    * Base class of StochasticLoadBalancer's Cost Functions.
722    */
723   abstract static class CostFunction {
724 
725     private float multiplier = 0;
726 
727     protected Cluster cluster;
728 
729     CostFunction(Configuration c) {
730 
731     }
732 
733     float getMultiplier() {
734       return multiplier;
735     }
736 
737     void setMultiplier(float m) {
738       this.multiplier = m;
739     }
740 
741     /** Called once per LB invocation to give the cost function
742      * to initialize it's state, and perform any costly calculation.
743      */
744     void init(Cluster cluster) {
745       this.cluster = cluster;
746     }
747 
748     /** Called once per cluster Action to give the cost function
749      * an opportunity to update it's state. postAction() is always
750      * called at least once before cost() is called with the cluster
751      * that this action is performed on. */
752     void postAction(Action action) {
753       switch (action.type) {
754       case NULL: break;
755       case ASSIGN_REGION:
756         AssignRegionAction ar = (AssignRegionAction) action;
757         regionMoved(ar.region, -1, ar.server);
758         break;
759       case MOVE_REGION:
760         MoveRegionAction mra = (MoveRegionAction) action;
761         regionMoved(mra.region, mra.fromServer, mra.toServer);
762         break;
763       case SWAP_REGIONS:
764         SwapRegionsAction a = (SwapRegionsAction) action;
765         regionMoved(a.fromRegion, a.fromServer, a.toServer);
766         regionMoved(a.toRegion, a.toServer, a.fromServer);
767         break;
768       default:
769         throw new RuntimeException("Uknown action:" + action.type);
770       }
771     }
772 
773     protected void regionMoved(int region, int oldServer, int newServer) {
774     }
775 
776     abstract double cost();
777 
778     /**
779      * Function to compute a scaled cost using {@link DescriptiveStatistics}. It
780      * assumes that this is a zero sum set of costs.  It assumes that the worst case
781      * possible is all of the elements in one region server and the rest having 0.
782      *
783      * @param stats the costs
784      * @return a scaled set of costs.
785      */
786     protected double costFromArray(double[] stats) {
787       double totalCost = 0;
788       double total = getSum(stats);
789       double mean = total/((double)stats.length);
790       double count = stats.length;
791 
792       // Compute max as if all region servers had 0 and one had the sum of all costs.  This must be
793       // a zero sum cost for this to make sense.
794       // TODO: Should we make this sum of square errors?
795       double max = ((count - 1) * mean) + (total - mean);
796       for (double n : stats) {
797         double diff = Math.abs(mean - n);
798         totalCost += diff;
799       }
800 
801       double scaled =  scale(0, max, totalCost);
802       return scaled;
803     }
804 
805     private double getSum(double[] stats) {
806       double total = 0;
807       for(double s:stats) {
808         total += s;
809       }
810       return total;
811     }
812 
813     /**
814      * Scale the value between 0 and 1.
815      *
816      * @param min   Min value
817      * @param max   The Max value
818      * @param value The value to be scaled.
819      * @return The scaled value.
820      */
821     protected double scale(double min, double max, double value) {
822       if (max <= min || value <= min) {
823         return 0;
824       }
825 
826       return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
827     }
828   }
829 
830   /**
831    * Given the starting state of the regions and a potential ending state
832    * compute cost based upon the number of regions that have moved.
833    */
834   static class MoveCostFunction extends CostFunction {
835     private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
836     private static final String MAX_MOVES_PERCENT_KEY =
837         "hbase.master.balancer.stochastic.maxMovePercent";
838     private static final float DEFAULT_MOVE_COST = 100;
839     private static final int DEFAULT_MAX_MOVES = 600;
840     private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
841 
842     private final float maxMovesPercent;
843 
844     MoveCostFunction(Configuration conf) {
845       super(conf);
846 
847       // Move cost multiplier should be the same cost or higher than the rest of the costs to ensure
848       // that large benefits are need to overcome the cost of a move.
849       this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
850       // What percent of the number of regions a single run of the balancer can move.
851       maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT);
852     }
853 
854     @Override
855     double cost() {
856       // Try and size the max number of Moves, but always be prepared to move some.
857       int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent),
858           DEFAULT_MAX_MOVES);
859 
860       double moveCost = cluster.numMovedRegions;
861 
862       // Don't let this single balance move more than the max moves,
863       // or move a region that should be on master away from the master.
864       // It is ok to move any master hosted region back to the master.
865       // This allows better scaling to accurately represent the actual cost of a move.
866       if (moveCost > maxMoves || cluster.numMovedMasterHostedRegions > 0) {
867         return 1000000;   // return a number much greater than any of the other cost
868       }
869 
870       return scale(0, cluster.numRegions, moveCost);
871     }
872   }
873 
874   /**
875    * Compute the cost of a potential cluster state from skew in number of
876    * regions on a cluster.
877    */
878   static class RegionCountSkewCostFunction extends CostFunction {
879     private static final String REGION_COUNT_SKEW_COST_KEY =
880         "hbase.master.balancer.stochastic.regionCountCost";
881     private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
882 
883     private double activeMasterWeight;
884     private double backupMasterWeight;
885     private double[] stats = null;
886 
887     RegionCountSkewCostFunction(Configuration conf,
888         double activeMasterWeight, double backupMasterWeight) {
889       super(conf);
890       // Load multiplier should be the greatest as it is the most general way to balance data.
891       this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
892       this.activeMasterWeight = activeMasterWeight;
893       this.backupMasterWeight = backupMasterWeight;
894     }
895 
896     @Override
897     double cost() {
898       if (stats == null || stats.length != cluster.numServers) {
899         stats = new double[cluster.numServers];
900       }
901 
902       for (int i =0; i < cluster.numServers; i++) {
903         stats[i] = cluster.regionsPerServer[i].length;
904         // Use some weight on regions assigned to active/backup masters,
905         // so that they won't carry as many regions as normal regionservers.
906         if (cluster.isActiveMaster(i)) {
907           stats[i] += cluster.numUserRegionsOnMaster * (activeMasterWeight - 1);
908         } else if (cluster.isBackupMaster(i)) {
909           stats[i] *= backupMasterWeight;
910         }
911       }
912       return costFromArray(stats);
913     }
914   }
915 
916   /**
917    * Compute the cost of a potential cluster configuration based upon how evenly
918    * distributed tables are.
919    */
920   static class TableSkewCostFunction extends CostFunction {
921 
922     private static final String TABLE_SKEW_COST_KEY =
923         "hbase.master.balancer.stochastic.tableSkewCost";
924     private static final float DEFAULT_TABLE_SKEW_COST = 35;
925 
926     TableSkewCostFunction(Configuration conf) {
927       super(conf);
928       this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
929     }
930 
931     @Override
932     double cost() {
933       double max = cluster.numRegions;
934       double min = ((double) cluster.numRegions) / cluster.numServers;
935       double value = 0;
936 
937       for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
938         value += cluster.numMaxRegionsPerTable[i];
939       }
940 
941       return scale(min, max, value);
942     }
943   }
944 
945   /**
946    * Compute a cost of a potential cluster configuration based upon where
947    * {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located.
948    */
949   static class LocalityCostFunction extends CostFunction {
950 
951     private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
952     private static final float DEFAULT_LOCALITY_COST = 25;
953 
954     private MasterServices services;
955 
956     LocalityCostFunction(Configuration conf, MasterServices srv) {
957       super(conf);
958       this.setMultiplier(conf.getFloat(LOCALITY_COST_KEY, DEFAULT_LOCALITY_COST));
959       this.services = srv;
960     }
961 
962     void setServices(MasterServices srvc) {
963       this.services = srvc;
964     }
965 
966     @Override
967     double cost() {
968       double max = 0;
969       double cost = 0;
970 
971       // If there's no master so there's no way anything else works.
972       if (this.services == null) {
973         return cost;
974       }
975 
976       for (int i = 0; i < cluster.regionLocations.length; i++) {
977         max += 1;
978         int serverIndex = cluster.regionIndexToServerIndex[i];
979         int[] regionLocations = cluster.regionLocations[i];
980 
981         // If we can't find where the data is getTopBlock returns null.
982         // so count that as being the best possible.
983         if (regionLocations == null) {
984           continue;
985         }
986 
987         int index = -1;
988         for (int j = 0; j < regionLocations.length; j++) {
989           if (regionLocations[j] >= 0 && regionLocations[j] == serverIndex) {
990             index = j;
991             break;
992           }
993         }
994 
995         if (index < 0) {
996           cost += 1;
997         } else {
998           cost += (double) index / (double) regionLocations.length;
999         }
1000       }
1001       return scale(0, max, cost);
1002     }
1003   }
1004 
1005   /**
1006    * Base class the allows writing costs functions from rolling average of some
1007    * number from RegionLoad.
1008    */
1009   abstract static class CostFromRegionLoadFunction extends CostFunction {
1010 
1011     private ClusterStatus clusterStatus = null;
1012     private Map<String, Deque<RegionLoad>> loads = null;
1013     private double[] stats = null;
1014     CostFromRegionLoadFunction(Configuration conf) {
1015       super(conf);
1016     }
1017 
1018     void setClusterStatus(ClusterStatus status) {
1019       this.clusterStatus = status;
1020     }
1021 
1022     void setLoads(Map<String, Deque<RegionLoad>> l) {
1023       this.loads = l;
1024     }
1025 
1026     @Override
1027     double cost() {
1028       if (clusterStatus == null || loads == null) {
1029         return 0;
1030       }
1031 
1032       if (stats == null || stats.length != cluster.numServers) {
1033         stats = new double[cluster.numServers];
1034       }
1035 
1036       for (int i =0; i < stats.length; i++) {
1037         //Cost this server has from RegionLoad
1038         long cost = 0;
1039 
1040         // for every region on this server get the rl
1041         for(int regionIndex:cluster.regionsPerServer[i]) {
1042           Collection<RegionLoad> regionLoadList =  cluster.regionLoads[regionIndex];
1043 
1044           // Now if we found a region load get the type of cost that was requested.
1045           if (regionLoadList != null) {
1046             cost += getRegionLoadCost(regionLoadList);
1047           }
1048         }
1049 
1050         // Add the total cost to the stats.
1051         stats[i] = cost;
1052       }
1053 
1054       // Now return the scaled cost from data held in the stats object.
1055       return costFromArray(stats);
1056     }
1057 
1058     protected double getRegionLoadCost(Collection<RegionLoad> regionLoadList) {
1059       double cost = 0;
1060 
1061       for (RegionLoad rl : regionLoadList) {
1062         double toAdd = getCostFromRl(rl);
1063 
1064         if (cost == 0) {
1065           cost = toAdd;
1066         } else {
1067           cost = (.5 * cost) + (.5 * toAdd);
1068         }
1069       }
1070 
1071       return cost;
1072     }
1073 
1074     protected abstract double getCostFromRl(RegionLoad rl);
1075   }
1076 
1077   /**
1078    * Compute the cost of total number of read requests  The more unbalanced the higher the
1079    * computed cost will be.  This uses a rolling average of regionload.
1080    */
1081 
1082   static class ReadRequestCostFunction extends CostFromRegionLoadFunction {
1083 
1084     private static final String READ_REQUEST_COST_KEY =
1085         "hbase.master.balancer.stochastic.readRequestCost";
1086     private static final float DEFAULT_READ_REQUEST_COST = 5;
1087 
1088     ReadRequestCostFunction(Configuration conf) {
1089       super(conf);
1090       this.setMultiplier(conf.getFloat(READ_REQUEST_COST_KEY, DEFAULT_READ_REQUEST_COST));
1091     }
1092 
1093 
1094     @Override
1095     protected double getCostFromRl(RegionLoad rl) {
1096       return rl.getReadRequestsCount();
1097     }
1098   }
1099 
1100   /**
1101    * Compute the cost of total number of write requests.  The more unbalanced the higher the
1102    * computed cost will be.  This uses a rolling average of regionload.
1103    */
1104   static class WriteRequestCostFunction extends CostFromRegionLoadFunction {
1105 
1106     private static final String WRITE_REQUEST_COST_KEY =
1107         "hbase.master.balancer.stochastic.writeRequestCost";
1108     private static final float DEFAULT_WRITE_REQUEST_COST = 5;
1109 
1110     WriteRequestCostFunction(Configuration conf) {
1111       super(conf);
1112       this.setMultiplier(conf.getFloat(WRITE_REQUEST_COST_KEY, DEFAULT_WRITE_REQUEST_COST));
1113     }
1114 
1115     @Override
1116     protected double getCostFromRl(RegionLoad rl) {
1117       return rl.getWriteRequestsCount();
1118     }
1119   }
1120 
  /**
   * A cost function for region replicas. We give a very high cost to hosting
   * replicas of the same region in the same host. We do not prevent the case
   * though, since if numReplicas > numRegionServers, we still want to keep the
   * replica open.
   */
  static class RegionReplicaHostCostFunction extends CostFunction {
    private static final String REGION_REPLICA_HOST_COST_KEY =
        "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
    private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;

    // Worst-case cost (all replicas co-located), used as the scaling ceiling; 0 disables.
    long maxCost = 0;
    long[] costsPerGroup; // group is either server, host or rack
    int[][] primariesOfRegionsPerGroup;

    public RegionReplicaHostCostFunction(Configuration conf) {
      super(conf);
      this.setMultiplier(conf.getFloat(REGION_REPLICA_HOST_COST_KEY,
        DEFAULT_REGION_REPLICA_HOST_COST_KEY));
    }

    @Override
    void init(Cluster cluster) {
      super.init(cluster);
      // max cost is the case where every region replica is hosted together regardless of host
      maxCost = cluster.numHosts > 1 ? getMaxCost(cluster) : 0;
      // NOTE(review): costsPerGroup is sized by numHosts even in the server-based branch
      // below — presumably numHosts == numServers when !multiServersPerHost; verify.
      costsPerGroup = new long[cluster.numHosts];
      primariesOfRegionsPerGroup = cluster.multiServersPerHost // either server based or host based
          ? cluster.primariesOfRegionsPerHost
          : cluster.primariesOfRegionsPerServer;
      for (int i = 0 ; i < primariesOfRegionsPerGroup.length; i++) {
        costsPerGroup[i] = costPerGroup(primariesOfRegionsPerGroup[i]);
      }
    }

    /**
     * Computes the worst-case cost: the cost as if every region's replicas were
     * all placed in a single group.
     */
    long getMaxCost(Cluster cluster) {
      if (!cluster.hasRegionReplicas) {
        return 0; // short circuit
      }
      // max cost is the case where every region replica is hosted together regardless of host
      int[] primariesOfRegions = new int[cluster.numRegions];
      for (int i = 0; i < cluster.regions.length; i++) {
        // assume all regions are hosted by only one server
        int primaryIndex = cluster.regionIndexToPrimaryIndex[i];
        primariesOfRegions[i] = primaryIndex;
      }

      // costPerGroup requires its input sorted so replicas of a primary are consecutive.
      Arrays.sort(primariesOfRegions);

      // compute numReplicas from the sorted array
      return costPerGroup(primariesOfRegions);
    }

    @Override
    double cost() {
      // maxCost == 0 means replicas are disabled or there is a single group; no cost.
      if (maxCost <= 0) {
        return 0;
      }

      long totalCost = 0;
      for (int i = 0 ; i < costsPerGroup.length; i++) {
        totalCost += costsPerGroup[i];
      }
      return scale(0, maxCost, totalCost);
    }

    /**
     * For each primary region, it computes the total number of replicas in the array (numReplicas)
     * and returns a sum of numReplicas-1 squared. For example, if the server hosts
     * regions a, b, c, d, e, f where a and b are same replicas, and c,d,e are same replicas, it
     * returns (2-1) * (2-1) + (3-1) * (3-1) + (1-1) * (1-1).
     * @param primariesOfRegions a sorted array of primary regions ids for the regions hosted
     * @return a sum of numReplicas-1 squared for each primary region in the group.
     */
    protected long costPerGroup(int[] primariesOfRegions) {
      long cost = 0;
      int currentPrimary = -1;
      int currentPrimaryIndex = -1;
      // primariesOfRegions is a sorted array of primary ids of regions. Replicas of regions
      // sharing the same primary will have consecutive numbers in the array.
      // The loop runs one index past the end (j == length) so the final run is flushed.
      for (int j = 0 ; j <= primariesOfRegions.length; j++) {
        int primary = j < primariesOfRegions.length ? primariesOfRegions[j] : -1;
        if (primary != currentPrimary) { // we see a new primary
          int numReplicas = j - currentPrimaryIndex;
          // square the cost
          if (numReplicas > 1) { // means consecutive primaries, indicating co-location
            cost += (numReplicas - 1) * (numReplicas - 1);
          }
          currentPrimary = primary;
          currentPrimaryIndex = j;
        }
      }

      return cost;
    }

    // Incrementally recompute only the two groups affected by the move.
    @Override
    protected void regionMoved(int region, int oldServer, int newServer) {
      if (maxCost <= 0) {
        return; // no need to compute
      }
      // NOTE(review): postAction passes oldServer == -1 for ASSIGN_REGION actions,
      // which would index out of bounds here — confirm assignments never reach this
      // function while maxCost > 0.
      if (cluster.multiServersPerHost) {
        int oldHost = cluster.serverIndexToHostIndex[oldServer];
        int newHost = cluster.serverIndexToHostIndex[newServer];
        if (newHost != oldHost) {
          costsPerGroup[oldHost] = costPerGroup(cluster.primariesOfRegionsPerHost[oldHost]);
          costsPerGroup[newHost] = costPerGroup(cluster.primariesOfRegionsPerHost[newHost]);
        }
      } else {
        costsPerGroup[oldServer] = costPerGroup(cluster.primariesOfRegionsPerServer[oldServer]);
        costsPerGroup[newServer] = costPerGroup(cluster.primariesOfRegionsPerServer[newServer]);
      }
    }
  }
1235 
  /**
   * A cost function for region replicas for the rack distribution. We give a relatively high
   * cost to hosting replicas of the same region in the same rack. We do not prevent the case
   * though.
   */
  static class RegionReplicaRackCostFunction extends RegionReplicaHostCostFunction {
    private static final String REGION_REPLICA_RACK_COST_KEY =
        "hbase.master.balancer.stochastic.regionReplicaRackCostKey";
    private static final float DEFAULT_REGION_REPLICA_RACK_COST_KEY = 10000;

    public RegionReplicaRackCostFunction(Configuration conf) {
      super(conf);
      this.setMultiplier(conf.getFloat(REGION_REPLICA_RACK_COST_KEY, DEFAULT_REGION_REPLICA_RACK_COST_KEY));
    }

    @Override
    void init(Cluster cluster) {
      // Deliberately does NOT call super.init(): the parent would build host/server
      // based groups, whereas this function groups by rack.
      this.cluster = cluster;
      if (cluster.numRacks <= 1) {
        maxCost = 0;
        return; // disabled for 1 rack
      }
      // max cost is the case where every region replica is hosted together regardless of rack
      maxCost = getMaxCost(cluster);
      costsPerGroup = new long[cluster.numRacks];
      for (int i = 0 ; i < cluster.primariesOfRegionsPerRack.length; i++) {
        costsPerGroup[i] = costPerGroup(cluster.primariesOfRegionsPerRack[i]);
      }
    }

    // Incrementally recompute only the two racks affected by the move.
    @Override
    protected void regionMoved(int region, int oldServer, int newServer) {
      if (maxCost <= 0) {
        return; // no need to compute
      }
      // NOTE(review): as in the host variant, oldServer == -1 from an ASSIGN_REGION
      // would index out of bounds here — confirm assignments never reach this path.
      int oldRack = cluster.serverIndexToRackIndex[oldServer];
      int newRack = cluster.serverIndexToRackIndex[newServer];
      if (newRack != oldRack) {
        costsPerGroup[oldRack] = costPerGroup(cluster.primariesOfRegionsPerRack[oldRack]);
        costsPerGroup[newRack] = costPerGroup(cluster.primariesOfRegionsPerRack[newRack]);
      }
    }
  }
1279 
1280   /**
1281    * Compute the cost of total memstore size.  The more unbalanced the higher the
1282    * computed cost will be.  This uses a rolling average of regionload.
1283    */
1284   static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction {
1285 
1286     private static final String MEMSTORE_SIZE_COST_KEY =
1287         "hbase.master.balancer.stochastic.memstoreSizeCost";
1288     private static final float DEFAULT_MEMSTORE_SIZE_COST = 5;
1289 
1290     MemstoreSizeCostFunction(Configuration conf) {
1291       super(conf);
1292       this.setMultiplier(conf.getFloat(MEMSTORE_SIZE_COST_KEY, DEFAULT_MEMSTORE_SIZE_COST));
1293     }
1294 
1295     @Override
1296     protected double getCostFromRl(RegionLoad rl) {
1297       return rl.getMemStoreSizeMB();
1298     }
1299   }
1300   /**
1301    * Compute the cost of total open storefiles size.  The more unbalanced the higher the
1302    * computed cost will be.  This uses a rolling average of regionload.
1303    */
1304   static class StoreFileCostFunction extends CostFromRegionLoadFunction {
1305 
1306     private static final String STOREFILE_SIZE_COST_KEY =
1307         "hbase.master.balancer.stochastic.storefileSizeCost";
1308     private static final float DEFAULT_STOREFILE_SIZE_COST = 5;
1309 
1310     StoreFileCostFunction(Configuration conf) {
1311       super(conf);
1312       this.setMultiplier(conf.getFloat(STOREFILE_SIZE_COST_KEY, DEFAULT_STOREFILE_SIZE_COST));
1313     }
1314 
1315     @Override
1316     protected double getCostFromRl(RegionLoad rl) {
1317       return rl.getStorefileSizeMB();
1318     }
1319   }
1320 }