View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayDeque;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Deque;
24  import java.util.HashMap;
25  import java.util.LinkedList;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Map.Entry;
29  import java.util.Random;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.hbase.ClusterStatus;
36  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.RegionLoad;
39  import org.apache.hadoop.hbase.ServerLoad;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.master.MasterServices;
42  import org.apache.hadoop.hbase.master.RegionPlan;
43  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
44  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
45  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction;
46  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction;
47  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegionsAction;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
50  
51  /**
52   * <p>This is a best effort load balancer. Given a Cost function F(C) => x It will
53   * randomly try and mutate the cluster to Cprime. If F(Cprime) < F(C) then the
54   * new cluster state becomes the plan. It includes costs functions to compute the cost of:</p>
55   * <ul>
56   * <li>Region Load</li>
57   * <li>Table Load</li>
58   * <li>Data Locality</li>
59   * <li>Memstore Sizes</li>
60   * <li>Storefile Sizes</li>
61   * </ul>
62   *
63   *
64   * <p>Every cost function returns a number between 0 and 1 inclusive; where 0 is the lowest cost
65   * best solution, and 1 is the highest possible cost and the worst solution.  The computed costs are
66   * scaled by their respective multipliers:</p>
67   *
68   * <ul>
69   *   <li>hbase.master.balancer.stochastic.regionLoadCost</li>
70   *   <li>hbase.master.balancer.stochastic.moveCost</li>
71   *   <li>hbase.master.balancer.stochastic.tableLoadCost</li>
72   *   <li>hbase.master.balancer.stochastic.localityCost</li>
73   *   <li>hbase.master.balancer.stochastic.memstoreSizeCost</li>
74   *   <li>hbase.master.balancer.stochastic.storefileSizeCost</li>
75   * </ul>
76   *
77   * <p>In addition to the above configurations, the balancer can be tuned by the following
78   * configuration values:</p>
79   * <ul>
80   *   <li>hbase.master.balancer.stochastic.maxMoveRegions which
81   *   controls what the max number of regions that can be moved in a single invocation of this
82   *   balancer.</li>
83   *   <li>hbase.master.balancer.stochastic.stepsPerRegion is the coefficient by which the number of
84   *   regions is multiplied to try and get the number of times the balancer will
85   *   mutate all servers.</li>
86   *   <li>hbase.master.balancer.stochastic.maxSteps which controls the maximum number of times that
87   *   the balancer will try and mutate all the servers. The balancer will use the minimum of this
88   *   value and the above computation.</li>
89   * </ul>
90   *
91   * <p>This balancer is best used with hbase.master.loadbalance.bytable set to false
92   * so that the balancer gets the full picture of all loads on the cluster.</p>
93   */
94  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
95  public class StochasticLoadBalancer extends BaseLoadBalancer {
96  
  // Configuration keys used to tune the stochastic walk (see class javadoc).
  protected static final String STEPS_PER_REGION_KEY =
      "hbase.master.balancer.stochastic.stepsPerRegion";
  protected static final String MAX_STEPS_KEY =
      "hbase.master.balancer.stochastic.maxSteps";
  protected static final String MAX_RUNNING_TIME_KEY =
      "hbase.master.balancer.stochastic.maxRunningTime";
  protected static final String KEEP_REGION_LOADS =
      "hbase.master.balancer.stochastic.numRegionLoadsToRemember";

  // Single shared RNG for all candidate generation; seeded once at class load.
  private static final Random RANDOM = new Random(System.currentTimeMillis());
  private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);

  // Rolling window of recent RegionLoads keyed by region name; refreshed by
  // updateRegionLoad() on each setClusterStatus() call.
  Map<String, Deque<RegionLoad>> loads = new HashMap<String, Deque<RegionLoad>>();

  // values are defaults, each overridable via the config keys above
  private int maxSteps = 1000000;
  private int stepsPerRegion = 800;
  private long maxRunningTime = 30 * 1000 * 1; // 30 seconds.
  private int numRegionLoadsToRemember = 15;

  private CandidateGenerator[] candidateGenerators;
  private CostFromRegionLoadFunction[] regionLoadFunctions;
  private CostFunction[] costFunctions;
  // Keep locality based picker and cost function to alert them
  // when new services are offered
  private LocalityBasedCandidateGenerator localityCandidateGenerator;
  private LocalityCostFunction localityCost;
  private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
  private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
126 
  @Override
  public void onConfigurationChange(Configuration conf) {
    // Re-read all tunables; setConf() also rebuilds the cost functions so new
    // multipliers take effect.
    setConf(conf);
  }
131 
  /**
   * Reads all balancer tunables from {@code conf} and (re)builds the candidate
   * generators and cost functions. Safe to call repeatedly — generators are
   * created once and reused, while cost functions are rebuilt so they pick up
   * updated multipliers.
   */
  @Override
  public synchronized void setConf(Configuration conf) {
    super.setConf(conf);
    LOG.info("loading config");

    maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);

    stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
    maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);

    numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);

    // The locality generator is kept across reconfigurations so that
    // setMasterServices() can update the single shared instance.
    if (localityCandidateGenerator == null) {
      localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
    }
    localityCost = new LocalityCostFunction(conf, services);

    if (candidateGenerators == null) {
      candidateGenerators = new CandidateGenerator[] {
          new RandomCandidateGenerator(),
          new LoadCandidateGenerator(),
          localityCandidateGenerator,
          new RegionReplicaRackCandidateGenerator(),
      };
    }

    // Cost functions are rebuilt on every call so they re-read their
    // multipliers from the new configuration.
    regionLoadFunctions = new CostFromRegionLoadFunction[] {
      new ReadRequestCostFunction(conf),
      new WriteRequestCostFunction(conf),
      new MemstoreSizeCostFunction(conf),
      new StoreFileCostFunction(conf)
    };

    regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
    regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);

    costFunctions = new CostFunction[]{
      new RegionCountSkewCostFunction(conf),
      new MoveCostFunction(conf),
      localityCost,
      new TableSkewCostFunction(conf),
      regionReplicaHostCostFunction,
      regionReplicaRackCostFunction,
      regionLoadFunctions[0],
      regionLoadFunctions[1],
      regionLoadFunctions[2],
      regionLoadFunctions[3],
    };
  }
181 
  @Override
  protected void setSlop(Configuration conf) {
    // Near-zero default slop — presumably so the cost functions, not the slop
    // threshold, decide whether balancing is needed (confirm in needsBalance()).
    this.slop = conf.getFloat("hbase.regions.slop", 0.001F);
  }
186 
  @Override
  public synchronized void setClusterStatus(ClusterStatus st) {
    super.setClusterStatus(st);
    // Refresh the rolling per-region load window, then push the new status to
    // every load-based cost function.
    updateRegionLoad();
    for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
      cost.setClusterStatus(st);
    }
  }
195 
  @Override
  public synchronized void setMasterServices(MasterServices masterServices) {
    super.setMasterServices(masterServices);
    // Propagate the services handle to the locality-aware components, which
    // were possibly constructed with a null reference in setConf().
    this.localityCost.setServices(masterServices);
    this.localityCandidateGenerator.setServices(masterServices);

  }
203 
204   @Override
205   protected synchronized boolean areSomeRegionReplicasColocated(Cluster c) {
206     regionReplicaHostCostFunction.init(c);
207     if (regionReplicaHostCostFunction.cost() > 0) return true;
208     regionReplicaRackCostFunction.init(c);
209     if (regionReplicaRackCostFunction.cost() > 0) return true;
210     return false;
211   }
212 
  /**
   * Given the cluster state this will try and approach an optimal balance. This
   * should always approach the optimal state given enough steps.
   *
   * <p>Performs a bounded stochastic walk: repeatedly applies a random candidate
   * action, keeps it if the total cost drops, and undoes it otherwise. Stops
   * after the computed step budget or {@code maxRunningTime}, whichever comes
   * first. Returns a move plan only if the final cost beats the initial cost.</p>
   */
  @Override
  public synchronized List<RegionPlan> balanceCluster(Map<ServerName,
    List<HRegionInfo>> clusterState) {
    // Master-hosted regions are balanced separately; if that produced plans,
    // or there is nothing meaningful to balance, stop here.
    List<RegionPlan> plans = balanceMasterRegions(clusterState);
    if (plans != null || clusterState == null || clusterState.size() <= 1) {
      return plans;
    }
    if (masterServerName != null && clusterState.containsKey(masterServerName)) {
      if (clusterState.size() <= 2) {
        return null;
      }
      // Exclude the master from the walk; copy first so the caller's map is
      // not mutated.
      clusterState = new HashMap<ServerName, List<HRegionInfo>>(clusterState);
      clusterState.remove(masterServerName);
    }

    //The clusterState that is given to this method contains the state
    //of all the regions in the table(s) (that's true today)
    // Keep track of servers to iterate through them.
    Cluster cluster = new Cluster(clusterState, loads, regionFinder, rackManager);
    if (!needsBalance(cluster)) {
      return null;
    }

    long startTime = EnvironmentEdgeManager.currentTime();

    initCosts(cluster);

    double currentCost = computeCost(cluster, Double.MAX_VALUE);

    double initCost = currentCost;
    double newCost = currentCost;

    // Step budget scales with cluster size but is capped by maxSteps.
    long computedMaxSteps = Math.min(this.maxSteps,
        ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
    // Perform a stochastic walk to see if we can get a good fit.
    long step;

    for (step = 0; step < computedMaxSteps; step++) {
      // Pick one of the candidate generators uniformly at random.
      int generatorIdx = RANDOM.nextInt(candidateGenerators.length);
      CandidateGenerator p = candidateGenerators[generatorIdx];
      Cluster.Action action = p.generate(cluster);

      if (action.type == Type.NULL) {
        continue;
      }

      cluster.doAction(action);
      updateCostsWithAction(cluster, action);

      // currentCost is passed as the early-out threshold: computeCost may
      // return as soon as the running total exceeds it.
      newCost = computeCost(cluster, currentCost);

      // Should this be kept?
      if (newCost < currentCost) {
        currentCost = newCost;
      } else {
        // Put things back the way they were before.
        // TODO: undo by remembering old values
        Action undoAction = action.undoAction();
        cluster.doAction(undoAction);
        updateCostsWithAction(cluster, undoAction);
      }

      // Wall-clock bound on the whole walk.
      if (EnvironmentEdgeManager.currentTime() - startTime >
          maxRunningTime) {
        break;
      }
    }

    long endTime = EnvironmentEdgeManager.currentTime();

    metricsBalancer.balanceCluster(endTime - startTime);

    // Only emit plans if the walk actually improved on the initial cost.
    if (initCost > currentCost) {
      plans = createRegionPlans(cluster);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Finished computing new load balance plan.  Computation took "
            + (endTime - startTime) + "ms to try " + step
            + " different iterations.  Found a solution that moves "
            + plans.size() + " regions; Going from a computed cost of "
            + initCost + " to a new cost of " + currentCost);
      }
      return plans;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Could not find a better load balance plan.  Tried "
          + step + " different configurations in " + (endTime - startTime)
          + "ms, and did not find anything with a computed cost less than " + initCost);
    }
    return null;
  }
307 
308   /**
309    * Create all of the RegionPlan's needed to move from the initial cluster state to the desired
310    * state.
311    *
312    * @param cluster The state of the cluster
313    * @return List of RegionPlan's that represent the moves needed to get to desired final state.
314    */
315   private List<RegionPlan> createRegionPlans(Cluster cluster) {
316     List<RegionPlan> plans = new LinkedList<RegionPlan>();
317     for (int regionIndex = 0;
318          regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
319       int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
320       int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
321 
322       if (initialServerIndex != newServerIndex) {
323         HRegionInfo region = cluster.regions[regionIndex];
324         ServerName initialServer = cluster.servers[initialServerIndex];
325         ServerName newServer = cluster.servers[newServerIndex];
326 
327         if (LOG.isTraceEnabled()) {
328           LOG.trace("Moving Region " + region.getEncodedName() + " from server "
329               + initialServer.getHostname() + " to " + newServer.getHostname());
330         }
331         RegionPlan rp = new RegionPlan(region, initialServer, newServer);
332         plans.add(rp);
333       }
334     }
335     return plans;
336   }
337 
338   /**
339    * Store the current region loads.
340    */
341   private synchronized void updateRegionLoad() {
342     // We create a new hashmap so that regions that are no longer there are removed.
343     // However we temporarily need the old loads so we can use them to keep the rolling average.
344     Map<String, Deque<RegionLoad>> oldLoads = loads;
345     loads = new HashMap<String, Deque<RegionLoad>>();
346 
347     for (ServerName sn : clusterStatus.getServers()) {
348       ServerLoad sl = clusterStatus.getLoad(sn);
349       if (sl == null) {
350         continue;
351       }
352       for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
353         Deque<RegionLoad> rLoads = oldLoads.get(Bytes.toString(entry.getKey()));
354         if (rLoads == null) {
355           // There was nothing there
356           rLoads = new ArrayDeque<RegionLoad>();
357         } else if (rLoads.size() >= numRegionLoadsToRemember) {
358           rLoads.remove();
359         }
360         rLoads.add(entry.getValue());
361         loads.put(Bytes.toString(entry.getKey()), rLoads);
362 
363       }
364     }
365 
366     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
367       cost.setLoads(loads);
368     }
369   }
370 
371   protected void initCosts(Cluster cluster) {
372     for (CostFunction c:costFunctions) {
373       c.init(cluster);
374     }
375   }
376 
377   protected void updateCostsWithAction(Cluster cluster, Action action) {
378     for (CostFunction c : costFunctions) {
379       c.postAction(action);
380     }
381   }
382 
383   /**
384    * This is the main cost function.  It will compute a cost associated with a proposed cluster
385    * state.  All different costs will be combined with their multipliers to produce a double cost.
386    *
387    * @param cluster The state of the cluster
388    * @param previousCost the previous cost. This is used as an early out.
389    * @return a double of a cost associated with the proposed cluster state.  This cost is an
390    *         aggregate of all individual cost functions.
391    */
392   protected double computeCost(Cluster cluster, double previousCost) {
393     double total = 0;
394 
395     for (CostFunction c:costFunctions) {
396       if (c.getMultiplier() <= 0) {
397         continue;
398       }
399 
400       total += c.getMultiplier() * c.cost();
401 
402       if (total > previousCost) {
403         return total;
404       }
405     }
406     return total;
407   }
408 
409   /** Generates a candidate action to be applied to the cluster for cost function search */
410   abstract static class CandidateGenerator {
411     abstract Cluster.Action generate(Cluster cluster);
412 
413     /**
414      * From a list of regions pick a random one. Null can be returned which
415      * {@link StochasticLoadBalancer#balanceCluster(Map)} recognize as signal to try a region move
416      * rather than swap.
417      *
418      * @param cluster        The state of the cluster
419      * @param server         index of the server
420      * @param chanceOfNoSwap Chance that this will decide to try a move rather
421      *                       than a swap.
422      * @return a random {@link HRegionInfo} or null if an asymmetrical move is
423      *         suggested.
424      */
425     protected int pickRandomRegion(Cluster cluster, int server, double chanceOfNoSwap) {
426       // Check to see if this is just a move.
427       if (cluster.regionsPerServer[server].length == 0 || RANDOM.nextFloat() < chanceOfNoSwap) {
428         // signal a move only.
429         return -1;
430       }
431       int rand = RANDOM.nextInt(cluster.regionsPerServer[server].length);
432       return cluster.regionsPerServer[server][rand];
433 
434     }
435     protected int pickRandomServer(Cluster cluster) {
436       if (cluster.numServers < 1) {
437         return -1;
438       }
439 
440       return RANDOM.nextInt(cluster.numServers);
441     }
442 
443     protected int pickRandomRack(Cluster cluster) {
444       if (cluster.numRacks < 1) {
445         return -1;
446       }
447 
448       return RANDOM.nextInt(cluster.numRacks);
449     }
450 
451     protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
452       if (cluster.numServers < 2) {
453         return -1;
454       }
455       while (true) {
456         int otherServerIndex = pickRandomServer(cluster);
457         if (otherServerIndex != serverIndex) {
458           return otherServerIndex;
459         }
460       }
461     }
462 
463     protected int pickOtherRandomRack(Cluster cluster, int rackIndex) {
464       if (cluster.numRacks < 2) {
465         return -1;
466       }
467       while (true) {
468         int otherRackIndex = pickRandomRack(cluster);
469         if (otherRackIndex != rackIndex) {
470           return otherRackIndex;
471         }
472       }
473     }
474 
475     protected Cluster.Action pickRandomRegions(Cluster cluster,
476                                                        int thisServer,
477                                                        int otherServer) {
478       if (thisServer < 0 || otherServer < 0) {
479         return Cluster.NullAction;
480       }
481 
482       // Decide who is most likely to need another region
483       int thisRegionCount = cluster.getNumRegions(thisServer);
484       int otherRegionCount = cluster.getNumRegions(otherServer);
485 
486       // Assign the chance based upon the above
487       double thisChance = (thisRegionCount > otherRegionCount) ? 0 : 0.5;
488       double otherChance = (thisRegionCount <= otherRegionCount) ? 0 : 0.5;
489 
490       int thisRegion = pickRandomRegion(cluster, thisServer, thisChance);
491       int otherRegion = pickRandomRegion(cluster, otherServer, otherChance);
492 
493       return getAction(thisServer, thisRegion, otherServer, otherRegion);
494     }
495 
496     protected Cluster.Action getAction (int fromServer, int fromRegion,
497         int toServer, int toRegion) {
498       if (fromServer < 0 || toServer < 0) {
499         return Cluster.NullAction;
500       }
501       if (fromRegion > 0 && toRegion > 0) {
502         return new Cluster.SwapRegionsAction(fromServer, fromRegion,
503           toServer, toRegion);
504       } else if (fromRegion > 0) {
505         return new Cluster.MoveRegionAction(fromRegion, fromServer, toServer);
506       } else if (toRegion > 0) {
507         return new Cluster.MoveRegionAction(toRegion, toServer, fromServer);
508       } else {
509         return Cluster.NullAction;
510       }
511     }
512   }
513 
514   static class RandomCandidateGenerator extends CandidateGenerator {
515 
516     @Override
517     Cluster.Action generate(Cluster cluster) {
518 
519       int thisServer = pickRandomServer(cluster);
520 
521       // Pick the other server
522       int otherServer = pickOtherRandomServer(cluster, thisServer);
523 
524       return pickRandomRegions(cluster, thisServer, otherServer);
525     }
526   }
527 
528   static class LoadCandidateGenerator extends CandidateGenerator {
529 
530     @Override
531     Cluster.Action generate(Cluster cluster) {
532       cluster.sortServersByRegionCount();
533       int thisServer = pickMostLoadedServer(cluster, -1);
534       int otherServer = pickLeastLoadedServer(cluster, thisServer);
535 
536       return pickRandomRegions(cluster, thisServer, otherServer);
537     }
538 
539     private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
540       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
541 
542       int index = 0;
543       while (servers[index] == null || servers[index] == thisServer) {
544         index++;
545         if (index == servers.length) {
546           return -1;
547         }
548       }
549       return servers[index];
550     }
551 
552     private int pickMostLoadedServer(final Cluster cluster, int thisServer) {
553       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
554 
555       int index = servers.length - 1;
556       while (servers[index] == null || servers[index] == thisServer) {
557         index--;
558         if (index < 0) {
559           return -1;
560         }
561       }
562       return servers[index];
563     }
564   }
565 
566   static class LocalityBasedCandidateGenerator extends CandidateGenerator {
567 
568     private MasterServices masterServices;
569 
570     LocalityBasedCandidateGenerator(MasterServices masterServices) {
571       this.masterServices = masterServices;
572     }
573 
574     @Override
575     Cluster.Action generate(Cluster cluster) {
576       if (this.masterServices == null) {
577         return Cluster.NullAction;
578       }
579       // Pick a random region server
580       int thisServer = pickRandomServer(cluster);
581 
582       // Pick a random region on this server
583       int thisRegion = pickRandomRegion(cluster, thisServer, 0.0f);
584 
585       if (thisRegion == -1) {
586         return Cluster.NullAction;
587       }
588 
589       // Pick the server with the highest locality
590       int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
591 
592       if (otherServer == -1) {
593         return Cluster.NullAction;
594       }
595 
596       // pick an region on the other server to potentially swap
597       int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
598 
599       return getAction(thisServer, thisRegion, otherServer, otherRegion);
600     }
601 
602     private int pickHighestLocalityServer(Cluster cluster, int thisServer, int thisRegion) {
603       int[] regionLocations = cluster.regionLocations[thisRegion];
604 
605       if (regionLocations == null || regionLocations.length <= 1) {
606         return pickOtherRandomServer(cluster, thisServer);
607       }
608 
609       for (int loc : regionLocations) {
610         if (loc >= 0 && loc != thisServer) { // find the first suitable server
611           return loc;
612         }
613       }
614 
615       // no location found
616       return pickOtherRandomServer(cluster, thisServer);
617     }
618 
619     void setServices(MasterServices services) {
620       this.masterServices = services;
621     }
622   }
623 
  /**
   * Generates candidates which moves the replicas out of the region server for
   * co-hosted region replicas
   */
  static class RegionReplicaCandidateGenerator extends CandidateGenerator {

    // Fallback when the chosen server hosts no co-located replicas.
    RandomCandidateGenerator randomGenerator = new RandomCandidateGenerator();

    /**
     * Randomly select one regionIndex out of all region replicas co-hosted in the same group
     * (a group is a server, host or rack)
     * @param primariesOfRegionsPerGroup either Cluster.primariesOfRegionsPerServer,
     * primariesOfRegionsPerHost or primariesOfRegionsPerRack
     * @param regionsPerGroup either Cluster.regionsPerServer, regionsPerHost or regionsPerRack
     * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
     * @return a regionIndex for the selected primary or -1 if there is no co-locating
     */
    int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] regionsPerGroup
        , int[] regionIndexToPrimaryIndex) {
      int currentPrimary = -1;
      int currentPrimaryIndex = -1;
      // Note: despite the name, this holds the selected primary's region id
      // (the value of currentPrimary), not an array index.
      int selectedPrimaryIndex = -1;
      double currentLargestRandom = -1;
      // primariesOfRegionsPerGroup is a sorted array. Since it contains the primary region
      // ids for the regions hosted in server, a consecutive repetition means that replicas
      // are co-hosted
      // The loop runs one step past the end (j == length, primary == -1) so the
      // final run of equal primaries is also flushed and considered.
      for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
        int primary = j < primariesOfRegionsPerGroup.length
            ? primariesOfRegionsPerGroup[j] : -1;
        if (primary != currentPrimary) { // check for whether we see a new primary
          int numReplicas = j - currentPrimaryIndex;
          if (numReplicas > 1) { // means consecutive primaries, indicating co-location
            // decide to select this primary region id or not
            double currentRandom = RANDOM.nextDouble();
            // we don't know how many region replicas are co-hosted, we will randomly select one
            // using reservoir sampling (http://gregable.com/2007/10/reservoir-sampling.html)
            if (currentRandom > currentLargestRandom) {
              selectedPrimaryIndex = currentPrimary;
              currentLargestRandom = currentRandom;
            }
          }
          currentPrimary = primary;
          currentPrimaryIndex = j;
        }
      }

      // we have found the primary id for the region to move. Now find the actual regionIndex
      // with the given primary, prefer to move the secondary region.
      for (int j = 0; j < regionsPerGroup.length; j++) {
        int regionIndex = regionsPerGroup[j];
        if (selectedPrimaryIndex == regionIndexToPrimaryIndex[regionIndex]) {
          // always move the secondary, not the primary
          if (selectedPrimaryIndex != regionIndex) {
            return regionIndex;
          }
        }
      }
      return -1;
    }

    @Override
    Cluster.Action generate(Cluster cluster) {
      int serverIndex = pickRandomServer(cluster);
      if (cluster.numServers <= 1 || serverIndex == -1) {
        return Cluster.NullAction;
      }

      // Pick a co-hosted replica on this server to evict.
      int regionIndex = selectCoHostedRegionPerGroup(
        cluster.primariesOfRegionsPerServer[serverIndex],
        cluster.regionsPerServer[serverIndex],
        cluster.regionIndexToPrimaryIndex);

      // if there are no pairs of region replicas co-hosted, default to random generator
      if (regionIndex == -1) {
        // default to randompicker
        return randomGenerator.generate(cluster);
      }

      // Move (90% of the time) or swap the replica to some other server.
      int toServerIndex = pickOtherRandomServer(cluster, serverIndex);
      int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
      return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
    }
  }
707 
  /**
   * Generates candidates which moves the replicas out of the rack for
   * co-hosted region replicas in the same rack
   */
  static class RegionReplicaRackCandidateGenerator extends RegionReplicaCandidateGenerator {
    @Override
    Cluster.Action generate(Cluster cluster) {
      int rackIndex = pickRandomRack(cluster);
      // With a single rack (or none), rack-level separation is impossible;
      // fall back to the server-level generator.
      if (cluster.numRacks <= 1 || rackIndex == -1) {
        return super.generate(cluster);
      }

      // Pick a co-located replica within this rack to evict.
      int regionIndex = selectCoHostedRegionPerGroup(
        cluster.primariesOfRegionsPerRack[rackIndex],
        cluster.regionsPerRack[rackIndex],
        cluster.regionIndexToPrimaryIndex);

      // if there are no pairs of region replicas co-hosted, default to random generator
      if (regionIndex == -1) {
        // default to randompicker
        return randomGenerator.generate(cluster);
      }

      // Send the replica to a random server on a different rack; 90% of the
      // time as a plain move rather than a swap.
      int serverIndex = cluster.regionIndexToServerIndex[regionIndex];
      int toRackIndex = pickOtherRandomRack(cluster, rackIndex);

      int rand = RANDOM.nextInt(cluster.serversPerRack[toRackIndex].length);
      int toServerIndex = cluster.serversPerRack[toRackIndex][rand];
      int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
      return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
    }
  }
740 
741   /**
742    * Base class of StochasticLoadBalancer's Cost Functions.
743    */
744   abstract static class CostFunction {
745 
746     private float multiplier = 0;
747 
748     protected Cluster cluster;
749 
750     CostFunction(Configuration c) {
751 
752     }
753 
    /** @return the weight applied to this function's cost; a value <= 0 means
     * the function is skipped by computeCost(). */
    float getMultiplier() {
      return multiplier;
    }

    /** Sets the weight applied to this function's cost. */
    void setMultiplier(float m) {
      this.multiplier = m;
    }
761 
    /** Called once per LB invocation to give the cost function
     * to initialize its state, and perform any costly calculation.
     */
    void init(Cluster cluster) {
      this.cluster = cluster;
    }
768 
769     /** Called once per cluster Action to give the cost function
770      * an opportunity to update it's state. postAction() is always
771      * called at least once before cost() is called with the cluster
772      * that this action is performed on. */
773     void postAction(Action action) {
774       switch (action.type) {
775       case NULL: break;
776       case ASSIGN_REGION:
777         AssignRegionAction ar = (AssignRegionAction) action;
778         regionMoved(ar.region, -1, ar.server);
779         break;
780       case MOVE_REGION:
781         MoveRegionAction mra = (MoveRegionAction) action;
782         regionMoved(mra.region, mra.fromServer, mra.toServer);
783         break;
784       case SWAP_REGIONS:
785         SwapRegionsAction a = (SwapRegionsAction) action;
786         regionMoved(a.fromRegion, a.fromServer, a.toServer);
787         regionMoved(a.toRegion, a.toServer, a.fromServer);
788         break;
789       default:
790         throw new RuntimeException("Uknown action:" + action.type);
791       }
792     }
793 
    /** Incremental-update hook invoked by postAction() for every region
     * movement; the default implementation is a no-op, subclasses may
     * maintain running state here. */
    protected void regionMoved(int region, int oldServer, int newServer) {
    }

    /** @return this function's current cost; per the class javadoc, expected
     * to lie in [0, 1]. */
    abstract double cost();
798 
799     /**
800      * Function to compute a scaled cost using {@link DescriptiveStatistics}. It
801      * assumes that this is a zero sum set of costs.  It assumes that the worst case
802      * possible is all of the elements in one region server and the rest having 0.
803      *
804      * @param stats the costs
805      * @return a scaled set of costs.
806      */
807     protected double costFromArray(double[] stats) {
808       double totalCost = 0;
809       double total = getSum(stats);
810 
811       double count = stats.length;
812       double mean = total/count;
813 
814       // Compute max as if all region servers had 0 and one had the sum of all costs.  This must be
815       // a zero sum cost for this to make sense.
816       double max = ((count - 1) * mean) + (total - mean);
817 
818       // It's possible that there aren't enough regions to go around
819       double min;
820       if (count > total) {
821         min = ((count - total) * mean) + ((1 - mean) * total);
822       } else {
823         // Some will have 1 more than everything else.
824         int numHigh = (int) (total - (Math.floor(mean) * count));
825         int numLow = (int) (count - numHigh);
826 
827         min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean)));
828 
829       }
830       min = Math.max(0, min);
831       for (int i=0; i<stats.length; i++) {
832         double n = stats[i];
833         double diff = Math.abs(mean - n);
834         totalCost += diff;
835       }
836 
837       double scaled =  scale(min, max, totalCost);
838       return scaled;
839     }
840 
841     private double getSum(double[] stats) {
842       double total = 0;
843       for(double s:stats) {
844         total += s;
845       }
846       return total;
847     }
848 
849     /**
850      * Scale the value between 0 and 1.
851      *
852      * @param min   Min value
853      * @param max   The Max value
854      * @param value The value to be scaled.
855      * @return The scaled value.
856      */
857     protected double scale(double min, double max, double value) {
858       if (max <= min || value <= min) {
859         return 0;
860       }
861       if ((max - min) == 0) return 0;
862 
863       return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
864     }
865   }
866 
867   /**
868    * Given the starting state of the regions and a potential ending state
869    * compute cost based upon the number of regions that have moved.
870    */
871   static class MoveCostFunction extends CostFunction {
872     private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
873     private static final String MAX_MOVES_PERCENT_KEY =
874         "hbase.master.balancer.stochastic.maxMovePercent";
875     private static final float DEFAULT_MOVE_COST = 100;
876     private static final int DEFAULT_MAX_MOVES = 600;
877     private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
878 
879     private final float maxMovesPercent;
880 
881     MoveCostFunction(Configuration conf) {
882       super(conf);
883 
884       // Move cost multiplier should be the same cost or higher than the rest of the costs to ensure
885       // that large benefits are need to overcome the cost of a move.
886       this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
887       // What percent of the number of regions a single run of the balancer can move.
888       maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT);
889     }
890 
891     @Override
892     double cost() {
893       // Try and size the max number of Moves, but always be prepared to move some.
894       int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent),
895           DEFAULT_MAX_MOVES);
896 
897       double moveCost = cluster.numMovedRegions;
898 
899       // Don't let this single balance move more than the max moves.
900       // This allows better scaling to accurately represent the actual cost of a move.
901       if (moveCost > maxMoves) {
902         return 1000000;   // return a number much greater than any of the other cost
903       }
904 
905       return scale(0, cluster.numRegions, moveCost);
906     }
907   }
908 
909   /**
910    * Compute the cost of a potential cluster state from skew in number of
911    * regions on a cluster.
912    */
913   static class RegionCountSkewCostFunction extends CostFunction {
914     private static final String REGION_COUNT_SKEW_COST_KEY =
915         "hbase.master.balancer.stochastic.regionCountCost";
916     private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
917 
918     private double[] stats = null;
919 
920     RegionCountSkewCostFunction(Configuration conf) {
921       super(conf);
922       // Load multiplier should be the greatest as it is the most general way to balance data.
923       this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
924     }
925 
926     @Override
927     double cost() {
928       if (stats == null || stats.length != cluster.numServers) {
929         stats = new double[cluster.numServers];
930       }
931 
932       for (int i =0; i < cluster.numServers; i++) {
933         stats[i] = cluster.regionsPerServer[i].length;
934       }
935 
936       return costFromArray(stats);
937     }
938   }
939 
940   /**
941    * Compute the cost of a potential cluster configuration based upon how evenly
942    * distributed tables are.
943    */
944   static class TableSkewCostFunction extends CostFunction {
945 
946     private static final String TABLE_SKEW_COST_KEY =
947         "hbase.master.balancer.stochastic.tableSkewCost";
948     private static final float DEFAULT_TABLE_SKEW_COST = 35;
949 
950     TableSkewCostFunction(Configuration conf) {
951       super(conf);
952       this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
953     }
954 
955     @Override
956     double cost() {
957       double max = cluster.numRegions;
958       double min = ((double) cluster.numRegions) / cluster.numServers;
959       double value = 0;
960 
961       for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
962         value += cluster.numMaxRegionsPerTable[i];
963       }
964 
965       return scale(min, max, value);
966     }
967   }
968 
969   /**
970    * Compute a cost of a potential cluster configuration based upon where
971    * {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located.
972    */
973   static class LocalityCostFunction extends CostFunction {
974 
975     private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
976     private static final float DEFAULT_LOCALITY_COST = 25;
977 
978     private MasterServices services;
979 
980     LocalityCostFunction(Configuration conf, MasterServices srv) {
981       super(conf);
982       this.setMultiplier(conf.getFloat(LOCALITY_COST_KEY, DEFAULT_LOCALITY_COST));
983       this.services = srv;
984     }
985 
986     void setServices(MasterServices srvc) {
987       this.services = srvc;
988     }
989 
990     @Override
991     double cost() {
992       double max = 0;
993       double cost = 0;
994 
995       // If there's no master so there's no way anything else works.
996       if (this.services == null) {
997         return cost;
998       }
999 
1000       for (int i = 0; i < cluster.regionLocations.length; i++) {
1001         max += 1;
1002         int serverIndex = cluster.regionIndexToServerIndex[i];
1003         int[] regionLocations = cluster.regionLocations[i];
1004 
1005         // If we can't find where the data is getTopBlock returns null.
1006         // so count that as being the best possible.
1007         if (regionLocations == null) {
1008           continue;
1009         }
1010 
1011         int index = -1;
1012         for (int j = 0; j < regionLocations.length; j++) {
1013           if (regionLocations[j] >= 0 && regionLocations[j] == serverIndex) {
1014             index = j;
1015             break;
1016           }
1017         }
1018 
1019         if (index < 0) {
1020           cost += 1;
1021         } else {
1022           cost += (double) index / (double) regionLocations.length;
1023         }
1024       }
1025       return scale(0, max, cost);
1026     }
1027   }
1028 
  /**
   * Base class that allows writing cost functions from a rolling average of some
   * number from RegionLoad.
   */
1033   abstract static class CostFromRegionLoadFunction extends CostFunction {
1034 
1035     private ClusterStatus clusterStatus = null;
1036     private Map<String, Deque<RegionLoad>> loads = null;
1037     private double[] stats = null;
1038     CostFromRegionLoadFunction(Configuration conf) {
1039       super(conf);
1040     }
1041 
1042     void setClusterStatus(ClusterStatus status) {
1043       this.clusterStatus = status;
1044     }
1045 
1046     void setLoads(Map<String, Deque<RegionLoad>> l) {
1047       this.loads = l;
1048     }
1049 
1050     @Override
1051     double cost() {
1052       if (clusterStatus == null || loads == null) {
1053         return 0;
1054       }
1055 
1056       if (stats == null || stats.length != cluster.numServers) {
1057         stats = new double[cluster.numServers];
1058       }
1059 
1060       for (int i =0; i < stats.length; i++) {
1061         //Cost this server has from RegionLoad
1062         long cost = 0;
1063 
1064         // for every region on this server get the rl
1065         for(int regionIndex:cluster.regionsPerServer[i]) {
1066           Collection<RegionLoad> regionLoadList =  cluster.regionLoads[regionIndex];
1067 
1068           // Now if we found a region load get the type of cost that was requested.
1069           if (regionLoadList != null) {
1070             cost += getRegionLoadCost(regionLoadList);
1071           }
1072         }
1073 
1074         // Add the total cost to the stats.
1075         stats[i] = cost;
1076       }
1077 
1078       // Now return the scaled cost from data held in the stats object.
1079       return costFromArray(stats);
1080     }
1081 
1082     protected double getRegionLoadCost(Collection<RegionLoad> regionLoadList) {
1083       double cost = 0;
1084 
1085       for (RegionLoad rl : regionLoadList) {
1086         double toAdd = getCostFromRl(rl);
1087 
1088         if (cost == 0) {
1089           cost = toAdd;
1090         } else {
1091           cost = (.5 * cost) + (.5 * toAdd);
1092         }
1093       }
1094 
1095       return cost;
1096     }
1097 
1098     protected abstract double getCostFromRl(RegionLoad rl);
1099   }
1100 
  /**
   * Compute the cost of the total number of read requests.  The more unbalanced the higher the
   * computed cost will be.  This uses a rolling average of regionload.
   */
1106   static class ReadRequestCostFunction extends CostFromRegionLoadFunction {
1107 
1108     private static final String READ_REQUEST_COST_KEY =
1109         "hbase.master.balancer.stochastic.readRequestCost";
1110     private static final float DEFAULT_READ_REQUEST_COST = 5;
1111 
1112     ReadRequestCostFunction(Configuration conf) {
1113       super(conf);
1114       this.setMultiplier(conf.getFloat(READ_REQUEST_COST_KEY, DEFAULT_READ_REQUEST_COST));
1115     }
1116 
1117 
1118     @Override
1119     protected double getCostFromRl(RegionLoad rl) {
1120       return rl.getReadRequestsCount();
1121     }
1122   }
1123 
1124   /**
1125    * Compute the cost of total number of write requests.  The more unbalanced the higher the
1126    * computed cost will be.  This uses a rolling average of regionload.
1127    */
1128   static class WriteRequestCostFunction extends CostFromRegionLoadFunction {
1129 
1130     private static final String WRITE_REQUEST_COST_KEY =
1131         "hbase.master.balancer.stochastic.writeRequestCost";
1132     private static final float DEFAULT_WRITE_REQUEST_COST = 5;
1133 
1134     WriteRequestCostFunction(Configuration conf) {
1135       super(conf);
1136       this.setMultiplier(conf.getFloat(WRITE_REQUEST_COST_KEY, DEFAULT_WRITE_REQUEST_COST));
1137     }
1138 
1139     @Override
1140     protected double getCostFromRl(RegionLoad rl) {
1141       return rl.getWriteRequestsCount();
1142     }
1143   }
1144 
  /**
   * A cost function for region replicas. We give a very high cost to hosting
   * replicas of the same region in the same host. We do not prevent the case
   * though, since if numReplicas &gt; numRegionServers, we still want to keep the
   * replica open.
   */
  static class RegionReplicaHostCostFunction extends CostFunction {
    private static final String REGION_REPLICA_HOST_COST_KEY =
        "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
    private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;

    // Worst-case cost (every replica set fully co-located); 0 disables this function.
    long maxCost = 0;
    long[] costsPerGroup; // group is either server, host or rack
    int[][] primariesOfRegionsPerGroup;

    public RegionReplicaHostCostFunction(Configuration conf) {
      super(conf);
      this.setMultiplier(conf.getFloat(REGION_REPLICA_HOST_COST_KEY,
        DEFAULT_REGION_REPLICA_HOST_COST_KEY));
    }

    @Override
    void init(Cluster cluster) {
      super.init(cluster);
      // max cost is the case where every region replica is hosted together regardless of host
      maxCost = cluster.numHosts > 1 ? getMaxCost(cluster) : 0;
      // NOTE(review): sized by numHosts even when the server-based branch below is
      // taken — presumably numHosts == numServers when !multiServersPerHost; confirm.
      costsPerGroup = new long[cluster.numHosts];
      primariesOfRegionsPerGroup = cluster.multiServersPerHost // either server based or host based
          ? cluster.primariesOfRegionsPerHost
          : cluster.primariesOfRegionsPerServer;
      // Seed each group's cost once; regionMoved() keeps them up to date incrementally.
      for (int i = 0 ; i < primariesOfRegionsPerGroup.length; i++) {
        costsPerGroup[i] = costPerGroup(primariesOfRegionsPerGroup[i]);
      }
    }

    /**
     * Worst-case cost: all replicas of every region collapsed into a single group.
     * Returns 0 when the cluster has no region replicas at all.
     */
    long getMaxCost(Cluster cluster) {
      if (!cluster.hasRegionReplicas) {
        return 0; // short circuit
      }
      // max cost is the case where every region replica is hosted together regardless of host
      int[] primariesOfRegions = new int[cluster.numRegions];
      System.arraycopy(cluster.regionIndexToPrimaryIndex, 0, primariesOfRegions, 0,
          cluster.regions.length);

      // costPerGroup() requires a sorted array so replicas of a primary are adjacent.
      Arrays.sort(primariesOfRegions);

      // compute numReplicas from the sorted array
      return costPerGroup(primariesOfRegions);
    }

    @Override
    double cost() {
      // Disabled: single host, or no region replicas in the cluster.
      if (maxCost <= 0) {
        return 0;
      }

      long totalCost = 0;
      for (int i = 0 ; i < costsPerGroup.length; i++) {
        totalCost += costsPerGroup[i];
      }
      return scale(0, maxCost, totalCost);
    }

    /**
     * For each primary region, it computes the total number of replicas in the array (numReplicas)
     * and returns a sum of numReplicas-1 squared. For example, if the server hosts
     * regions a, b, c, d, e, f where a and b are same replicas, and c,d,e are same replicas, it
     * returns (2-1) * (2-1) + (3-1) * (3-1) + (1-1) * (1-1).
     * @param primariesOfRegions a sorted array of primary regions ids for the regions hosted
     * @return a sum of numReplicas-1 squared for each primary region in the group.
     */
    protected long costPerGroup(int[] primariesOfRegions) {
      long cost = 0;
      int currentPrimary = -1;
      int currentPrimaryIndex = -1;
      // primariesOfRegions is a sorted array of primary ids of regions. Replicas of regions
      // sharing the same primary will have consecutive numbers in the array.
      // The loop runs one index past the end (j == length, with a -1 sentinel) so the
      // final run of equal primaries is also counted.
      for (int j = 0 ; j <= primariesOfRegions.length; j++) {
        int primary = j < primariesOfRegions.length ? primariesOfRegions[j] : -1;
        if (primary != currentPrimary) { // we see a new primary
          int numReplicas = j - currentPrimaryIndex;
          // square the cost
          if (numReplicas > 1) { // means consecutive primaries, indicating co-location
            cost += (numReplicas - 1) * (numReplicas - 1);
          }
          currentPrimary = primary;
          currentPrimaryIndex = j;
        }
      }

      return cost;
    }

    @Override
    protected void regionMoved(int region, int oldServer, int newServer) {
      if (maxCost <= 0) {
        return; // no need to compute
      }
      if (cluster.multiServersPerHost) {
        // Host-based grouping: only the two hosts involved can change, and only
        // when the move actually crossed hosts.
        int oldHost = cluster.serverIndexToHostIndex[oldServer];
        int newHost = cluster.serverIndexToHostIndex[newServer];
        if (newHost != oldHost) {
          costsPerGroup[oldHost] = costPerGroup(cluster.primariesOfRegionsPerHost[oldHost]);
          costsPerGroup[newHost] = costPerGroup(cluster.primariesOfRegionsPerHost[newHost]);
        }
      } else {
        // Server-based grouping: recompute both affected servers' costs.
        costsPerGroup[oldServer] = costPerGroup(cluster.primariesOfRegionsPerServer[oldServer]);
        costsPerGroup[newServer] = costPerGroup(cluster.primariesOfRegionsPerServer[newServer]);
      }
    }
  }
1256 
1257   /**
1258    * A cost function for region replicas for the rack distribution. We give a relatively high
1259    * cost to hosting replicas of the same region in the same rack. We do not prevent the case
1260    * though.
1261    */
  static class RegionReplicaRackCostFunction extends RegionReplicaHostCostFunction {
    private static final String REGION_REPLICA_RACK_COST_KEY =
        "hbase.master.balancer.stochastic.regionReplicaRackCostKey";
    private static final float DEFAULT_REGION_REPLICA_RACK_COST_KEY = 10000;

    public RegionReplicaRackCostFunction(Configuration conf) {
      super(conf);
      this.setMultiplier(conf.getFloat(REGION_REPLICA_RACK_COST_KEY, DEFAULT_REGION_REPLICA_RACK_COST_KEY));
    }

    @Override
    void init(Cluster cluster) {
      // Deliberately does not call super.init(): that would seed host/server-based
      // groups, while this function tracks rack-based groups instead.
      this.cluster = cluster;
      if (cluster.numRacks <= 1) {
        maxCost = 0;
        return; // disabled for 1 rack
      }
      // max cost is the case where every region replica is hosted together regardless of rack
      maxCost = getMaxCost(cluster);
      // Seed each rack's cost once; regionMoved() keeps them up to date incrementally.
      costsPerGroup = new long[cluster.numRacks];
      for (int i = 0 ; i < cluster.primariesOfRegionsPerRack.length; i++) {
        costsPerGroup[i] = costPerGroup(cluster.primariesOfRegionsPerRack[i]);
      }
    }

    @Override
    protected void regionMoved(int region, int oldServer, int newServer) {
      if (maxCost <= 0) {
        return; // no need to compute
      }
      // Only the two racks involved can change, and only on a cross-rack move.
      int oldRack = cluster.serverIndexToRackIndex[oldServer];
      int newRack = cluster.serverIndexToRackIndex[newServer];
      if (newRack != oldRack) {
        costsPerGroup[oldRack] = costPerGroup(cluster.primariesOfRegionsPerRack[oldRack]);
        costsPerGroup[newRack] = costPerGroup(cluster.primariesOfRegionsPerRack[newRack]);
      }
    }
  }
1300 
1301   /**
1302    * Compute the cost of total memstore size.  The more unbalanced the higher the
1303    * computed cost will be.  This uses a rolling average of regionload.
1304    */
1305   static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction {
1306 
1307     private static final String MEMSTORE_SIZE_COST_KEY =
1308         "hbase.master.balancer.stochastic.memstoreSizeCost";
1309     private static final float DEFAULT_MEMSTORE_SIZE_COST = 5;
1310 
1311     MemstoreSizeCostFunction(Configuration conf) {
1312       super(conf);
1313       this.setMultiplier(conf.getFloat(MEMSTORE_SIZE_COST_KEY, DEFAULT_MEMSTORE_SIZE_COST));
1314     }
1315 
1316     @Override
1317     protected double getCostFromRl(RegionLoad rl) {
1318       return rl.getMemStoreSizeMB();
1319     }
1320   }
1321   /**
1322    * Compute the cost of total open storefiles size.  The more unbalanced the higher the
1323    * computed cost will be.  This uses a rolling average of regionload.
1324    */
1325   static class StoreFileCostFunction extends CostFromRegionLoadFunction {
1326 
1327     private static final String STOREFILE_SIZE_COST_KEY =
1328         "hbase.master.balancer.stochastic.storefileSizeCost";
1329     private static final float DEFAULT_STOREFILE_SIZE_COST = 5;
1330 
1331     StoreFileCostFunction(Configuration conf) {
1332       super(conf);
1333       this.setMultiplier(conf.getFloat(STOREFILE_SIZE_COST_KEY, DEFAULT_STOREFILE_SIZE_COST));
1334     }
1335 
1336     @Override
1337     protected double getCostFromRl(RegionLoad rl) {
1338       return rl.getStorefileSizeMB();
1339     }
1340   }
1341 }