View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayDeque;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Deque;
24  import java.util.HashMap;
25  import java.util.LinkedList;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Map.Entry;
29  import java.util.Random;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
34  import org.apache.hadoop.hbase.classification.InterfaceAudience;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.hbase.ClusterStatus;
37  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.RegionLoad;
40  import org.apache.hadoop.hbase.ServerLoad;
41  import org.apache.hadoop.hbase.ServerName;
42  import org.apache.hadoop.hbase.master.MasterServices;
43  import org.apache.hadoop.hbase.master.RegionPlan;
44  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
45  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
46  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction;
47  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction;
48  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegionsAction;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
51  
52  /**
 * <p>This is a best effort load balancer. Given a cost function F(C) =&gt; x, it will
 * randomly try to mutate the cluster to Cprime. If F(Cprime) &lt; F(C) then the
 * new cluster state becomes the plan. It includes cost functions to compute the cost of:</p>
56   * <ul>
57   * <li>Region Load</li>
58   * <li>Table Load</li>
59   * <li>Data Locality</li>
60   * <li>Memstore Sizes</li>
61   * <li>Storefile Sizes</li>
62   * </ul>
63   *
64   *
65   * <p>Every cost function returns a number between 0 and 1 inclusive; where 0 is the lowest cost
66   * best solution, and 1 is the highest possible cost and the worst solution.  The computed costs are
67   * scaled by their respective multipliers:</p>
68   *
69   * <ul>
70   *   <li>hbase.master.balancer.stochastic.regionLoadCost</li>
71   *   <li>hbase.master.balancer.stochastic.moveCost</li>
72   *   <li>hbase.master.balancer.stochastic.tableLoadCost</li>
73   *   <li>hbase.master.balancer.stochastic.localityCost</li>
74   *   <li>hbase.master.balancer.stochastic.memstoreSizeCost</li>
75   *   <li>hbase.master.balancer.stochastic.storefileSizeCost</li>
76   * </ul>
77   *
78   * <p>In addition to the above configurations, the balancer can be tuned by the following
79   * configuration values:</p>
80   * <ul>
 *   <li>hbase.master.balancer.stochastic.maxMoveRegions which
 *   controls the maximum number of regions that can be moved in a single invocation of this
 *   balancer.</li>
84   *   <li>hbase.master.balancer.stochastic.stepsPerRegion is the coefficient by which the number of
85   *   regions is multiplied to try and get the number of times the balancer will
86   *   mutate all servers.</li>
87   *   <li>hbase.master.balancer.stochastic.maxSteps which controls the maximum number of times that
88   *   the balancer will try and mutate all the servers. The balancer will use the minimum of this
89   *   value and the above computation.</li>
90   * </ul>
91   *
92   * <p>This balancer is best used with hbase.master.loadbalance.bytable set to false
93   * so that the balancer gets the full picture of all loads on the cluster.</p>
94   */
95  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
96  public class StochasticLoadBalancer extends BaseLoadBalancer {
97  
  protected static final String STEPS_PER_REGION_KEY =
      "hbase.master.balancer.stochastic.stepsPerRegion";
  protected static final String MAX_STEPS_KEY =
      "hbase.master.balancer.stochastic.maxSteps";
  protected static final String MAX_RUNNING_TIME_KEY =
      "hbase.master.balancer.stochastic.maxRunningTime";
  protected static final String KEEP_REGION_LOADS =
      "hbase.master.balancer.stochastic.numRegionLoadsToRemember";

  private static final Random RANDOM = new Random(System.currentTimeMillis());
  private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);

  // Rolling history of recent RegionLoads keyed by region name; rebuilt on
  // every setClusterStatus() call by updateRegionLoad().
  Map<String, Deque<RegionLoad>> loads = new HashMap<String, Deque<RegionLoad>>();

  // values are defaults; each is overridable via the configuration key above
  private int maxSteps = 1000000;
  private int stepsPerRegion = 800;
  private long maxRunningTime = 30 * 1000 * 1; // 30 seconds.
  private int numRegionLoadsToRemember = 15;

  // Pluggable pieces of the stochastic search: generators propose candidate
  // actions, cost functions score the resulting cluster states.
  private CandidateGenerator[] candidateGenerators;
  private CostFromRegionLoadFunction[] regionLoadFunctions;
  private CostFunction[] costFunctions;
  // Keep locality based picker and cost function to alert them
  // when new services are offered
  private LocalityBasedCandidateGenerator localityCandidateGenerator;
  private LocalityCostFunction localityCost;
  private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
  private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
127 
  /**
   * Reads the stochastic-balancer tuning parameters from the configuration and
   * (re)builds the candidate generators and cost functions used by the search.
   */
  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);

    maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);

    stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
    maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);

    numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);

    // Kept in fields so setMasterServices() can hand them the services later.
    localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
    localityCost = new LocalityCostFunction(conf, services);

    candidateGenerators = new CandidateGenerator[] {
      new RandomCandidateGenerator(),
      new LoadCandidateGenerator(),
      localityCandidateGenerator,
      new RegionReplicaRackCandidateGenerator(),
    };

    // These four are also appended to costFunctions below; kept in their own
    // array so setClusterStatus()/updateRegionLoad() can push loads to them.
    regionLoadFunctions = new CostFromRegionLoadFunction[] {
      new ReadRequestCostFunction(conf),
      new WriteRequestCostFunction(conf),
      new MemstoreSizeCostFunction(conf),
      new StoreFileCostFunction(conf)
    };

    // Kept in fields so areSomeRegionReplicasColocated() can query them.
    regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
    regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);

    costFunctions = new CostFunction[]{
      new RegionCountSkewCostFunction(conf),
      new MoveCostFunction(conf),
      localityCost,
      new TableSkewCostFunction(conf),
      regionReplicaHostCostFunction,
      regionReplicaRackCostFunction,
      regionLoadFunctions[0],
      regionLoadFunctions[1],
      regionLoadFunctions[2],
      regionLoadFunctions[3],
    };
  }
172 
173   @Override
174   protected void setSlop(Configuration conf) {
175     this.slop = conf.getFloat("hbase.regions.slop", 0.001F);
176   }
177 
178   @Override
179   public void setClusterStatus(ClusterStatus st) {
180     super.setClusterStatus(st);
181     updateRegionLoad();
182     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
183       cost.setClusterStatus(st);
184     }
185   }
186 
187   @Override
188   public void setMasterServices(MasterServices masterServices) {
189     super.setMasterServices(masterServices);
190     this.localityCost.setServices(masterServices);
191     this.localityCandidateGenerator.setServices(masterServices);
192 
193   }
194 
195   @Override
196   protected boolean areSomeRegionReplicasColocated(Cluster c) {
197     regionReplicaHostCostFunction.init(c);
198     if (regionReplicaHostCostFunction.cost() > 0) return true;
199     regionReplicaRackCostFunction.init(c);
200     if (regionReplicaRackCostFunction.cost() > 0) return true;
201     return false;
202   }
203 
  /**
   * Given the cluster state this will try and approach an optimal balance. This
   * should always approach the optimal state given enough steps.
   *
   * @param clusterState current mapping of each server to the regions it hosts
   * @return list of plans to reach a lower-cost state, or null when no better
   *         state was found or balancing is unnecessary
   */
  @Override
  public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    // Master-hosted regions are balanced separately; if that produced plans,
    // or there is nothing meaningful to balance, stop here.
    List<RegionPlan> plans = balanceMasterRegions(clusterState);
    if (plans != null || clusterState == null || clusterState.size() <= 1) {
      return plans;
    }
    if (masterServerName != null && clusterState.containsKey(masterServerName)) {
      if (clusterState.size() <= 2) {
        return null;
      }
      // Copy before mutating so the caller's map is untouched, then exclude
      // the master from the stochastic search.
      clusterState = new HashMap<ServerName, List<HRegionInfo>>(clusterState);
      clusterState.remove(masterServerName);
    }

    //The clusterState that is given to this method contains the state
    //of all the regions in the table(s) (that's true today)
    // Keep track of servers to iterate through them.
    Cluster cluster = new Cluster(clusterState, loads, regionFinder, rackManager);
    if (!needsBalance(cluster)) {
      return null;
    }

    long startTime = EnvironmentEdgeManager.currentTime();

    initCosts(cluster);

    // Cost of the current assignment. The walk only ever accepts strictly
    // cheaper states, so currentCost is also the best cost found so far.
    double currentCost = computeCost(cluster, Double.MAX_VALUE);

    double initCost = currentCost;
    double newCost = currentCost;

    // Budget the walk by cluster size, capped by the configured maximum.
    long computedMaxSteps = Math.min(this.maxSteps,
        ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
    // Perform a stochastic walk to see if we can get a good fit.
    long step;

    for (step = 0; step < computedMaxSteps; step++) {
      // Pick a random generator to propose the next candidate mutation.
      int generatorIdx = RANDOM.nextInt(candidateGenerators.length);
      CandidateGenerator p = candidateGenerators[generatorIdx];
      Cluster.Action action = p.generate(cluster);

      if (action.type == Type.NULL) {
        continue;
      }

      // Apply tentatively; undone below if the cost does not improve.
      cluster.doAction(action);
      updateCostsWithAction(cluster, action);

      // currentCost doubles as the early-out threshold inside computeCost().
      newCost = computeCost(cluster, currentCost);

      // Should this be kept?
      if (newCost < currentCost) {
        currentCost = newCost;
      } else {
        // Put things back the way they were before.
        // TODO: undo by remembering old values
        Action undoAction = action.undoAction();
        cluster.doAction(undoAction);
        updateCostsWithAction(cluster, undoAction);
      }

      // Respect the wall-clock budget even if steps remain.
      if (EnvironmentEdgeManager.currentTime() - startTime >
          maxRunningTime) {
        break;
      }
    }

    long endTime = EnvironmentEdgeManager.currentTime();

    metricsBalancer.balanceCluster(endTime - startTime);

    if (initCost > currentCost) {
      // Found a strictly cheaper state: turn the new assignment into plans.
      plans = createRegionPlans(cluster);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Finished computing new load balance plan.  Computation took "
            + (endTime - startTime) + "ms to try " + step
            + " different iterations.  Found a solution that moves "
            + plans.size() + " regions; Going from a computed cost of "
            + initCost + " to a new cost of " + currentCost);
      }
      return plans;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Could not find a better load balance plan.  Tried "
          + step + " different configurations in " + (endTime - startTime)
          + "ms, and did not find anything with a computed cost less than " + initCost);
    }
    return null;
  }
297 
298   /**
299    * Create all of the RegionPlan's needed to move from the initial cluster state to the desired
300    * state.
301    *
302    * @param cluster The state of the cluster
303    * @return List of RegionPlan's that represent the moves needed to get to desired final state.
304    */
305   private List<RegionPlan> createRegionPlans(Cluster cluster) {
306     List<RegionPlan> plans = new LinkedList<RegionPlan>();
307     for (int regionIndex = 0;
308          regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
309       int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
310       int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
311 
312       if (initialServerIndex != newServerIndex) {
313         HRegionInfo region = cluster.regions[regionIndex];
314         ServerName initialServer = cluster.servers[initialServerIndex];
315         ServerName newServer = cluster.servers[newServerIndex];
316 
317         if (LOG.isTraceEnabled()) {
318           LOG.trace("Moving Region " + region.getEncodedName() + " from server "
319               + initialServer.getHostname() + " to " + newServer.getHostname());
320         }
321         RegionPlan rp = new RegionPlan(region, initialServer, newServer);
322         plans.add(rp);
323       }
324     }
325     return plans;
326   }
327 
328   /**
329    * Store the current region loads.
330    */
331   private synchronized void updateRegionLoad() {
332     // We create a new hashmap so that regions that are no longer there are removed.
333     // However we temporarily need the old loads so we can use them to keep the rolling average.
334     Map<String, Deque<RegionLoad>> oldLoads = loads;
335     loads = new HashMap<String, Deque<RegionLoad>>();
336 
337     for (ServerName sn : clusterStatus.getServers()) {
338       ServerLoad sl = clusterStatus.getLoad(sn);
339       if (sl == null) {
340         continue;
341       }
342       for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
343         Deque<RegionLoad> rLoads = oldLoads.get(Bytes.toString(entry.getKey()));
344         if (rLoads == null) {
345           // There was nothing there
346           rLoads = new ArrayDeque<RegionLoad>();
347         } else if (rLoads.size() >= numRegionLoadsToRemember) {
348           rLoads.remove();
349         }
350         rLoads.add(entry.getValue());
351         loads.put(Bytes.toString(entry.getKey()), rLoads);
352 
353       }
354     }
355 
356     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
357       cost.setLoads(loads);
358     }
359   }
360 
361   protected void initCosts(Cluster cluster) {
362     for (CostFunction c:costFunctions) {
363       c.init(cluster);
364     }
365   }
366 
367   protected void updateCostsWithAction(Cluster cluster, Action action) {
368     for (CostFunction c : costFunctions) {
369       c.postAction(action);
370     }
371   }
372 
373   /**
374    * This is the main cost function.  It will compute a cost associated with a proposed cluster
375    * state.  All different costs will be combined with their multipliers to produce a double cost.
376    *
377    * @param cluster The state of the cluster
378    * @param previousCost the previous cost. This is used as an early out.
379    * @return a double of a cost associated with the proposed cluster state.  This cost is an
380    *         aggregate of all individual cost functions.
381    */
382   protected double computeCost(Cluster cluster, double previousCost) {
383     double total = 0;
384 
385     for (CostFunction c:costFunctions) {
386       if (c.getMultiplier() <= 0) {
387         continue;
388       }
389 
390       total += c.getMultiplier() * c.cost();
391 
392       if (total > previousCost) {
393         return total;
394       }
395     }
396     return total;
397   }
398 
399   /** Generates a candidate action to be applied to the cluster for cost function search */
400   abstract static class CandidateGenerator {
401     abstract Cluster.Action generate(Cluster cluster);
402 
403     /**
404      * From a list of regions pick a random one. Null can be returned which
405      * {@link StochasticLoadBalancer#balanceCluster(Map)} recognize as signal to try a region move
406      * rather than swap.
407      *
408      * @param cluster        The state of the cluster
409      * @param server         index of the server
410      * @param chanceOfNoSwap Chance that this will decide to try a move rather
411      *                       than a swap.
412      * @return a random {@link HRegionInfo} or null if an asymmetrical move is
413      *         suggested.
414      */
415     protected int pickRandomRegion(Cluster cluster, int server, double chanceOfNoSwap) {
416       // Check to see if this is just a move.
417       if (cluster.regionsPerServer[server].length == 0 || RANDOM.nextFloat() < chanceOfNoSwap) {
418         // signal a move only.
419         return -1;
420       }
421       int rand = RANDOM.nextInt(cluster.regionsPerServer[server].length);
422       return cluster.regionsPerServer[server][rand];
423 
424     }
425     protected int pickRandomServer(Cluster cluster) {
426       if (cluster.numServers < 1) {
427         return -1;
428       }
429 
430       return RANDOM.nextInt(cluster.numServers);
431     }
432 
433     protected int pickRandomRack(Cluster cluster) {
434       if (cluster.numRacks < 1) {
435         return -1;
436       }
437 
438       return RANDOM.nextInt(cluster.numRacks);
439     }
440 
441     protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
442       if (cluster.numServers < 2) {
443         return -1;
444       }
445       while (true) {
446         int otherServerIndex = pickRandomServer(cluster);
447         if (otherServerIndex != serverIndex) {
448           return otherServerIndex;
449         }
450       }
451     }
452 
453     protected int pickOtherRandomRack(Cluster cluster, int rackIndex) {
454       if (cluster.numRacks < 2) {
455         return -1;
456       }
457       while (true) {
458         int otherRackIndex = pickRandomRack(cluster);
459         if (otherRackIndex != rackIndex) {
460           return otherRackIndex;
461         }
462       }
463     }
464 
465     protected Cluster.Action pickRandomRegions(Cluster cluster,
466                                                        int thisServer,
467                                                        int otherServer) {
468       if (thisServer < 0 || otherServer < 0) {
469         return Cluster.NullAction;
470       }
471 
472       // Decide who is most likely to need another region
473       int thisRegionCount = cluster.getNumRegions(thisServer);
474       int otherRegionCount = cluster.getNumRegions(otherServer);
475 
476       // Assign the chance based upon the above
477       double thisChance = (thisRegionCount > otherRegionCount) ? 0 : 0.5;
478       double otherChance = (thisRegionCount <= otherRegionCount) ? 0 : 0.5;
479 
480       int thisRegion = pickRandomRegion(cluster, thisServer, thisChance);
481       int otherRegion = pickRandomRegion(cluster, otherServer, otherChance);
482 
483       return getAction(thisServer, thisRegion, otherServer, otherRegion);
484     }
485 
486     protected Cluster.Action getAction (int fromServer, int fromRegion,
487         int toServer, int toRegion) {
488       if (fromServer < 0 || toServer < 0) {
489         return Cluster.NullAction;
490       }
491       if (fromRegion > 0 && toRegion > 0) {
492         return new Cluster.SwapRegionsAction(fromServer, fromRegion,
493           toServer, toRegion);
494       } else if (fromRegion > 0) {
495         return new Cluster.MoveRegionAction(fromRegion, fromServer, toServer);
496       } else if (toRegion > 0) {
497         return new Cluster.MoveRegionAction(toRegion, toServer, fromServer);
498       } else {
499         return Cluster.NullAction;
500       }
501     }
502   }
503 
504   static class RandomCandidateGenerator extends CandidateGenerator {
505 
506     @Override
507     Cluster.Action generate(Cluster cluster) {
508 
509       int thisServer = pickRandomServer(cluster);
510 
511       // Pick the other server
512       int otherServer = pickOtherRandomServer(cluster, thisServer);
513 
514       return pickRandomRegions(cluster, thisServer, otherServer);
515     }
516   }
517 
518   static class LoadCandidateGenerator extends CandidateGenerator {
519 
520     @Override
521     Cluster.Action generate(Cluster cluster) {
522       cluster.sortServersByRegionCount();
523       int thisServer = pickMostLoadedServer(cluster, -1);
524       int otherServer = pickLeastLoadedServer(cluster, thisServer);
525 
526       return pickRandomRegions(cluster, thisServer, otherServer);
527     }
528 
529     private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
530       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
531 
532       int index = 0;
533       while (servers[index] == null || servers[index] == thisServer) {
534         index++;
535         if (index == servers.length) {
536           return -1;
537         }
538       }
539       return servers[index];
540     }
541 
542     private int pickMostLoadedServer(final Cluster cluster, int thisServer) {
543       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
544 
545       int index = servers.length - 1;
546       while (servers[index] == null || servers[index] == thisServer) {
547         index--;
548         if (index < 0) {
549           return -1;
550         }
551       }
552       return servers[index];
553     }
554   }
555 
556   static class LocalityBasedCandidateGenerator extends CandidateGenerator {
557 
558     private MasterServices masterServices;
559 
560     LocalityBasedCandidateGenerator(MasterServices masterServices) {
561       this.masterServices = masterServices;
562     }
563 
564     @Override
565     Cluster.Action generate(Cluster cluster) {
566       if (this.masterServices == null) {
567         return Cluster.NullAction;
568       }
569       // Pick a random region server
570       int thisServer = pickRandomServer(cluster);
571 
572       // Pick a random region on this server
573       int thisRegion = pickRandomRegion(cluster, thisServer, 0.0f);
574 
575       if (thisRegion == -1) {
576         return Cluster.NullAction;
577       }
578 
579       // Pick the server with the highest locality
580       int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
581 
582       if (otherServer == -1) {
583         return Cluster.NullAction;
584       }
585 
586       // pick an region on the other server to potentially swap
587       int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
588 
589       return getAction(thisServer, thisRegion, otherServer, otherRegion);
590     }
591 
592     private int pickHighestLocalityServer(Cluster cluster, int thisServer, int thisRegion) {
593       int[] regionLocations = cluster.regionLocations[thisRegion];
594 
595       if (regionLocations == null || regionLocations.length <= 1) {
596         return pickOtherRandomServer(cluster, thisServer);
597       }
598 
599       for (int loc : regionLocations) {
600         if (loc >= 0 && loc != thisServer) { // find the first suitable server
601           return loc;
602         }
603       }
604 
605       // no location found
606       return pickOtherRandomServer(cluster, thisServer);
607     }
608 
609     void setServices(MasterServices services) {
610       this.masterServices = services;
611     }
612   }
613 
  /**
   * Generates candidates which moves the replicas out of the region server for
   * co-hosted region replicas
   */
  static class RegionReplicaCandidateGenerator extends CandidateGenerator {

    // Fallback used when the sampled server hosts no co-located replicas.
    RandomCandidateGenerator randomGenerator = new RandomCandidateGenerator();

    /**
     * Randomly select one regionIndex out of all region replicas co-hosted in the same group
     * (a group is a server, host or rack)
     * @param primariesOfRegionsPerGroup either Cluster.primariesOfRegionsPerServer,
     * primariesOfRegionsPerHost or primariesOfRegionsPerRack
     * @param regionsPerGroup either Cluster.regionsPerServer, regionsPerHost or regionsPerRack
     * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
     * @return a regionIndex for the selected primary or -1 if there is no co-locating
     */
    int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] regionsPerGroup
        , int[] regionIndexToPrimaryIndex) {
      int currentPrimary = -1;       // primary id of the run currently being scanned
      int currentPrimaryIndex = -1;  // start position of that run
      int selectedPrimaryIndex = -1; // primary id chosen by the sampling below
      double currentLargestRandom = -1;
      // primariesOfRegionsPerGroup is a sorted array. Since it contains the primary region
      // ids for the regions hosted in server, a consecutive repetition means that replicas
      // are co-hosted
      // Note: loop runs one past the end (j == length) so the final run is
      // also closed out and considered.
      for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
        int primary = j < primariesOfRegionsPerGroup.length
            ? primariesOfRegionsPerGroup[j] : -1;
        if (primary != currentPrimary) { // check for whether we see a new primary
          int numReplicas = j - currentPrimaryIndex;
          if (numReplicas > 1) { // means consecutive primaries, indicating co-location
            // decide to select this primary region id or not
            double currentRandom = RANDOM.nextDouble();
            // we don't know how many region replicas are co-hosted, we will randomly select one
            // using reservoir sampling (http://gregable.com/2007/10/reservoir-sampling.html)
            if (currentRandom > currentLargestRandom) {
              selectedPrimaryIndex = currentPrimary;
              currentLargestRandom = currentRandom;
            }
          }
          currentPrimary = primary;
          currentPrimaryIndex = j;
        }
      }

      // we have found the primary id for the region to move. Now find the actual regionIndex
      // with the given primary, prefer to move the secondary region.
      for (int j = 0; j < regionsPerGroup.length; j++) {
        int regionIndex = regionsPerGroup[j];
        if (selectedPrimaryIndex == regionIndexToPrimaryIndex[regionIndex]) {
          // always move the secondary, not the primary
          if (selectedPrimaryIndex != regionIndex) {
            return regionIndex;
          }
        }
      }
      return -1;
    }

    @Override
    Cluster.Action generate(Cluster cluster) {
      int serverIndex = pickRandomServer(cluster);
      if (cluster.numServers <= 1 || serverIndex == -1) {
        return Cluster.NullAction;
      }

      int regionIndex = selectCoHostedRegionPerGroup(
        cluster.primariesOfRegionsPerServer[serverIndex],
        cluster.regionsPerServer[serverIndex],
        cluster.regionIndexToPrimaryIndex);

      // if there are no pairs of region replicas co-hosted, default to random generator
      if (regionIndex == -1) {
        // default to randompicker
        return randomGenerator.generate(cluster);
      }

      // Push the chosen replica onto another server; with chance 0.9 no
      // swap-back region is picked on the target (a plain move).
      int toServerIndex = pickOtherRandomServer(cluster, serverIndex);
      int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
      return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
    }
  }
697 
  /**
   * Generates candidates which moves the replicas out of the rack for
   * co-hosted region replicas in the same rack
   */
  static class RegionReplicaRackCandidateGenerator extends RegionReplicaCandidateGenerator {
    @Override
    Cluster.Action generate(Cluster cluster) {
      int rackIndex = pickRandomRack(cluster);
      if (cluster.numRacks <= 1 || rackIndex == -1) {
        // Single rack (or none): rack-level spreading is moot, fall back to
        // the server-level replica generator.
        return super.generate(cluster);
      }

      int regionIndex = selectCoHostedRegionPerGroup(
        cluster.primariesOfRegionsPerRack[rackIndex],
        cluster.regionsPerRack[rackIndex],
        cluster.regionIndexToPrimaryIndex);

      // if there are no pairs of region replicas co-hosted, default to random generator
      if (regionIndex == -1) {
        // default to randompicker
        return randomGenerator.generate(cluster);
      }

      int serverIndex = cluster.regionIndexToServerIndex[regionIndex];
      // numRacks > 1 here, so pickOtherRandomRack cannot return -1.
      int toRackIndex = pickOtherRandomRack(cluster, rackIndex);

      // Random server within the destination rack, then with chance 0.9 do a
      // plain move (no swap-back region picked on the target).
      int rand = RANDOM.nextInt(cluster.serversPerRack[toRackIndex].length);
      int toServerIndex = cluster.serversPerRack[toRackIndex][rand];
      int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
      return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
    }
  }
730 
731   /**
732    * Base class of StochasticLoadBalancer's Cost Functions.
733    */
734   abstract static class CostFunction {
735 
    // Relative weight of this cost function in the aggregate; a non-positive
    // value disables the function (computeCost() skips it).
    private float multiplier = 0;

    // Cluster state this function was last init()'ed with.
    protected Cluster cluster;

    CostFunction(Configuration c) {
      // Subclasses read their multiplier (and any tuning) from the conf.
    }

    float getMultiplier() {
      return multiplier;
    }

    void setMultiplier(float m) {
      this.multiplier = m;
    }
751 
    /** Called once per LB invocation to give the cost function a chance
     * to initialize its state, and perform any costly calculation.
     */
    void init(Cluster cluster) {
      this.cluster = cluster;
    }
758 
759     /** Called once per cluster Action to give the cost function
760      * an opportunity to update it's state. postAction() is always
761      * called at least once before cost() is called with the cluster
762      * that this action is performed on. */
763     void postAction(Action action) {
764       switch (action.type) {
765       case NULL: break;
766       case ASSIGN_REGION:
767         AssignRegionAction ar = (AssignRegionAction) action;
768         regionMoved(ar.region, -1, ar.server);
769         break;
770       case MOVE_REGION:
771         MoveRegionAction mra = (MoveRegionAction) action;
772         regionMoved(mra.region, mra.fromServer, mra.toServer);
773         break;
774       case SWAP_REGIONS:
775         SwapRegionsAction a = (SwapRegionsAction) action;
776         regionMoved(a.fromRegion, a.fromServer, a.toServer);
777         regionMoved(a.toRegion, a.toServer, a.fromServer);
778         break;
779       default:
780         throw new RuntimeException("Uknown action:" + action.type);
781       }
782     }
783 
    /**
     * Hook for subclasses to incrementally update their state when a region
     * moves; {@code oldServer} is -1 for a fresh assignment. Default: no-op.
     */
    protected void regionMoved(int region, int oldServer, int newServer) {
    }

    /** @return the cost of the currently init()'ed cluster state. */
    abstract double cost();
788 
    /**
     * Function to compute a scaled cost from an array of per-server values. It
     * assumes that this is a zero sum set of costs.  It assumes that the worst case
     * possible is all of the elements in one region server and the rest having 0.
     *
     * @param stats the costs
     * @return a scaled set of costs.
     */
    protected double costFromArray(double[] stats) {
      double totalCost = 0;
      double total = getSum(stats);

      double count = stats.length;
      double mean = total/count;

      // Compute max as if all region servers had 0 and one had the sum of all costs.  This must be
      // a zero sum cost for this to make sense.
      double max = ((count - 1) * mean) + (total - mean);

      // It's possible that there aren't enough regions to go around
      double min;
      if (count > total) {
        min = ((count - total) * mean) + ((1 - mean) * total);
      } else {
        // Some will have 1 more than everything else.
        int numHigh = (int) (total - (Math.floor(mean) * count));
        int numLow = (int) (count - numHigh);

        min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean)));

      }
      min = Math.max(0, min);
      // Raw cost is the total absolute deviation from the mean, scaled into
      // the achievable [min, max] band by scale().
      for (int i=0; i<stats.length; i++) {
        double n = stats[i];
        double diff = Math.abs(mean - n);
        totalCost += diff;
      }

      double scaled =  scale(min, max, totalCost);
      return scaled;
    }
830 
831     private double getSum(double[] stats) {
832       double total = 0;
833       for(double s:stats) {
834         total += s;
835       }
836       return total;
837     }
838 
839     /**
840      * Scale the value between 0 and 1.
841      *
842      * @param min   Min value
843      * @param max   The Max value
844      * @param value The value to be scaled.
845      * @return The scaled value.
846      */
847     protected double scale(double min, double max, double value) {
848       if (max <= min || value <= min) {
849         return 0;
850       }
851       if ((max - min) == 0) return 0;
852 
853       return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
854     }
855   }
856 
857   /**
858    * Given the starting state of the regions and a potential ending state
859    * compute cost based upon the number of regions that have moved.
860    */
861   static class MoveCostFunction extends CostFunction {
862     private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
863     private static final String MAX_MOVES_PERCENT_KEY =
864         "hbase.master.balancer.stochastic.maxMovePercent";
865     private static final float DEFAULT_MOVE_COST = 100;
866     private static final int DEFAULT_MAX_MOVES = 600;
867     private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
868 
869     private final float maxMovesPercent;
870 
871     MoveCostFunction(Configuration conf) {
872       super(conf);
873 
874       // Move cost multiplier should be the same cost or higher than the rest of the costs to ensure
875       // that large benefits are need to overcome the cost of a move.
876       this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
877       // What percent of the number of regions a single run of the balancer can move.
878       maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT);
879     }
880 
881     @Override
882     double cost() {
883       // Try and size the max number of Moves, but always be prepared to move some.
884       int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent),
885           DEFAULT_MAX_MOVES);
886 
887       double moveCost = cluster.numMovedRegions;
888 
889       // Don't let this single balance move more than the max moves.
890       // This allows better scaling to accurately represent the actual cost of a move.
891       if (moveCost > maxMoves) {
892         return 1000000;   // return a number much greater than any of the other cost
893       }
894 
895       return scale(0, cluster.numRegions, moveCost);
896     }
897   }
898 
899   /**
900    * Compute the cost of a potential cluster state from skew in number of
901    * regions on a cluster.
902    */
903   static class RegionCountSkewCostFunction extends CostFunction {
904     private static final String REGION_COUNT_SKEW_COST_KEY =
905         "hbase.master.balancer.stochastic.regionCountCost";
906     private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
907 
908     private double[] stats = null;
909 
910     RegionCountSkewCostFunction(Configuration conf) {
911       super(conf);
912       // Load multiplier should be the greatest as it is the most general way to balance data.
913       this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
914     }
915 
916     @Override
917     double cost() {
918       if (stats == null || stats.length != cluster.numServers) {
919         stats = new double[cluster.numServers];
920       }
921 
922       for (int i =0; i < cluster.numServers; i++) {
923         stats[i] = cluster.regionsPerServer[i].length;
924       }
925 
926       return costFromArray(stats);
927     }
928   }
929 
930   /**
931    * Compute the cost of a potential cluster configuration based upon how evenly
932    * distributed tables are.
933    */
934   static class TableSkewCostFunction extends CostFunction {
935 
936     private static final String TABLE_SKEW_COST_KEY =
937         "hbase.master.balancer.stochastic.tableSkewCost";
938     private static final float DEFAULT_TABLE_SKEW_COST = 35;
939 
940     TableSkewCostFunction(Configuration conf) {
941       super(conf);
942       this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
943     }
944 
945     @Override
946     double cost() {
947       double max = cluster.numRegions;
948       double min = ((double) cluster.numRegions) / cluster.numServers;
949       double value = 0;
950 
951       for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
952         value += cluster.numMaxRegionsPerTable[i];
953       }
954 
955       return scale(min, max, value);
956     }
957   }
958 
959   /**
960    * Compute a cost of a potential cluster configuration based upon where
961    * {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located.
962    */
963   static class LocalityCostFunction extends CostFunction {
964 
965     private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
966     private static final float DEFAULT_LOCALITY_COST = 25;
967 
968     private MasterServices services;
969 
970     LocalityCostFunction(Configuration conf, MasterServices srv) {
971       super(conf);
972       this.setMultiplier(conf.getFloat(LOCALITY_COST_KEY, DEFAULT_LOCALITY_COST));
973       this.services = srv;
974     }
975 
976     void setServices(MasterServices srvc) {
977       this.services = srvc;
978     }
979 
980     @Override
981     double cost() {
982       double max = 0;
983       double cost = 0;
984 
985       // If there's no master so there's no way anything else works.
986       if (this.services == null) {
987         return cost;
988       }
989 
990       for (int i = 0; i < cluster.regionLocations.length; i++) {
991         max += 1;
992         int serverIndex = cluster.regionIndexToServerIndex[i];
993         int[] regionLocations = cluster.regionLocations[i];
994 
995         // If we can't find where the data is getTopBlock returns null.
996         // so count that as being the best possible.
997         if (regionLocations == null) {
998           continue;
999         }
1000 
1001         int index = -1;
1002         for (int j = 0; j < regionLocations.length; j++) {
1003           if (regionLocations[j] >= 0 && regionLocations[j] == serverIndex) {
1004             index = j;
1005             break;
1006           }
1007         }
1008 
1009         if (index < 0) {
1010           cost += 1;
1011         } else {
1012           cost += (double) index / (double) regionLocations.length;
1013         }
1014       }
1015       return scale(0, max, cost);
1016     }
1017   }
1018 
1019   /**
1020    * Base class the allows writing costs functions from rolling average of some
1021    * number from RegionLoad.
1022    */
1023   abstract static class CostFromRegionLoadFunction extends CostFunction {
1024 
1025     private ClusterStatus clusterStatus = null;
1026     private Map<String, Deque<RegionLoad>> loads = null;
1027     private double[] stats = null;
1028     CostFromRegionLoadFunction(Configuration conf) {
1029       super(conf);
1030     }
1031 
1032     void setClusterStatus(ClusterStatus status) {
1033       this.clusterStatus = status;
1034     }
1035 
1036     void setLoads(Map<String, Deque<RegionLoad>> l) {
1037       this.loads = l;
1038     }
1039 
1040     @Override
1041     double cost() {
1042       if (clusterStatus == null || loads == null) {
1043         return 0;
1044       }
1045 
1046       if (stats == null || stats.length != cluster.numServers) {
1047         stats = new double[cluster.numServers];
1048       }
1049 
1050       for (int i =0; i < stats.length; i++) {
1051         //Cost this server has from RegionLoad
1052         long cost = 0;
1053 
1054         // for every region on this server get the rl
1055         for(int regionIndex:cluster.regionsPerServer[i]) {
1056           Collection<RegionLoad> regionLoadList =  cluster.regionLoads[regionIndex];
1057 
1058           // Now if we found a region load get the type of cost that was requested.
1059           if (regionLoadList != null) {
1060             cost += getRegionLoadCost(regionLoadList);
1061           }
1062         }
1063 
1064         // Add the total cost to the stats.
1065         stats[i] = cost;
1066       }
1067 
1068       // Now return the scaled cost from data held in the stats object.
1069       return costFromArray(stats);
1070     }
1071 
1072     protected double getRegionLoadCost(Collection<RegionLoad> regionLoadList) {
1073       double cost = 0;
1074 
1075       for (RegionLoad rl : regionLoadList) {
1076         double toAdd = getCostFromRl(rl);
1077 
1078         if (cost == 0) {
1079           cost = toAdd;
1080         } else {
1081           cost = (.5 * cost) + (.5 * toAdd);
1082         }
1083       }
1084 
1085       return cost;
1086     }
1087 
1088     protected abstract double getCostFromRl(RegionLoad rl);
1089   }
1090 
1091   /**
1092    * Compute the cost of total number of read requests  The more unbalanced the higher the
1093    * computed cost will be.  This uses a rolling average of regionload.
1094    */
1095 
1096   static class ReadRequestCostFunction extends CostFromRegionLoadFunction {
1097 
1098     private static final String READ_REQUEST_COST_KEY =
1099         "hbase.master.balancer.stochastic.readRequestCost";
1100     private static final float DEFAULT_READ_REQUEST_COST = 5;
1101 
1102     ReadRequestCostFunction(Configuration conf) {
1103       super(conf);
1104       this.setMultiplier(conf.getFloat(READ_REQUEST_COST_KEY, DEFAULT_READ_REQUEST_COST));
1105     }
1106 
1107 
1108     @Override
1109     protected double getCostFromRl(RegionLoad rl) {
1110       return rl.getReadRequestsCount();
1111     }
1112   }
1113 
1114   /**
1115    * Compute the cost of total number of write requests.  The more unbalanced the higher the
1116    * computed cost will be.  This uses a rolling average of regionload.
1117    */
1118   static class WriteRequestCostFunction extends CostFromRegionLoadFunction {
1119 
1120     private static final String WRITE_REQUEST_COST_KEY =
1121         "hbase.master.balancer.stochastic.writeRequestCost";
1122     private static final float DEFAULT_WRITE_REQUEST_COST = 5;
1123 
1124     WriteRequestCostFunction(Configuration conf) {
1125       super(conf);
1126       this.setMultiplier(conf.getFloat(WRITE_REQUEST_COST_KEY, DEFAULT_WRITE_REQUEST_COST));
1127     }
1128 
1129     @Override
1130     protected double getCostFromRl(RegionLoad rl) {
1131       return rl.getWriteRequestsCount();
1132     }
1133   }
1134 
1135   /**
1136    * A cost function for region replicas. We give a very high cost to hosting
1137    * replicas of the same region in the same host. We do not prevent the case
1138    * though, since if numReplicas > numRegionServers, we still want to keep the
1139    * replica open.
1140    */
1141   static class RegionReplicaHostCostFunction extends CostFunction {
1142     private static final String REGION_REPLICA_HOST_COST_KEY =
1143         "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
1144     private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;
1145 
1146     long maxCost = 0;
1147     long[] costsPerGroup; // group is either server, host or rack
1148     int[][] primariesOfRegionsPerGroup;
1149 
1150     public RegionReplicaHostCostFunction(Configuration conf) {
1151       super(conf);
1152       this.setMultiplier(conf.getFloat(REGION_REPLICA_HOST_COST_KEY,
1153         DEFAULT_REGION_REPLICA_HOST_COST_KEY));
1154     }
1155 
1156     @Override
1157     void init(Cluster cluster) {
1158       super.init(cluster);
1159       // max cost is the case where every region replica is hosted together regardless of host
1160       maxCost = cluster.numHosts > 1 ? getMaxCost(cluster) : 0;
1161       costsPerGroup = new long[cluster.numHosts];
1162       primariesOfRegionsPerGroup = cluster.multiServersPerHost // either server based or host based
1163           ? cluster.primariesOfRegionsPerHost
1164           : cluster.primariesOfRegionsPerServer;
1165       for (int i = 0 ; i < primariesOfRegionsPerGroup.length; i++) {
1166         costsPerGroup[i] = costPerGroup(primariesOfRegionsPerGroup[i]);
1167       }
1168     }
1169 
1170     long getMaxCost(Cluster cluster) {
1171       if (!cluster.hasRegionReplicas) {
1172         return 0; // short circuit
1173       }
1174       // max cost is the case where every region replica is hosted together regardless of host
1175       int[] primariesOfRegions = new int[cluster.numRegions];
1176       System.arraycopy(cluster.regionIndexToPrimaryIndex, 0, primariesOfRegions, 0,
1177           cluster.regions.length);
1178 
1179       Arrays.sort(primariesOfRegions);
1180 
1181       // compute numReplicas from the sorted array
1182       return costPerGroup(primariesOfRegions);
1183     }
1184 
1185     @Override
1186     double cost() {
1187       if (maxCost <= 0) {
1188         return 0;
1189       }
1190 
1191       long totalCost = 0;
1192       for (int i = 0 ; i < costsPerGroup.length; i++) {
1193         totalCost += costsPerGroup[i];
1194       }
1195       return scale(0, maxCost, totalCost);
1196     }
1197 
1198     /**
1199      * For each primary region, it computes the total number of replicas in the array (numReplicas)
1200      * and returns a sum of numReplicas-1 squared. For example, if the server hosts
1201      * regions a, b, c, d, e, f where a and b are same replicas, and c,d,e are same replicas, it
1202      * returns (2-1) * (2-1) + (3-1) * (3-1) + (1-1) * (1-1).
1203      * @param primariesOfRegions a sorted array of primary regions ids for the regions hosted
1204      * @return a sum of numReplicas-1 squared for each primary region in the group.
1205      */
1206     protected long costPerGroup(int[] primariesOfRegions) {
1207       long cost = 0;
1208       int currentPrimary = -1;
1209       int currentPrimaryIndex = -1;
1210       // primariesOfRegions is a sorted array of primary ids of regions. Replicas of regions
1211       // sharing the same primary will have consecutive numbers in the array.
1212       for (int j = 0 ; j <= primariesOfRegions.length; j++) {
1213         int primary = j < primariesOfRegions.length ? primariesOfRegions[j] : -1;
1214         if (primary != currentPrimary) { // we see a new primary
1215           int numReplicas = j - currentPrimaryIndex;
1216           // square the cost
1217           if (numReplicas > 1) { // means consecutive primaries, indicating co-location
1218             cost += (numReplicas - 1) * (numReplicas - 1);
1219           }
1220           currentPrimary = primary;
1221           currentPrimaryIndex = j;
1222         }
1223       }
1224 
1225       return cost;
1226     }
1227 
1228     @Override
1229     protected void regionMoved(int region, int oldServer, int newServer) {
1230       if (maxCost <= 0) {
1231         return; // no need to compute
1232       }
1233       if (cluster.multiServersPerHost) {
1234         int oldHost = cluster.serverIndexToHostIndex[oldServer];
1235         int newHost = cluster.serverIndexToHostIndex[newServer];
1236         if (newHost != oldHost) {
1237           costsPerGroup[oldHost] = costPerGroup(cluster.primariesOfRegionsPerHost[oldHost]);
1238           costsPerGroup[newHost] = costPerGroup(cluster.primariesOfRegionsPerHost[newHost]);
1239         }
1240       } else {
1241         costsPerGroup[oldServer] = costPerGroup(cluster.primariesOfRegionsPerServer[oldServer]);
1242         costsPerGroup[newServer] = costPerGroup(cluster.primariesOfRegionsPerServer[newServer]);
1243       }
1244     }
1245   }
1246 
1247   /**
1248    * A cost function for region replicas for the rack distribution. We give a relatively high
1249    * cost to hosting replicas of the same region in the same rack. We do not prevent the case
1250    * though.
1251    */
1252   static class RegionReplicaRackCostFunction extends RegionReplicaHostCostFunction {
1253     private static final String REGION_REPLICA_RACK_COST_KEY =
1254         "hbase.master.balancer.stochastic.regionReplicaRackCostKey";
1255     private static final float DEFAULT_REGION_REPLICA_RACK_COST_KEY = 10000;
1256 
1257     public RegionReplicaRackCostFunction(Configuration conf) {
1258       super(conf);
1259       this.setMultiplier(conf.getFloat(REGION_REPLICA_RACK_COST_KEY, DEFAULT_REGION_REPLICA_RACK_COST_KEY));
1260     }
1261 
1262     @Override
1263     void init(Cluster cluster) {
1264       this.cluster = cluster;
1265       if (cluster.numRacks <= 1) {
1266         maxCost = 0;
1267         return; // disabled for 1 rack
1268       }
1269       // max cost is the case where every region replica is hosted together regardless of rack
1270       maxCost = getMaxCost(cluster);
1271       costsPerGroup = new long[cluster.numRacks];
1272       for (int i = 0 ; i < cluster.primariesOfRegionsPerRack.length; i++) {
1273         costsPerGroup[i] = costPerGroup(cluster.primariesOfRegionsPerRack[i]);
1274       }
1275     }
1276 
1277     @Override
1278     protected void regionMoved(int region, int oldServer, int newServer) {
1279       if (maxCost <= 0) {
1280         return; // no need to compute
1281       }
1282       int oldRack = cluster.serverIndexToRackIndex[oldServer];
1283       int newRack = cluster.serverIndexToRackIndex[newServer];
1284       if (newRack != oldRack) {
1285         costsPerGroup[oldRack] = costPerGroup(cluster.primariesOfRegionsPerRack[oldRack]);
1286         costsPerGroup[newRack] = costPerGroup(cluster.primariesOfRegionsPerRack[newRack]);
1287       }
1288     }
1289   }
1290 
1291   /**
1292    * Compute the cost of total memstore size.  The more unbalanced the higher the
1293    * computed cost will be.  This uses a rolling average of regionload.
1294    */
1295   static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction {
1296 
1297     private static final String MEMSTORE_SIZE_COST_KEY =
1298         "hbase.master.balancer.stochastic.memstoreSizeCost";
1299     private static final float DEFAULT_MEMSTORE_SIZE_COST = 5;
1300 
1301     MemstoreSizeCostFunction(Configuration conf) {
1302       super(conf);
1303       this.setMultiplier(conf.getFloat(MEMSTORE_SIZE_COST_KEY, DEFAULT_MEMSTORE_SIZE_COST));
1304     }
1305 
1306     @Override
1307     protected double getCostFromRl(RegionLoad rl) {
1308       return rl.getMemStoreSizeMB();
1309     }
1310   }
1311   /**
1312    * Compute the cost of total open storefiles size.  The more unbalanced the higher the
1313    * computed cost will be.  This uses a rolling average of regionload.
1314    */
1315   static class StoreFileCostFunction extends CostFromRegionLoadFunction {
1316 
1317     private static final String STOREFILE_SIZE_COST_KEY =
1318         "hbase.master.balancer.stochastic.storefileSizeCost";
1319     private static final float DEFAULT_STOREFILE_SIZE_COST = 5;
1320 
1321     StoreFileCostFunction(Configuration conf) {
1322       super(conf);
1323       this.setMultiplier(conf.getFloat(STOREFILE_SIZE_COST_KEY, DEFAULT_STOREFILE_SIZE_COST));
1324     }
1325 
1326     @Override
1327     protected double getCostFromRl(RegionLoad rl) {
1328       return rl.getStorefileSizeMB();
1329     }
1330   }
1331 }