View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import java.util.ArrayDeque;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Deque;
24  import java.util.HashMap;
25  import java.util.LinkedList;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Map.Entry;
29  import java.util.Random;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
34  import org.apache.hadoop.classification.InterfaceAudience;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.hbase.ClusterStatus;
37  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.RegionLoad;
40  import org.apache.hadoop.hbase.ServerLoad;
41  import org.apache.hadoop.hbase.ServerName;
42  import org.apache.hadoop.hbase.master.MasterServices;
43  import org.apache.hadoop.hbase.master.RegionPlan;
44  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
45  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
46  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction;
47  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction;
48  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegionsAction;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
51  
52  /**
 * <p>This is a best effort load balancer. Given a cost function F(C) => x, it will
 * randomly try and mutate the cluster to Cprime. If F(Cprime) < F(C) then the
 * new cluster state becomes the plan. It includes cost functions to compute the cost of:</p>
56   * <ul>
57   * <li>Region Load</li>
58   * <li>Table Load</li>
59   * <li>Data Locality</li>
60   * <li>Memstore Sizes</li>
61   * <li>Storefile Sizes</li>
62   * </ul>
63   *
64   *
65   * <p>Every cost function returns a number between 0 and 1 inclusive; where 0 is the lowest cost
66   * best solution, and 1 is the highest possible cost and the worst solution.  The computed costs are
67   * scaled by their respective multipliers:</p>
68   *
69   * <ul>
70   *   <li>hbase.master.balancer.stochastic.regionLoadCost</li>
71   *   <li>hbase.master.balancer.stochastic.moveCost</li>
72   *   <li>hbase.master.balancer.stochastic.tableLoadCost</li>
73   *   <li>hbase.master.balancer.stochastic.localityCost</li>
74   *   <li>hbase.master.balancer.stochastic.memstoreSizeCost</li>
75   *   <li>hbase.master.balancer.stochastic.storefileSizeCost</li>
76   * </ul>
77   *
78   * <p>In addition to the above configurations, the balancer can be tuned by the following
79   * configuration values:</p>
80   * <ul>
 *   <li>hbase.master.balancer.stochastic.maxMoveRegions which
 *   controls the maximum number of regions that can be moved in a single invocation of this
 *   balancer.</li>
84   *   <li>hbase.master.balancer.stochastic.stepsPerRegion is the coefficient by which the number of
85   *   regions is multiplied to try and get the number of times the balancer will
86   *   mutate all servers.</li>
87   *   <li>hbase.master.balancer.stochastic.maxSteps which controls the maximum number of times that
88   *   the balancer will try and mutate all the servers. The balancer will use the minimum of this
89   *   value and the above computation.</li>
90   * </ul>
91   *
92   * <p>This balancer is best used with hbase.master.loadbalance.bytable set to false
93   * so that the balancer gets the full picture of all loads on the cluster.</p>
94   */
95  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
96  public class StochasticLoadBalancer extends BaseLoadBalancer {
97  
  /** Configuration key: coefficient multiplied by (regions * servers) to size the step budget. */
  protected static final String STEPS_PER_REGION_KEY =
      "hbase.master.balancer.stochastic.stepsPerRegion";
  /** Configuration key: hard upper bound on candidate steps per balancer invocation. */
  protected static final String MAX_STEPS_KEY =
      "hbase.master.balancer.stochastic.maxSteps";
  /** Configuration key: wall-clock budget, in milliseconds, for a single balancer run. */
  protected static final String MAX_RUNNING_TIME_KEY =
      "hbase.master.balancer.stochastic.maxRunningTime";
  /** Configuration key: how many historical RegionLoad samples to keep per region. */
  protected static final String KEEP_REGION_LOADS =
      "hbase.master.balancer.stochastic.numRegionLoadsToRemember";

  private static final Random RANDOM = new Random(System.currentTimeMillis());
  private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);

  // Rolling window of recent RegionLoads keyed by region name; rebuilt on every
  // setClusterStatus() call by updateRegionLoad().
  Map<String, Deque<RegionLoad>> loads = new HashMap<String, Deque<RegionLoad>>();

  // values are defaults; each may be overridden via the keys above in setConf()
  private int maxSteps = 1000000;
  private int stepsPerRegion = 800;
  private long maxRunningTime = 30 * 1000 * 1; // 30 seconds.
  private int numRegionLoadsToRemember = 15;

  // Candidate pickers and cost functions assembled in setConf()
  private CandidateGenerator[] candidateGenerators;
  private CostFromRegionLoadFunction[] regionLoadFunctions;
  private CostFunction[] costFunctions;
  // Keep locality based picker and cost function to alert them
  // when new services are offered
  private LocalityBasedCandidateGenerator localityCandidateGenerator;
  private LocalityCostFunction localityCost;
  private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
  private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
127 
  /**
   * Reads the stochastic balancer tunables from the configuration and builds the
   * candidate generators and cost functions used by {@code balanceCluster}.
   * Safe to call again on reconfiguration; all state is rebuilt from scratch.
   *
   * @param conf the configuration to read the tunables from
   */
  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);

    maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);

    stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
    maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);

    numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);

    // Keep references to the locality-aware components so setMasterServices()
    // can hand them the services object later (it may be null at this point).
    localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
    localityCost = new LocalityCostFunction(conf, services);

    candidateGenerators = new CandidateGenerator[] {
      new RandomCandidateGenerator(),
      new LoadCandidateGenerator(),
      localityCandidateGenerator,
      new RegionReplicaRackCandidateGenerator(),
    };

    // These four also need the ClusterStatus / loads map; see setClusterStatus().
    regionLoadFunctions = new CostFromRegionLoadFunction[] {
      new ReadRequestCostFunction(conf),
      new WriteRequestCostFunction(conf),
      new MemstoreSizeCostFunction(conf),
      new StoreFileCostFunction(conf)
    };

    regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
    regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);

    costFunctions = new CostFunction[]{
      new RegionCountSkewCostFunction(conf, activeMasterWeight, backupMasterWeight),
      new MoveCostFunction(conf),
      localityCost,
      new TableSkewCostFunction(conf),
      regionReplicaHostCostFunction,
      regionReplicaRackCostFunction,
      regionLoadFunctions[0],
      regionLoadFunctions[1],
      regionLoadFunctions[2],
      regionLoadFunctions[3],
    };
  }
172 
  /**
   * Effectively disables the slop check: with a near-zero slop the balancer
   * always runs and lets its cost functions decide whether anything should move.
   */
  @Override
  protected void setSlop(Configuration conf) {
    this.slop = conf.getFloat("hbase.regions.slop", 0.001F);
  }
177 
178   @Override
179   public void setClusterStatus(ClusterStatus st) {
180     super.setClusterStatus(st);
181     updateRegionLoad();
182     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
183       cost.setClusterStatus(st);
184     }
185   }
186 
  /**
   * Stores the master services handle and forwards it to the locality-aware
   * picker and cost function, which may have been constructed in setConf()
   * before the services object was available.
   *
   * @param masterServices the master services to use
   */
  @Override
  public void setMasterServices(MasterServices masterServices) {
    super.setMasterServices(masterServices);
    this.localityCost.setServices(masterServices);
    this.localityCandidateGenerator.setServices(masterServices);

  }
194 
195   @Override
196   protected boolean areSomeRegionReplicasColocated(Cluster c) {
197     regionReplicaHostCostFunction.init(c);
198     if (regionReplicaHostCostFunction.cost() > 0) return true;
199     regionReplicaRackCostFunction.init(c);
200     if (regionReplicaRackCostFunction.cost() > 0) return true;
201     return false;
202   }
203 
  /**
   * Given the cluster state this will try and approach an optimal balance. This
   * should always approach the optimal state given enough steps.
   *
   * <p>Performs a bounded stochastic walk: repeatedly pick a random candidate
   * action, apply it, keep it if the total cost decreased, otherwise undo it.</p>
   *
   * @param clusterState current assignment of regions to servers
   * @return region moves that realize the improved state, or null when no
   *         better plan was found (or balancing was not needed)
   */
  @Override
  public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    // Regions hosted on the master are planned separately; if that produced a
    // plan, return it as-is.
    List<RegionPlan> plans = balanceMasterRegions(clusterState);
    if (plans != null) {
      return plans;
    }
    filterExcludedServers(clusterState);
    //The clusterState that is given to this method contains the state
    //of all the regions in the table(s) (that's true today)
    // Keep track of servers to iterate through them.
    Cluster cluster = new Cluster(masterServerName,
      clusterState, loads, regionFinder, getBackupMasters(), tablesOnMaster, rackManager);
    if (!needsBalance(cluster)) {
      return null;
    }

    long startTime = EnvironmentEdgeManager.currentTime();

    initCosts(cluster);

    // Initial cost; Double.MAX_VALUE disables the early-out inside computeCost.
    double currentCost = computeCost(cluster, Double.MAX_VALUE);

    double initCost = currentCost;
    double newCost = currentCost;

    // Step budget: the configured hard cap, or stepsPerRegion scaled by cluster
    // size, whichever is smaller.
    long computedMaxSteps = Math.min(this.maxSteps,
        ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
    // Perform a stochastic walk to see if we can get a good fit.
    long step;

    for (step = 0; step < computedMaxSteps; step++) {
      // Pick a random generator, then let it propose an action.
      int generatorIdx = RANDOM.nextInt(candidateGenerators.length);
      CandidateGenerator p = candidateGenerators[generatorIdx];
      Cluster.Action action = p.generate(cluster);

      if (action.type == Type.NULL) {
        continue;
      }

      // Tentatively apply the action and re-evaluate the cluster cost.
      cluster.doAction(action);
      updateCostsWithAction(cluster, action);

      // Pass currentCost so computeCost can bail out early once it is exceeded.
      newCost = computeCost(cluster, currentCost);

      // Should this be kept?
      if (newCost < currentCost) {
        currentCost = newCost;
      } else {
        // Put things back the way they were before.
        // TODO: undo by remembering old values
        Action undoAction = action.undoAction();
        cluster.doAction(undoAction);
        updateCostsWithAction(cluster, undoAction);
      }

      // Respect the wall-clock budget even if steps remain.
      if (EnvironmentEdgeManager.currentTime() - startTime >
          maxRunningTime) {
        break;
      }
    }

    long endTime = EnvironmentEdgeManager.currentTime();

    metricsBalancer.balanceCluster(endTime - startTime);

    // Only emit a plan when the walk actually improved on the initial cost.
    if (initCost > currentCost) {
      plans = createRegionPlans(cluster);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Finished computing new load balance plan.  Computation took "
            + (endTime - startTime) + "ms to try " + step
            + " different iterations.  Found a solution that moves "
            + plans.size() + " regions; Going from a computed cost of "
            + initCost + " to a new cost of " + currentCost);
      }
      return plans;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Could not find a better load balance plan.  Tried "
          + step + " different configurations in " + (endTime - startTime)
          + "ms, and did not find anything with a computed cost less than " + initCost);
    }
    return null;
  }
291 
292   /**
293    * Create all of the RegionPlan's needed to move from the initial cluster state to the desired
294    * state.
295    *
296    * @param cluster The state of the cluster
297    * @return List of RegionPlan's that represent the moves needed to get to desired final state.
298    */
299   private List<RegionPlan> createRegionPlans(Cluster cluster) {
300     List<RegionPlan> plans = new LinkedList<RegionPlan>();
301     for (int regionIndex = 0;
302          regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
303       int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
304       int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
305 
306       if (initialServerIndex != newServerIndex) {
307         HRegionInfo region = cluster.regions[regionIndex];
308         ServerName initialServer = cluster.servers[initialServerIndex];
309         ServerName newServer = cluster.servers[newServerIndex];
310 
311         if (LOG.isTraceEnabled()) {
312           LOG.trace("Moving Region " + region.getEncodedName() + " from server "
313               + initialServer.getHostname() + " to " + newServer.getHostname());
314         }
315         RegionPlan rp = new RegionPlan(region, initialServer, newServer);
316         plans.add(rp);
317       }
318     }
319     return plans;
320   }
321 
322   /**
323    * Store the current region loads.
324    */
325   private synchronized void updateRegionLoad() {
326     // We create a new hashmap so that regions that are no longer there are removed.
327     // However we temporarily need the old loads so we can use them to keep the rolling average.
328     Map<String, Deque<RegionLoad>> oldLoads = loads;
329     loads = new HashMap<String, Deque<RegionLoad>>();
330 
331     for (ServerName sn : clusterStatus.getServers()) {
332       ServerLoad sl = clusterStatus.getLoad(sn);
333       if (sl == null) {
334         continue;
335       }
336       for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
337         Deque<RegionLoad> rLoads = oldLoads.get(Bytes.toString(entry.getKey()));
338         if (rLoads == null) {
339           // There was nothing there
340           rLoads = new ArrayDeque<RegionLoad>();
341         } else if (rLoads.size() >= 15) {
342           rLoads.remove();
343         }
344         rLoads.add(entry.getValue());
345         loads.put(Bytes.toString(entry.getKey()), rLoads);
346 
347       }
348     }
349 
350     for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
351       cost.setLoads(loads);
352     }
353   }
354 
355   protected void initCosts(Cluster cluster) {
356     for (CostFunction c:costFunctions) {
357       c.init(cluster);
358     }
359   }
360 
361   protected void updateCostsWithAction(Cluster cluster, Action action) {
362     for (CostFunction c : costFunctions) {
363       c.postAction(action);
364     }
365   }
366 
367   /**
368    * This is the main cost function.  It will compute a cost associated with a proposed cluster
369    * state.  All different costs will be combined with their multipliers to produce a double cost.
370    *
371    * @param cluster The state of the cluster
372    * @param previousCost the previous cost. This is used as an early out.
373    * @return a double of a cost associated with the proposed cluster state.  This cost is an
374    *         aggregate of all individual cost functions.
375    */
376   protected double computeCost(Cluster cluster, double previousCost) {
377     double total = 0;
378 
379     for (CostFunction c:costFunctions) {
380       if (c.getMultiplier() <= 0) {
381         continue;
382       }
383 
384       total += c.getMultiplier() * c.cost();
385 
386       if (total > previousCost) {
387         return total;
388       }
389     }
390     return total;
391   }
392 
393   /** Generates a candidate action to be applied to the cluster for cost function search */
394   abstract static class CandidateGenerator {
395     abstract Cluster.Action generate(Cluster cluster);
396 
397     /**
398      * From a list of regions pick a random one. Null can be returned which
399      * {@link StochasticLoadBalancer#balanceCluster(Map)} recognize as signal to try a region move
400      * rather than swap.
401      *
402      * @param cluster        The state of the cluster
403      * @param server         index of the server
404      * @param chanceOfNoSwap Chance that this will decide to try a move rather
405      *                       than a swap.
406      * @return a random {@link HRegionInfo} or null if an asymmetrical move is
407      *         suggested.
408      */
409     protected int pickRandomRegion(Cluster cluster, int server, double chanceOfNoSwap) {
410       // Check to see if this is just a move.
411       if (cluster.regionsPerServer[server].length == 0 || RANDOM.nextFloat() < chanceOfNoSwap) {
412         // signal a move only.
413         return -1;
414       }
415       int rand = RANDOM.nextInt(cluster.regionsPerServer[server].length);
416       return cluster.regionsPerServer[server][rand];
417 
418     }
419     protected int pickRandomServer(Cluster cluster) {
420       if (cluster.numServers < 1) {
421         return -1;
422       }
423 
424       return RANDOM.nextInt(cluster.numServers);
425     }
426 
427     protected int pickRandomRack(Cluster cluster) {
428       if (cluster.numRacks < 1) {
429         return -1;
430       }
431 
432       return RANDOM.nextInt(cluster.numRacks);
433     }
434 
435     protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
436       if (cluster.numServers < 2) {
437         return -1;
438       }
439       while (true) {
440         int otherServerIndex = pickRandomServer(cluster);
441         if (otherServerIndex != serverIndex) {
442           return otherServerIndex;
443         }
444       }
445     }
446 
447     protected int pickOtherRandomRack(Cluster cluster, int rackIndex) {
448       if (cluster.numRacks < 2) {
449         return -1;
450       }
451       while (true) {
452         int otherRackIndex = pickRandomRack(cluster);
453         if (otherRackIndex != rackIndex) {
454           return otherRackIndex;
455         }
456       }
457     }
458 
459     protected Cluster.Action pickRandomRegions(Cluster cluster,
460                                                        int thisServer,
461                                                        int otherServer) {
462       if (thisServer < 0 || otherServer < 0) {
463         return Cluster.NullAction;
464       }
465 
466       // Decide who is most likely to need another region
467       int thisRegionCount = cluster.getNumRegions(thisServer);
468       int otherRegionCount = cluster.getNumRegions(otherServer);
469 
470       // Assign the chance based upon the above
471       double thisChance = (thisRegionCount > otherRegionCount) ? 0 : 0.5;
472       double otherChance = (thisRegionCount <= otherRegionCount) ? 0 : 0.5;
473 
474       int thisRegion = pickRandomRegion(cluster, thisServer, thisChance);
475       int otherRegion = pickRandomRegion(cluster, otherServer, otherChance);
476 
477       return getAction(thisServer, thisRegion, otherServer, otherRegion);
478     }
479 
480     protected Cluster.Action getAction (int fromServer, int fromRegion,
481         int toServer, int toRegion) {
482       if (fromServer < 0 || toServer < 0) {
483         return Cluster.NullAction;
484       }
485       if (fromRegion > 0 && toRegion > 0) {
486         return new Cluster.SwapRegionsAction(fromServer, fromRegion,
487           toServer, toRegion);
488       } else if (fromRegion > 0) {
489         return new Cluster.MoveRegionAction(fromRegion, fromServer, toServer);
490       } else if (toRegion > 0) {
491         return new Cluster.MoveRegionAction(toRegion, toServer, fromServer);
492       } else {
493         return Cluster.NullAction;
494       }
495     }
496   }
497 
498   static class RandomCandidateGenerator extends CandidateGenerator {
499 
500     @Override
501     Cluster.Action generate(Cluster cluster) {
502 
503       int thisServer = pickRandomServer(cluster);
504 
505       // Pick the other server
506       int otherServer = pickOtherRandomServer(cluster, thisServer);
507 
508       return pickRandomRegions(cluster, thisServer, otherServer);
509     }
510   }
511 
512   static class LoadCandidateGenerator extends CandidateGenerator {
513 
514     @Override
515     Cluster.Action generate(Cluster cluster) {
516       cluster.sortServersByRegionCount();
517       int thisServer = pickMostLoadedServer(cluster, -1);
518       int otherServer = pickLeastLoadedServer(cluster, thisServer);
519 
520       return pickRandomRegions(cluster, thisServer, otherServer);
521     }
522 
523     private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
524       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
525 
526       int index = 0;
527       while (servers[index] == null || servers[index] == thisServer) {
528         index++;
529         if (index == servers.length) {
530           return -1;
531         }
532       }
533       return servers[index];
534     }
535 
536     private int pickMostLoadedServer(final Cluster cluster, int thisServer) {
537       Integer[] servers = cluster.serverIndicesSortedByRegionCount;
538 
539       int index = servers.length - 1;
540       while (servers[index] == null || servers[index] == thisServer) {
541         index--;
542         if (index < 0) {
543           return -1;
544         }
545       }
546       return servers[index];
547     }
548   }
549 
550   static class LocalityBasedCandidateGenerator extends CandidateGenerator {
551 
552     private MasterServices masterServices;
553 
554     LocalityBasedCandidateGenerator(MasterServices masterServices) {
555       this.masterServices = masterServices;
556     }
557 
558     @Override
559     Cluster.Action generate(Cluster cluster) {
560       if (this.masterServices == null) {
561         return Cluster.NullAction;
562       }
563       // Pick a random region server
564       int thisServer = pickRandomServer(cluster);
565 
566       // Pick a random region on this server
567       int thisRegion = pickRandomRegion(cluster, thisServer, 0.0f);
568 
569       if (thisRegion == -1) {
570         return Cluster.NullAction;
571       }
572 
573       // Pick the server with the highest locality
574       int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
575 
576       // pick an region on the other server to potentially swap
577       int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
578 
579       return getAction(thisServer, thisRegion, otherServer, otherRegion);
580     }
581 
582     private int pickHighestLocalityServer(Cluster cluster, int thisServer, int thisRegion) {
583       int[] regionLocations = cluster.regionLocations[thisRegion];
584 
585       if (regionLocations == null || regionLocations.length <= 1) {
586         return pickOtherRandomServer(cluster, thisServer);
587       }
588 
589       for (int loc : regionLocations) {
590         if (loc >= 0 && loc != thisServer) { // find the first suitable server
591           return loc;
592         }
593       }
594 
595       // no location found
596       return pickOtherRandomServer(cluster, thisServer);
597     }
598 
599     void setServices(MasterServices services) {
600       this.masterServices = services;
601     }
602   }
603 
  /**
   * Generates candidates which moves the replicas out of the region server for
   * co-hosted region replicas
   */
  static class RegionReplicaCandidateGenerator extends CandidateGenerator {

    // Fallback used when the chosen server hosts no co-located replicas.
    RandomCandidateGenerator randomGenerator = new RandomCandidateGenerator();

    /**
     * Randomly select one regionIndex out of all region replicas co-hosted in the same group
     * (a group is a server, host or rack)
     * @param primariesOfRegionsPerGroup either Cluster.primariesOfRegionsPerServer,
     * primariesOfRegionsPerHost or primariesOfRegionsPerRack
     * @param regionsPerGroup either Cluster.regionsPerServer, regionsPerHost or regionsPerRack
     * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
     * @return a regionIndex for the selected primary or -1 if there is no co-locating
     */
    int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] regionsPerGroup
        , int[] regionIndexToPrimaryIndex) {
      int currentPrimary = -1;
      int currentPrimaryIndex = -1;
      int selectedPrimaryIndex = -1;
      double currentLargestRandom = -1;
      // primariesOfRegionsPerGroup is a sorted array. Since it contains the primary region
      // ids for the regions hosted in server, a consecutive repetition means that replicas
      // are co-hosted
      // NOTE: the loop deliberately runs one slot past the end (j == length yields
      // primary == -1) so the trailing run of repeated primaries is also flushed.
      for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
        int primary = j < primariesOfRegionsPerGroup.length
            ? primariesOfRegionsPerGroup[j] : -1;
        if (primary != currentPrimary) { // check for whether we see a new primary
          int numReplicas = j - currentPrimaryIndex;
          if (numReplicas > 1) { // means consecutive primaries, indicating co-location
            // decide to select this primary region id or not
            double currentRandom = RANDOM.nextDouble();
            // we don't know how many region replicas are co-hosted, we will randomly select one
            // using reservoir sampling (http://gregable.com/2007/10/reservoir-sampling.html)
            if (currentRandom > currentLargestRandom) {
              selectedPrimaryIndex = currentPrimary;
              currentLargestRandom = currentRandom;
            }
          }
          currentPrimary = primary;
          currentPrimaryIndex = j;
        }
      }

      // we have found the primary id for the region to move. Now find the actual regionIndex
      // with the given primary, prefer to move the secondary region.
      for (int j = 0; j < regionsPerGroup.length; j++) {
        int regionIndex = regionsPerGroup[j];
        if (selectedPrimaryIndex == regionIndexToPrimaryIndex[regionIndex]) {
          // always move the secondary, not the primary
          if (selectedPrimaryIndex != regionIndex) {
            return regionIndex;
          }
        }
      }
      return -1;
    }

    @Override
    Cluster.Action generate(Cluster cluster) {
      int serverIndex = pickRandomServer(cluster);
      // With a single server there is nowhere to move a replica to.
      if (cluster.numServers <= 1 || serverIndex == -1) {
        return Cluster.NullAction;
      }

      int regionIndex = selectCoHostedRegionPerGroup(
        cluster.primariesOfRegionsPerServer[serverIndex],
        cluster.regionsPerServer[serverIndex],
        cluster.regionIndexToPrimaryIndex);

      // if there are no pairs of region replicas co-hosted, default to random generator
      if (regionIndex == -1) {
        // default to randompicker
        return randomGenerator.generate(cluster);
      }

      // Move the offending replica elsewhere; chanceOfNoSwap 0.9 strongly
      // prefers a plain move over a swap.
      int toServerIndex = pickOtherRandomServer(cluster, serverIndex);
      int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
      return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
    }
  }
687 
  /**
   * Generates candidates which moves the replicas out of the rack for
   * co-hosted region replicas in the same rack
   */
  static class RegionReplicaRackCandidateGenerator extends RegionReplicaCandidateGenerator {
    @Override
    Cluster.Action generate(Cluster cluster) {
      int rackIndex = pickRandomRack(cluster);
      // With a single rack fall back to the per-server variant in the superclass.
      if (cluster.numRacks <= 1 || rackIndex == -1) {
        return super.generate(cluster);
      }

      // Reuse the group-based selection, but group by rack instead of server.
      int regionIndex = selectCoHostedRegionPerGroup(
        cluster.primariesOfRegionsPerRack[rackIndex],
        cluster.regionsPerRack[rackIndex],
        cluster.regionIndexToPrimaryIndex);

      // if there are no pairs of region replicas co-hosted, default to random generator
      if (regionIndex == -1) {
        // default to randompicker
        return randomGenerator.generate(cluster);
      }

      // Send the replica to a random server on a different rack; chanceOfNoSwap
      // 0.9 strongly prefers a plain move over a swap.
      int serverIndex = cluster.regionIndexToServerIndex[regionIndex];
      int toRackIndex = pickOtherRandomRack(cluster, rackIndex);

      int rand = RANDOM.nextInt(cluster.serversPerRack[toRackIndex].length);
      int toServerIndex = cluster.serversPerRack[toRackIndex][rand];
      int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
      return getAction (serverIndex, regionIndex, toServerIndex, toRegionIndex);
    }
  }
720 
721   /**
722    * Base class of StochasticLoadBalancer's Cost Functions.
723    */
724   abstract static class CostFunction {
725 
726     private float multiplier = 0;
727 
728     protected Cluster cluster;
729 
730     CostFunction(Configuration c) {
731 
732     }
733 
734     float getMultiplier() {
735       return multiplier;
736     }
737 
738     void setMultiplier(float m) {
739       this.multiplier = m;
740     }
741 
742     /** Called once per LB invocation to give the cost function
743      * to initialize it's state, and perform any costly calculation.
744      */
745     void init(Cluster cluster) {
746       this.cluster = cluster;
747     }
748 
749     /** Called once per cluster Action to give the cost function
750      * an opportunity to update it's state. postAction() is always
751      * called at least once before cost() is called with the cluster
752      * that this action is performed on. */
753     void postAction(Action action) {
754       switch (action.type) {
755       case NULL: break;
756       case ASSIGN_REGION:
757         AssignRegionAction ar = (AssignRegionAction) action;
758         regionMoved(ar.region, -1, ar.server);
759         break;
760       case MOVE_REGION:
761         MoveRegionAction mra = (MoveRegionAction) action;
762         regionMoved(mra.region, mra.fromServer, mra.toServer);
763         break;
764       case SWAP_REGIONS:
765         SwapRegionsAction a = (SwapRegionsAction) action;
766         regionMoved(a.fromRegion, a.fromServer, a.toServer);
767         regionMoved(a.toRegion, a.toServer, a.fromServer);
768         break;
769       default:
770         throw new RuntimeException("Uknown action:" + action.type);
771       }
772     }
773 
774     protected void regionMoved(int region, int oldServer, int newServer) {
775     }
776 
777     abstract double cost();
778 
779     /**
780      * Function to compute a scaled cost using {@link DescriptiveStatistics}. It
781      * assumes that this is a zero sum set of costs.  It assumes that the worst case
782      * possible is all of the elements in one region server and the rest having 0.
783      *
784      * @param stats the costs
785      * @return a scaled set of costs.
786      */
787     protected double costFromArray(double[] stats) {
788       double totalCost = 0;
789       double total = getSum(stats);
790       double mean = total/((double)stats.length);
791       double count = stats.length;
792 
793       // Compute max as if all region servers had 0 and one had the sum of all costs.  This must be
794       // a zero sum cost for this to make sense.
795       // TODO: Should we make this sum of square errors?
796       double max = ((count - 1) * mean) + (total - mean);
797       for (double n : stats) {
798         double diff = Math.abs(mean - n);
799         totalCost += diff;
800       }
801 
802       double scaled =  scale(0, max, totalCost);
803       return scaled;
804     }
805 
806     private double getSum(double[] stats) {
807       double total = 0;
808       for(double s:stats) {
809         total += s;
810       }
811       return total;
812     }
813 
814     /**
815      * Scale the value between 0 and 1.
816      *
817      * @param min   Min value
818      * @param max   The Max value
819      * @param value The value to be scaled.
820      * @return The scaled value.
821      */
822     protected double scale(double min, double max, double value) {
823       if (max <= min || value <= min) {
824         return 0;
825       }
826 
827       return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
828     }
829   }
830 
831   /**
832    * Given the starting state of the regions and a potential ending state
833    * compute cost based upon the number of regions that have moved.
834    */
835   static class MoveCostFunction extends CostFunction {
836     private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
837     private static final String MAX_MOVES_PERCENT_KEY =
838         "hbase.master.balancer.stochastic.maxMovePercent";
839     private static final float DEFAULT_MOVE_COST = 100;
840     private static final int DEFAULT_MAX_MOVES = 600;
841     private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
842 
843     private final float maxMovesPercent;
844 
845     MoveCostFunction(Configuration conf) {
846       super(conf);
847 
848       // Move cost multiplier should be the same cost or higher than the rest of the costs to ensure
849       // that large benefits are need to overcome the cost of a move.
850       this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
851       // What percent of the number of regions a single run of the balancer can move.
852       maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT);
853     }
854 
855     @Override
856     double cost() {
857       // Try and size the max number of Moves, but always be prepared to move some.
858       int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent),
859           DEFAULT_MAX_MOVES);
860 
861       double moveCost = cluster.numMovedRegions;
862 
863       // Don't let this single balance move more than the max moves,
864       // or move a region that should be on master away from the master.
865       // It is ok to move any master hosted region back to the master.
866       // This allows better scaling to accurately represent the actual cost of a move.
867       if (moveCost > maxMoves || cluster.numMovedMasterHostedRegions > 0) {
868         return 1000000;   // return a number much greater than any of the other cost
869       }
870 
871       return scale(0, cluster.numRegions, moveCost);
872     }
873   }
874 
875   /**
876    * Compute the cost of a potential cluster state from skew in number of
877    * regions on a cluster.
878    */
879   static class RegionCountSkewCostFunction extends CostFunction {
880     private static final String REGION_COUNT_SKEW_COST_KEY =
881         "hbase.master.balancer.stochastic.regionCountCost";
882     private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
883 
884     private double activeMasterWeight;
885     private double backupMasterWeight;
886     private double[] stats = null;
887 
888     RegionCountSkewCostFunction(Configuration conf,
889         double activeMasterWeight, double backupMasterWeight) {
890       super(conf);
891       // Load multiplier should be the greatest as it is the most general way to balance data.
892       this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
893       this.activeMasterWeight = activeMasterWeight;
894       this.backupMasterWeight = backupMasterWeight;
895     }
896 
897     @Override
898     double cost() {
899       if (stats == null || stats.length != cluster.numServers) {
900         stats = new double[cluster.numServers];
901       }
902 
903       for (int i =0; i < cluster.numServers; i++) {
904         stats[i] = cluster.regionsPerServer[i].length;
905         // Use some weight on regions assigned to active/backup masters,
906         // so that they won't carry as many regions as normal regionservers.
907         if (cluster.isActiveMaster(i)) {
908           stats[i] += cluster.numUserRegionsOnMaster * (activeMasterWeight - 1);
909         } else if (cluster.isBackupMaster(i)) {
910           stats[i] *= backupMasterWeight;
911         }
912       }
913       return costFromArray(stats);
914     }
915   }
916 
917   /**
918    * Compute the cost of a potential cluster configuration based upon how evenly
919    * distributed tables are.
920    */
921   static class TableSkewCostFunction extends CostFunction {
922 
923     private static final String TABLE_SKEW_COST_KEY =
924         "hbase.master.balancer.stochastic.tableSkewCost";
925     private static final float DEFAULT_TABLE_SKEW_COST = 35;
926 
927     TableSkewCostFunction(Configuration conf) {
928       super(conf);
929       this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
930     }
931 
932     @Override
933     double cost() {
934       double max = cluster.numRegions;
935       double min = ((double) cluster.numRegions) / cluster.numServers;
936       double value = 0;
937 
938       for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
939         value += cluster.numMaxRegionsPerTable[i];
940       }
941 
942       return scale(min, max, value);
943     }
944   }
945 
946   /**
947    * Compute a cost of a potential cluster configuration based upon where
948    * {@link org.apache.hadoop.hbase.regionserver.StoreFile}s are located.
949    */
950   static class LocalityCostFunction extends CostFunction {
951 
952     private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
953     private static final float DEFAULT_LOCALITY_COST = 25;
954 
955     private MasterServices services;
956 
957     LocalityCostFunction(Configuration conf, MasterServices srv) {
958       super(conf);
959       this.setMultiplier(conf.getFloat(LOCALITY_COST_KEY, DEFAULT_LOCALITY_COST));
960       this.services = srv;
961     }
962 
963     void setServices(MasterServices srvc) {
964       this.services = srvc;
965     }
966 
967     @Override
968     double cost() {
969       double max = 0;
970       double cost = 0;
971 
972       // If there's no master so there's no way anything else works.
973       if (this.services == null) {
974         return cost;
975       }
976 
977       for (int i = 0; i < cluster.regionLocations.length; i++) {
978         max += 1;
979         int serverIndex = cluster.regionIndexToServerIndex[i];
980         int[] regionLocations = cluster.regionLocations[i];
981 
982         // If we can't find where the data is getTopBlock returns null.
983         // so count that as being the best possible.
984         if (regionLocations == null) {
985           continue;
986         }
987 
988         int index = -1;
989         for (int j = 0; j < regionLocations.length; j++) {
990           if (regionLocations[j] >= 0 && regionLocations[j] == serverIndex) {
991             index = j;
992             break;
993           }
994         }
995 
996         if (index < 0) {
997           cost += 1;
998         } else {
999           cost += (double) index / (double) regionLocations.length;
1000         }
1001       }
1002       return scale(0, max, cost);
1003     }
1004   }
1005 
1006   /**
1007    * Base class the allows writing costs functions from rolling average of some
1008    * number from RegionLoad.
1009    */
1010   abstract static class CostFromRegionLoadFunction extends CostFunction {
1011 
1012     private ClusterStatus clusterStatus = null;
1013     private Map<String, Deque<RegionLoad>> loads = null;
1014     private double[] stats = null;
1015     CostFromRegionLoadFunction(Configuration conf) {
1016       super(conf);
1017     }
1018 
1019     void setClusterStatus(ClusterStatus status) {
1020       this.clusterStatus = status;
1021     }
1022 
1023     void setLoads(Map<String, Deque<RegionLoad>> l) {
1024       this.loads = l;
1025     }
1026 
1027     @Override
1028     double cost() {
1029       if (clusterStatus == null || loads == null) {
1030         return 0;
1031       }
1032 
1033       if (stats == null || stats.length != cluster.numServers) {
1034         stats = new double[cluster.numServers];
1035       }
1036 
1037       for (int i =0; i < stats.length; i++) {
1038         //Cost this server has from RegionLoad
1039         long cost = 0;
1040 
1041         // for every region on this server get the rl
1042         for(int regionIndex:cluster.regionsPerServer[i]) {
1043           Collection<RegionLoad> regionLoadList =  cluster.regionLoads[regionIndex];
1044 
1045           // Now if we found a region load get the type of cost that was requested.
1046           if (regionLoadList != null) {
1047             cost += getRegionLoadCost(regionLoadList);
1048           }
1049         }
1050 
1051         // Add the total cost to the stats.
1052         stats[i] = cost;
1053       }
1054 
1055       // Now return the scaled cost from data held in the stats object.
1056       return costFromArray(stats);
1057     }
1058 
1059     protected double getRegionLoadCost(Collection<RegionLoad> regionLoadList) {
1060       double cost = 0;
1061 
1062       for (RegionLoad rl : regionLoadList) {
1063         double toAdd = getCostFromRl(rl);
1064 
1065         if (cost == 0) {
1066           cost = toAdd;
1067         } else {
1068           cost = (.5 * cost) + (.5 * toAdd);
1069         }
1070       }
1071 
1072       return cost;
1073     }
1074 
1075     protected abstract double getCostFromRl(RegionLoad rl);
1076   }
1077 
1078   /**
1079    * Compute the cost of total number of read requests  The more unbalanced the higher the
1080    * computed cost will be.  This uses a rolling average of regionload.
1081    */
1082 
1083   static class ReadRequestCostFunction extends CostFromRegionLoadFunction {
1084 
1085     private static final String READ_REQUEST_COST_KEY =
1086         "hbase.master.balancer.stochastic.readRequestCost";
1087     private static final float DEFAULT_READ_REQUEST_COST = 5;
1088 
1089     ReadRequestCostFunction(Configuration conf) {
1090       super(conf);
1091       this.setMultiplier(conf.getFloat(READ_REQUEST_COST_KEY, DEFAULT_READ_REQUEST_COST));
1092     }
1093 
1094 
1095     @Override
1096     protected double getCostFromRl(RegionLoad rl) {
1097       return rl.getReadRequestsCount();
1098     }
1099   }
1100 
1101   /**
1102    * Compute the cost of total number of write requests.  The more unbalanced the higher the
1103    * computed cost will be.  This uses a rolling average of regionload.
1104    */
1105   static class WriteRequestCostFunction extends CostFromRegionLoadFunction {
1106 
1107     private static final String WRITE_REQUEST_COST_KEY =
1108         "hbase.master.balancer.stochastic.writeRequestCost";
1109     private static final float DEFAULT_WRITE_REQUEST_COST = 5;
1110 
1111     WriteRequestCostFunction(Configuration conf) {
1112       super(conf);
1113       this.setMultiplier(conf.getFloat(WRITE_REQUEST_COST_KEY, DEFAULT_WRITE_REQUEST_COST));
1114     }
1115 
1116     @Override
1117     protected double getCostFromRl(RegionLoad rl) {
1118       return rl.getWriteRequestsCount();
1119     }
1120   }
1121 
  /**
   * A cost function for region replicas. We give a very high cost to hosting
   * replicas of the same region in the same host. We do not prevent the case
   * though, since if numReplicas > numRegionServers, we still want to keep the
   * replica open.
   */
  static class RegionReplicaHostCostFunction extends CostFunction {
    private static final String REGION_REPLICA_HOST_COST_KEY =
        "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
    private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;

    // Worst-case total cost (all replicas co-hosted); <= 0 disables this function.
    long maxCost = 0;
    long[] costsPerGroup; // group is either server, host or rack
    int[][] primariesOfRegionsPerGroup;

    public RegionReplicaHostCostFunction(Configuration conf) {
      super(conf);
      this.setMultiplier(conf.getFloat(REGION_REPLICA_HOST_COST_KEY,
        DEFAULT_REGION_REPLICA_HOST_COST_KEY));
    }

    @Override
    void init(Cluster cluster) {
      super.init(cluster);
      // max cost is the case where every region replica is hosted together regardless of host
      maxCost = cluster.numHosts > 1 ? getMaxCost(cluster) : 0;
      costsPerGroup = new long[cluster.numHosts];
      primariesOfRegionsPerGroup = cluster.multiServersPerHost // either server based or host based
          ? cluster.primariesOfRegionsPerHost
          : cluster.primariesOfRegionsPerServer;
      // Seed the incremental per-group costs; regionMoved() keeps them current.
      for (int i = 0 ; i < primariesOfRegionsPerGroup.length; i++) {
        costsPerGroup[i] = costPerGroup(primariesOfRegionsPerGroup[i]);
      }
    }

    /**
     * Computes the worst possible cost: every replica of every region
     * co-located in a single group.
     *
     * @param cluster the cluster being balanced
     * @return the cost if all region replicas were hosted together, or 0 when
     *         the cluster has no region replicas
     */
    long getMaxCost(Cluster cluster) {
      if (!cluster.hasRegionReplicas) {
        return 0; // short circuit
      }
      // max cost is the case where every region replica is hosted together regardless of host
      int[] primariesOfRegions = new int[cluster.numRegions];
      for (int i = 0; i < cluster.regions.length; i++) {
        // assume all regions are hosted by only one server
        int primaryIndex = cluster.regionIndexToPrimaryIndex[i];
        primariesOfRegions[i] = primaryIndex;
      }

      // costPerGroup() requires its input sorted so that replicas of the same
      // primary are adjacent.
      Arrays.sort(primariesOfRegions);

      // compute numReplicas from the sorted array
      return costPerGroup(primariesOfRegions);
    }

    @Override
    double cost() {
      // maxCost <= 0 means replicas are absent or a single host: no cost.
      if (maxCost <= 0) {
        return 0;
      }

      long totalCost = 0;
      for (int i = 0 ; i < costsPerGroup.length; i++) {
        totalCost += costsPerGroup[i];
      }
      return scale(0, maxCost, totalCost);
    }

    /**
     * For each primary region, it computes the total number of replicas in the array (numReplicas)
     * and returns a sum of numReplicas-1 squared. For example, if the server hosts
     * regions a, b, c, d, e, f where a and b are same replicas, and c,d,e are same replicas, it
     * returns (2-1) * (2-1) + (3-1) * (3-1) + (1-1) * (1-1).
     * @param primariesOfRegions a sorted array of primary regions ids for the regions hosted
     * @return a sum of numReplicas-1 squared for each primary region in the group.
     */
    protected long costPerGroup(int[] primariesOfRegions) {
      long cost = 0;
      int currentPrimary = -1;
      int currentPrimaryIndex = -1;
      // primariesOfRegions is a sorted array of primary ids of regions. Replicas of regions
      // sharing the same primary will have consecutive numbers in the array.
      // The loop deliberately runs one step past the end (j == length) with a
      // -1 sentinel primary so the final run of replicas is also counted.
      for (int j = 0 ; j <= primariesOfRegions.length; j++) {
        int primary = j < primariesOfRegions.length ? primariesOfRegions[j] : -1;
        if (primary != currentPrimary) { // we see a new primary
          int numReplicas = j - currentPrimaryIndex;
          // square the cost
          if (numReplicas > 1) { // means consecutive primaries, indicating co-location
            cost += (numReplicas - 1) * (numReplicas - 1);
          }
          currentPrimary = primary;
          currentPrimaryIndex = j;
        }
      }

      return cost;
    }

    @Override
    protected void regionMoved(int region, int oldServer, int newServer) {
      if (maxCost <= 0) {
        return; // no need to compute
      }
      // Only the source and destination groups can change; recompute just those.
      if (cluster.multiServersPerHost) {
        int oldHost = cluster.serverIndexToHostIndex[oldServer];
        int newHost = cluster.serverIndexToHostIndex[newServer];
        if (newHost != oldHost) {
          costsPerGroup[oldHost] = costPerGroup(cluster.primariesOfRegionsPerHost[oldHost]);
          costsPerGroup[newHost] = costPerGroup(cluster.primariesOfRegionsPerHost[newHost]);
        }
      } else {
        costsPerGroup[oldServer] = costPerGroup(cluster.primariesOfRegionsPerServer[oldServer]);
        costsPerGroup[newServer] = costPerGroup(cluster.primariesOfRegionsPerServer[newServer]);
      }
    }
  }
1236 
1237   /**
1238    * A cost function for region replicas for the rack distribution. We give a relatively high
1239    * cost to hosting replicas of the same region in the same rack. We do not prevent the case
1240    * though.
1241    */
1242   static class RegionReplicaRackCostFunction extends RegionReplicaHostCostFunction {
1243     private static final String REGION_REPLICA_RACK_COST_KEY =
1244         "hbase.master.balancer.stochastic.regionReplicaRackCostKey";
1245     private static final float DEFAULT_REGION_REPLICA_RACK_COST_KEY = 10000;
1246 
1247     public RegionReplicaRackCostFunction(Configuration conf) {
1248       super(conf);
1249       this.setMultiplier(conf.getFloat(REGION_REPLICA_RACK_COST_KEY, DEFAULT_REGION_REPLICA_RACK_COST_KEY));
1250     }
1251 
1252     @Override
1253     void init(Cluster cluster) {
1254       this.cluster = cluster;
1255       if (cluster.numRacks <= 1) {
1256         maxCost = 0;
1257         return; // disabled for 1 rack
1258       }
1259       // max cost is the case where every region replica is hosted together regardless of rack
1260       maxCost = getMaxCost(cluster);
1261       costsPerGroup = new long[cluster.numRacks];
1262       for (int i = 0 ; i < cluster.primariesOfRegionsPerRack.length; i++) {
1263         costsPerGroup[i] = costPerGroup(cluster.primariesOfRegionsPerRack[i]);
1264       }
1265     }
1266 
1267     @Override
1268     protected void regionMoved(int region, int oldServer, int newServer) {
1269       if (maxCost <= 0) {
1270         return; // no need to compute
1271       }
1272       int oldRack = cluster.serverIndexToRackIndex[oldServer];
1273       int newRack = cluster.serverIndexToRackIndex[newServer];
1274       if (newRack != oldRack) {
1275         costsPerGroup[oldRack] = costPerGroup(cluster.primariesOfRegionsPerRack[oldRack]);
1276         costsPerGroup[newRack] = costPerGroup(cluster.primariesOfRegionsPerRack[newRack]);
1277       }
1278     }
1279   }
1280 
1281   /**
1282    * Compute the cost of total memstore size.  The more unbalanced the higher the
1283    * computed cost will be.  This uses a rolling average of regionload.
1284    */
1285   static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction {
1286 
1287     private static final String MEMSTORE_SIZE_COST_KEY =
1288         "hbase.master.balancer.stochastic.memstoreSizeCost";
1289     private static final float DEFAULT_MEMSTORE_SIZE_COST = 5;
1290 
1291     MemstoreSizeCostFunction(Configuration conf) {
1292       super(conf);
1293       this.setMultiplier(conf.getFloat(MEMSTORE_SIZE_COST_KEY, DEFAULT_MEMSTORE_SIZE_COST));
1294     }
1295 
1296     @Override
1297     protected double getCostFromRl(RegionLoad rl) {
1298       return rl.getMemStoreSizeMB();
1299     }
1300   }
1301   /**
1302    * Compute the cost of total open storefiles size.  The more unbalanced the higher the
1303    * computed cost will be.  This uses a rolling average of regionload.
1304    */
1305   static class StoreFileCostFunction extends CostFromRegionLoadFunction {
1306 
1307     private static final String STOREFILE_SIZE_COST_KEY =
1308         "hbase.master.balancer.stochastic.storefileSizeCost";
1309     private static final float DEFAULT_STOREFILE_SIZE_COST = 5;
1310 
1311     StoreFileCostFunction(Configuration conf) {
1312       super(conf);
1313       this.setMultiplier(conf.getFloat(STOREFILE_SIZE_COST_KEY, DEFAULT_STOREFILE_SIZE_COST));
1314     }
1315 
1316     @Override
1317     protected double getCostFromRl(RegionLoad rl) {
1318       return rl.getStorefileSizeMB();
1319     }
1320   }
1321 }