001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import java.util.function.Consumer;
021import org.apache.yetus.audience.InterfaceAudience;
022
023/**
024 * A helper class to compute a scaled cost using
025 * {@link org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#DescriptiveStatistics()}.
026 * It assumes that this is a zero sum set of costs. It assumes that the worst case possible is all
027 * of the elements in one region server and the rest having 0.
028 */
029@InterfaceAudience.Private
030final class DoubleArrayCost {
031
032  private double[] costs;
033
034  // computeCost call is expensive so we use this flag to indicate whether we need to recalculate
035  // the cost by calling computeCost
036  private boolean costsChanged;
037
038  private double cost;
039
040  void prepare(int length) {
041    if (costs == null || costs.length != length) {
042      costs = new double[length];
043    }
044  }
045
046  /**
047   * We do not want to introduce a getCosts method to let upper layer get the cost array directly,
048   * so here we introduce this method to take a {@link Consumer} as parameter, where we will pass
049   * the actual cost array in, so you can change the element of the cost array in the
050   * {@link Consumer} implementation.
051   * <p/>
052   * Usually, in prepare method, you need to fill all the elements of the cost array, while in
053   * regionMoved method, you just need to update the element for the effect region servers.
054   */
055  void applyCostsChange(Consumer<double[]> consumer) {
056    consumer.accept(costs);
057    costsChanged = true;
058  }
059
060  double cost() {
061    if (costsChanged) {
062      cost = computeCost(costs);
063      costsChanged = false;
064    }
065    return cost;
066  }
067
068  private static double computeCost(double[] stats) {
069    if (stats == null || stats.length == 0) {
070      return 0;
071    }
072    double totalCost = 0;
073    double total = getSum(stats);
074
075    double count = stats.length;
076    double mean = total / count;
077
078    for (int i = 0; i < stats.length; i++) {
079      double n = stats[i];
080      double diff = (mean - n) * (mean - n);
081      totalCost += diff;
082    }
083    // No need to compute standard deviation with division by cluster size when scaling.
084    totalCost = Math.sqrt(totalCost);
085    return CostFunction.scale(getMinSkew(total, count), getMaxSkew(total, count), totalCost);
086  }
087
088  private static double getSum(double[] stats) {
089    double total = 0;
090    for (double s : stats) {
091      total += s;
092    }
093    return total;
094  }
095
096  /**
097   * Return the min skew of distribution
098   * @param total is total number of regions
099   */
100  public static double getMinSkew(double total, double numServers) {
101    if (numServers == 0) {
102      return 0;
103    }
104    double mean = total / numServers;
105    // It's possible that there aren't enough regions to go around
106    double min;
107    if (numServers > total) {
108      min = ((numServers - total) * mean * mean + (1 - mean) * (1 - mean) * total);
109    } else {
110      // Some will have 1 more than everything else.
111      int numHigh = (int) (total - (Math.floor(mean) * numServers));
112      int numLow = (int) (numServers - numHigh);
113      min = numHigh * (Math.ceil(mean) - mean) * (Math.ceil(mean) - mean)
114        + numLow * (mean - Math.floor(mean)) * (mean - Math.floor(mean));
115    }
116    return Math.sqrt(min);
117  }
118
119  /**
120   * Return the max deviation of distribution Compute max as if all region servers had 0 and one had
121   * the sum of all costs. This must be a zero sum cost for this to make sense.
122   * @param total is total number of regions
123   */
124  public static double getMaxSkew(double total, double numServers) {
125    if (numServers == 0) {
126      return 0;
127    }
128    double mean = total / numServers;
129    return Math.sqrt((total - mean) * (total - mean) + (numServers - 1) * mean * mean);
130  }
131}