001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import java.util.function.Consumer;
021import org.apache.yetus.audience.InterfaceAudience;
022
023/**
024 * A helper class to compute a scaled cost using
025 * {@link org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#DescriptiveStatistics()}.
026 * It assumes that this is a zero sum set of costs. It assumes that the worst case possible is all
027 * of the elements in one region server and the rest having 0.
028 */
029@InterfaceAudience.Private
030final class DoubleArrayCost {
031
032  private double[] costs;
033
034  // computeCost call is expensive so we use this flag to indicate whether we need to recalculate
035  // the cost by calling computeCost
036  private boolean costsChanged;
037
038  private double cost;
039
040  void prepare(int length) {
041    if (costs == null || costs.length != length) {
042      costs = new double[length];
043    }
044  }
045
046  /**
047   * We do not want to introduce a getCosts method to let upper layer get the cost array directly,
048   * so here we introduce this method to take a {@link Consumer} as parameter, where we will pass
049   * the actual cost array in, so you can change the element of the cost array in the
050   * {@link Consumer} implementation.
051   * <p/>
052   * Usually, in prepare method, you need to fill all the elements of the cost array, while in
053   * regionMoved method, you just need to update the element for the effect region servers.
054   */
055  void applyCostsChange(Consumer<double[]> consumer) {
056    consumer.accept(costs);
057    costsChanged = true;
058  }
059
060  double cost() {
061    if (costsChanged) {
062      cost = computeCost(costs);
063      costsChanged = false;
064    }
065    return cost;
066  }
067
068  private static double computeCost(double[] stats) {
069    if (stats == null || stats.length == 0) {
070      return 0;
071    }
072    double totalCost = 0;
073    double total = getSum(stats);
074
075    double count = stats.length;
076    double mean = total / count;
077    for (int i = 0; i < stats.length; i++) {
078      double n = stats[i];
079      double diff = (mean - n) * (mean - n);
080      totalCost += diff;
081    }
082    // No need to compute standard deviation with division by cluster size when scaling.
083    totalCost = Math.sqrt(totalCost);
084    return CostFunction.scale(getMinSkew(total, count), getMaxSkew(total, count), totalCost);
085  }
086
087  private static double getSum(double[] stats) {
088    double total = 0;
089    for (double s : stats) {
090      total += s;
091    }
092    return total;
093  }
094
095  /**
096   * Return the min skew of distribution
097   * @param total is total number of regions
098   */
099  public static double getMinSkew(double total, double numServers) {
100    if (numServers == 0) {
101      return 0;
102    }
103    double mean = total / numServers;
104    // It's possible that there aren't enough regions to go around
105    double min;
106    if (numServers > total) {
107      min = ((numServers - total) * mean * mean + (1 - mean) * (1 - mean) * total);
108    } else {
109      // Some will have 1 more than everything else.
110      int numHigh = (int) (total - (Math.floor(mean) * numServers));
111      int numLow = (int) (numServers - numHigh);
112      min = numHigh * (Math.ceil(mean) - mean) * (Math.ceil(mean) - mean)
113        + numLow * (mean - Math.floor(mean)) * (mean - Math.floor(mean));
114    }
115    return Math.sqrt(min);
116  }
117
118  /**
119   * Return the max deviation of distribution Compute max as if all region servers had 0 and one had
120   * the sum of all costs. This must be a zero sum cost for this to make sense.
121   */
122  public static double getMaxSkew(double total, double numServers) {
123    if (numServers == 0) {
124      return 0;
125    }
126    double mean = total / numServers;
127    return Math.sqrt((total - mean) * (total - mean) + (numServers - 1) * mean * mean);
128  }
129}