001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Collections;
023import java.util.Random;
024
025/**
026 * A class that generates random numbers that follow some distribution.
027 * <p>
028 * Copied from <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
029 * Remove after tfile is committed and use the tfile version of this class instead.
030 * </p>
031 */
032public class RandomDistribution {
033  /**
034   * Interface for discrete (integer) random distributions.
035   */
036  public interface DiscreteRNG {
037    /**
038     * Get the next random number
039     * @return the next random number.
040     */
041    int nextInt();
042  }
043
044  /**
045   * P(i)=1/(max-min)
046   */
047  public static final class Flat implements DiscreteRNG {
048    private final Random random;
049    private final int min;
050    private final int max;
051
052    /**
053     * Generate random integers from min (inclusive) to max (exclusive) following even distribution.
054     * n * The basic random number generator. n * Minimum integer n * maximum integer (exclusive).
055     */
056    public Flat(Random random, int min, int max) {
057      if (min >= max) {
058        throw new IllegalArgumentException("Invalid range");
059      }
060      this.random = random;
061      this.min = min;
062      this.max = max;
063    }
064
065    /**
066     * @see DiscreteRNG#nextInt()
067     */
068    @Override
069    public int nextInt() {
070      return random.nextInt(max - min) + min;
071    }
072  }
073
074  /**
075   * Zipf distribution. The ratio of the probabilities of integer i and j is defined as follows:
076   * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma.
077   */
078  public static final class Zipf implements DiscreteRNG {
079    private static final double DEFAULT_EPSILON = 0.001;
080    private final Random random;
081    private final ArrayList<Integer> k;
082    private final ArrayList<Double> v;
083
084    /**
085     * Constructor n * The random number generator. n * minimum integer (inclusvie) n * maximum
086     * integer (exclusive) n * parameter sigma. (sigma > 1.0)
087     */
088    public Zipf(Random r, int min, int max, double sigma) {
089      this(r, min, max, sigma, DEFAULT_EPSILON);
090    }
091
092    /**
093     * Constructor. n * The random number generator. n * minimum integer (inclusvie) n * maximum
094     * integer (exclusive) n * parameter sigma. (sigma > 1.0) n * Allowable error percentage (0 <
095     * epsilon < 1.0).
096     */
097    public Zipf(Random r, int min, int max, double sigma, double epsilon) {
098      if ((max <= min) || (sigma <= 1) || (epsilon <= 0) || (epsilon >= 0.5)) {
099        throw new IllegalArgumentException("Invalid arguments");
100      }
101      random = r;
102      k = new ArrayList<>();
103      v = new ArrayList<>();
104
105      double sum = 0;
106      int last = -1;
107      for (int i = min; i < max; ++i) {
108        sum += Math.exp(-sigma * Math.log(i - min + 1));
109        if ((last == -1) || i * (1 - epsilon) > last) {
110          k.add(i);
111          v.add(sum);
112          last = i;
113        }
114      }
115
116      if (last != max - 1) {
117        k.add(max - 1);
118        v.add(sum);
119      }
120
121      v.set(v.size() - 1, 1.0);
122
123      for (int i = v.size() - 2; i >= 0; --i) {
124        v.set(i, v.get(i) / sum);
125      }
126    }
127
128    /**
129     * @see DiscreteRNG#nextInt()
130     */
131    @Override
132    public int nextInt() {
133      double d = random.nextDouble();
134      int idx = Collections.binarySearch(v, d);
135
136      if (idx > 0) {
137        ++idx;
138      } else {
139        idx = -(idx + 1);
140      }
141
142      if (idx >= v.size()) {
143        idx = v.size() - 1;
144      }
145
146      if (idx == 0) {
147        return k.get(0);
148      }
149
150      int ceiling = k.get(idx);
151      int lower = k.get(idx - 1);
152
153      return ceiling - random.nextInt(ceiling - lower);
154    }
155  }
156
157  /**
158   * Binomial distribution. P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n)
159   * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1)
160   */
161  public static final class Binomial implements DiscreteRNG {
162    private final Random random;
163    private final int min;
164    private final int n;
165    private final double[] v;
166
167    private static double select(int n, int k) {
168      double ret = 1.0;
169      for (int i = k + 1; i <= n; ++i) {
170        ret *= (double) i / (i - k);
171      }
172      return ret;
173    }
174
175    private static double power(double p, int k) {
176      return Math.exp(k * Math.log(p));
177    }
178
179    /**
180     * Generate random integers from min (inclusive) to max (exclusive) following Binomial
181     * distribution. n * The basic random number generator. n * Minimum integer n * maximum integer
182     * (exclusive). n * parameter.
183     */
184    public Binomial(Random random, int min, int max, double p) {
185      if (min >= max) {
186        throw new IllegalArgumentException("Invalid range");
187      }
188      this.random = random;
189      this.min = min;
190      this.n = max - min - 1;
191      if (n > 0) {
192        v = new double[n + 1];
193        double sum = 0.0;
194        for (int i = 0; i <= n; ++i) {
195          sum += select(n, i) * power(p, i) * power(1 - p, n - i);
196          v[i] = sum;
197        }
198        for (int i = 0; i <= n; ++i) {
199          v[i] /= sum;
200        }
201      } else {
202        v = null;
203      }
204    }
205
206    /**
207     * @see DiscreteRNG#nextInt()
208     */
209    @Override
210    public int nextInt() {
211      if (v == null) {
212        return min;
213      }
214      double d = random.nextDouble();
215      int idx = Arrays.binarySearch(v, d);
216      if (idx > 0) {
217        ++idx;
218      } else {
219        idx = -(idx + 1);
220      }
221
222      if (idx >= v.length) {
223        idx = v.length - 1;
224      }
225      return idx + min;
226    }
227  }
228}