001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Collections;
023import java.util.Random;
024
025/**
026 * A class that generates random numbers that follow some distribution.
027 * <p>
028 * Copied from <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
029 * Remove after tfile is committed and use the tfile version of this class instead.
030 * </p>
031 */
032public class RandomDistribution {
033  /**
034   * Interface for discrete (integer) random distributions.
035   */
036  public interface DiscreteRNG {
037    /**
038     * Get the next random number
039     * @return the next random number.
040     */
041    int nextInt();
042  }
043
044  /**
045   * P(i)=1/(max-min)
046   */
047  public static final class Flat implements DiscreteRNG {
048    private final Random random;
049    private final int min;
050    private final int max;
051
052    /**
053     * Generate random integers from min (inclusive) to max (exclusive) following even distribution.
054     * The basic random number generator. Minimum integer maximum integer (exclusive).
055     */
056    public Flat(Random random, int min, int max) {
057      if (min >= max) {
058        throw new IllegalArgumentException("Invalid range");
059      }
060      this.random = random;
061      this.min = min;
062      this.max = max;
063    }
064
065    /**
066     * @see DiscreteRNG#nextInt()
067     */
068    @Override
069    public int nextInt() {
070      return random.nextInt(max - min) + min;
071    }
072  }
073
074  /**
075   * Zipf distribution. The ratio of the probabilities of integer i and j is defined as follows:
076   * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma.
077   */
078  public static final class Zipf implements DiscreteRNG {
079    private static final double DEFAULT_EPSILON = 0.001;
080    private final Random random;
081    private final ArrayList<Integer> k;
082    private final ArrayList<Double> v;
083
084    /**
085     * Constructor The random number generator. minimum integer (inclusvie) maximum integer
086     * (exclusive) parameter sigma. (sigma > 1.0)
087     */
088    public Zipf(Random r, int min, int max, double sigma) {
089      this(r, min, max, sigma, DEFAULT_EPSILON);
090    }
091
092    /**
093     * Constructor. The random number generator. minimum integer (inclusvie) maximum integer
094     * (exclusive) parameter sigma. (sigma > 1.0) Allowable error percentage (0 < epsilon < 1.0).
095     */
096    public Zipf(Random r, int min, int max, double sigma, double epsilon) {
097      if ((max <= min) || (sigma <= 1) || (epsilon <= 0) || (epsilon >= 0.5)) {
098        throw new IllegalArgumentException("Invalid arguments");
099      }
100      random = r;
101      k = new ArrayList<>();
102      v = new ArrayList<>();
103
104      double sum = 0;
105      int last = -1;
106      for (int i = min; i < max; ++i) {
107        sum += Math.exp(-sigma * Math.log(i - min + 1));
108        if ((last == -1) || i * (1 - epsilon) > last) {
109          k.add(i);
110          v.add(sum);
111          last = i;
112        }
113      }
114
115      if (last != max - 1) {
116        k.add(max - 1);
117        v.add(sum);
118      }
119
120      v.set(v.size() - 1, 1.0);
121
122      for (int i = v.size() - 2; i >= 0; --i) {
123        v.set(i, v.get(i) / sum);
124      }
125    }
126
127    /**
128     * @see DiscreteRNG#nextInt()
129     */
130    @Override
131    public int nextInt() {
132      double d = random.nextDouble();
133      int idx = Collections.binarySearch(v, d);
134
135      if (idx > 0) {
136        ++idx;
137      } else {
138        idx = -(idx + 1);
139      }
140
141      if (idx >= v.size()) {
142        idx = v.size() - 1;
143      }
144
145      if (idx == 0) {
146        return k.get(0);
147      }
148
149      int ceiling = k.get(idx);
150      int lower = k.get(idx - 1);
151
152      return ceiling - random.nextInt(ceiling - lower);
153    }
154  }
155
156  /**
157   * Binomial distribution. P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n)
158   * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1)
159   */
160  public static final class Binomial implements DiscreteRNG {
161    private final Random random;
162    private final int min;
163    private final int n;
164    private final double[] v;
165
166    private static double select(int n, int k) {
167      double ret = 1.0;
168      for (int i = k + 1; i <= n; ++i) {
169        ret *= (double) i / (i - k);
170      }
171      return ret;
172    }
173
174    private static double power(double p, int k) {
175      return Math.exp(k * Math.log(p));
176    }
177
178    /**
179     * Generate random integers from min (inclusive) to max (exclusive) following Binomial
180     * distribution. The basic random number generator. Minimum integer maximum integer (exclusive).
181     * parameter.
182     */
183    public Binomial(Random random, int min, int max, double p) {
184      if (min >= max) {
185        throw new IllegalArgumentException("Invalid range");
186      }
187      this.random = random;
188      this.min = min;
189      this.n = max - min - 1;
190      if (n > 0) {
191        v = new double[n + 1];
192        double sum = 0.0;
193        for (int i = 0; i <= n; ++i) {
194          sum += select(n, i) * power(p, i) * power(1 - p, n - i);
195          v[i] = sum;
196        }
197        for (int i = 0; i <= n; ++i) {
198          v[i] /= sum;
199        }
200      } else {
201        v = null;
202      }
203    }
204
205    /**
206     * @see DiscreteRNG#nextInt()
207     */
208    @Override
209    public int nextInt() {
210      if (v == null) {
211        return min;
212      }
213      double d = random.nextDouble();
214      int idx = Arrays.binarySearch(v, d);
215      if (idx > 0) {
216        ++idx;
217      } else {
218        idx = -(idx + 1);
219      }
220
221      if (idx >= v.length) {
222        idx = v.length - 1;
223      }
224      return idx + min;
225    }
226  }
227}