001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with this
004 * work for additional information regarding copyright ownership. The ASF
005 * licenses this file to you under the Apache License, Version 2.0 (the
006 * "License"); you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations under
015 * the License.
016 */
017package org.apache.hadoop.hbase.io.hfile;
018
019import java.util.ArrayList;
020import java.util.Arrays;
021import java.util.Collections;
022import java.util.Random;
023
024/**
025 * A class that generates random numbers that follow some distribution.
026 * <p>
027 * Copied from
028 * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
029 * Remove after tfile is committed and use the tfile version of this class
030 * instead.</p>
031 */
032public class RandomDistribution {
033  /**
034   * Interface for discrete (integer) random distributions.
035   */
036  public interface DiscreteRNG {
037    /**
038     * Get the next random number
039     *
040     * @return the next random number.
041     */
042    int nextInt();
043  }
044
045  /**
046   * P(i)=1/(max-min)
047   */
048  public static final class Flat implements DiscreteRNG {
049    private final Random random;
050    private final int min;
051    private final int max;
052
053    /**
054     * Generate random integers from min (inclusive) to max (exclusive)
055     * following even distribution.
056     *
057     * @param random
058     *          The basic random number generator.
059     * @param min
060     *          Minimum integer
061     * @param max
062     *          maximum integer (exclusive).
063     *
064     */
065    public Flat(Random random, int min, int max) {
066      if (min >= max) {
067        throw new IllegalArgumentException("Invalid range");
068      }
069      this.random = random;
070      this.min = min;
071      this.max = max;
072    }
073
074    /**
075     * @see DiscreteRNG#nextInt()
076     */
077    @Override
078    public int nextInt() {
079      return random.nextInt(max - min) + min;
080    }
081  }
082
083  /**
084   * Zipf distribution. The ratio of the probabilities of integer i and j is
085   * defined as follows:
086   *
087   * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma.
088   */
089  public static final class Zipf implements DiscreteRNG {
090    private static final double DEFAULT_EPSILON = 0.001;
091    private final Random random;
092    private final ArrayList<Integer> k;
093    private final ArrayList<Double> v;
094
095    /**
096     * Constructor
097     *
098     * @param r
099     *          The random number generator.
100     * @param min
101     *          minimum integer (inclusvie)
102     * @param max
103     *          maximum integer (exclusive)
104     * @param sigma
105     *          parameter sigma. (sigma > 1.0)
106     */
107    public Zipf(Random r, int min, int max, double sigma) {
108      this(r, min, max, sigma, DEFAULT_EPSILON);
109    }
110
111    /**
112     * Constructor.
113     *
114     * @param r
115     *          The random number generator.
116     * @param min
117     *          minimum integer (inclusvie)
118     * @param max
119     *          maximum integer (exclusive)
120     * @param sigma
121     *          parameter sigma. (sigma > 1.0)
122     * @param epsilon
123     *          Allowable error percentage (0 < epsilon < 1.0).
124     */
125    public Zipf(Random r, int min, int max, double sigma, double epsilon) {
126      if ((max <= min) || (sigma <= 1) || (epsilon <= 0)
127          || (epsilon >= 0.5)) {
128        throw new IllegalArgumentException("Invalid arguments");
129      }
130      random = r;
131      k = new ArrayList<>();
132      v = new ArrayList<>();
133
134      double sum = 0;
135      int last = -1;
136      for (int i = min; i < max; ++i) {
137        sum += Math.exp(-sigma * Math.log(i - min + 1));
138        if ((last == -1) || i * (1 - epsilon) > last) {
139          k.add(i);
140          v.add(sum);
141          last = i;
142        }
143      }
144
145      if (last != max - 1) {
146        k.add(max - 1);
147        v.add(sum);
148      }
149
150      v.set(v.size() - 1, 1.0);
151
152      for (int i = v.size() - 2; i >= 0; --i) {
153        v.set(i, v.get(i) / sum);
154      }
155    }
156
157    /**
158     * @see DiscreteRNG#nextInt()
159     */
160    @Override
161    public int nextInt() {
162      double d = random.nextDouble();
163      int idx = Collections.binarySearch(v, d);
164
165      if (idx > 0) {
166        ++idx;
167      }
168      else {
169        idx = -(idx + 1);
170      }
171
172      if (idx >= v.size()) {
173        idx = v.size() - 1;
174      }
175
176      if (idx == 0) {
177        return k.get(0);
178      }
179
180      int ceiling = k.get(idx);
181      int lower = k.get(idx - 1);
182
183      return ceiling - random.nextInt(ceiling - lower);
184    }
185  }
186
187  /**
188   * Binomial distribution.
189   *
190   * P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n)
191   *
192   * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1)
193   */
194  public static final class Binomial implements DiscreteRNG {
195    private final Random random;
196    private final int min;
197    private final int n;
198    private final double[] v;
199
200    private static double select(int n, int k) {
201      double ret = 1.0;
202      for (int i = k + 1; i <= n; ++i) {
203        ret *= (double) i / (i - k);
204      }
205      return ret;
206    }
207
208    private static double power(double p, int k) {
209      return Math.exp(k * Math.log(p));
210    }
211
212    /**
213     * Generate random integers from min (inclusive) to max (exclusive)
214     * following Binomial distribution.
215     *
216     * @param random
217     *          The basic random number generator.
218     * @param min
219     *          Minimum integer
220     * @param max
221     *          maximum integer (exclusive).
222     * @param p
223     *          parameter.
224     *
225     */
226    public Binomial(Random random, int min, int max, double p) {
227      if (min >= max) {
228        throw new IllegalArgumentException("Invalid range");
229      }
230      this.random = random;
231      this.min = min;
232      this.n = max - min - 1;
233      if (n > 0) {
234        v = new double[n + 1];
235        double sum = 0.0;
236        for (int i = 0; i <= n; ++i) {
237          sum += select(n, i) * power(p, i) * power(1 - p, n - i);
238          v[i] = sum;
239        }
240        for (int i = 0; i <= n; ++i) {
241          v[i] /= sum;
242        }
243      }
244      else {
245        v = null;
246      }
247    }
248
249    /**
250     * @see DiscreteRNG#nextInt()
251     */
252    @Override
253    public int nextInt() {
254      if (v == null) {
255        return min;
256      }
257      double d = random.nextDouble();
258      int idx = Arrays.binarySearch(v, d);
259      if (idx > 0) {
260        ++idx;
261      } else {
262        idx = -(idx + 1);
263      }
264
265      if (idx >= v.length) {
266        idx = v.length - 1;
267      }
268      return idx + min;
269    }
270  }
271}