001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import java.util.ArrayList; 021import java.util.Arrays; 022import java.util.Collections; 023import java.util.Random; 024 025/** 026 * A class that generates random numbers that follow some distribution. 027 * <p> 028 * Copied from <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>. 029 * Remove after tfile is committed and use the tfile version of this class instead. 030 * </p> 031 */ 032public class RandomDistribution { 033 /** 034 * Interface for discrete (integer) random distributions. 035 */ 036 public interface DiscreteRNG { 037 /** 038 * Get the next random number 039 * @return the next random number. 040 */ 041 int nextInt(); 042 } 043 044 /** 045 * P(i)=1/(max-min) 046 */ 047 public static final class Flat implements DiscreteRNG { 048 private final Random random; 049 private final int min; 050 private final int max; 051 052 /** 053 * Generate random integers from min (inclusive) to max (exclusive) following even distribution. 054 * n * The basic random number generator. n * Minimum integer n * maximum integer (exclusive). 055 */ 056 public Flat(Random random, int min, int max) { 057 if (min >= max) { 058 throw new IllegalArgumentException("Invalid range"); 059 } 060 this.random = random; 061 this.min = min; 062 this.max = max; 063 } 064 065 /** 066 * @see DiscreteRNG#nextInt() 067 */ 068 @Override 069 public int nextInt() { 070 return random.nextInt(max - min) + min; 071 } 072 } 073 074 /** 075 * Zipf distribution. The ratio of the probabilities of integer i and j is defined as follows: 076 * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma. 077 */ 078 public static final class Zipf implements DiscreteRNG { 079 private static final double DEFAULT_EPSILON = 0.001; 080 private final Random random; 081 private final ArrayList<Integer> k; 082 private final ArrayList<Double> v; 083 084 /** 085 * Constructor n * The random number generator. n * minimum integer (inclusvie) n * maximum 086 * integer (exclusive) n * parameter sigma. (sigma > 1.0) 087 */ 088 public Zipf(Random r, int min, int max, double sigma) { 089 this(r, min, max, sigma, DEFAULT_EPSILON); 090 } 091 092 /** 093 * Constructor. n * The random number generator. n * minimum integer (inclusvie) n * maximum 094 * integer (exclusive) n * parameter sigma. (sigma > 1.0) n * Allowable error percentage (0 < 095 * epsilon < 1.0). 096 */ 097 public Zipf(Random r, int min, int max, double sigma, double epsilon) { 098 if ((max <= min) || (sigma <= 1) || (epsilon <= 0) || (epsilon >= 0.5)) { 099 throw new IllegalArgumentException("Invalid arguments"); 100 } 101 random = r; 102 k = new ArrayList<>(); 103 v = new ArrayList<>(); 104 105 double sum = 0; 106 int last = -1; 107 for (int i = min; i < max; ++i) { 108 sum += Math.exp(-sigma * Math.log(i - min + 1)); 109 if ((last == -1) || i * (1 - epsilon) > last) { 110 k.add(i); 111 v.add(sum); 112 last = i; 113 } 114 } 115 116 if (last != max - 1) { 117 k.add(max - 1); 118 v.add(sum); 119 } 120 121 v.set(v.size() - 1, 1.0); 122 123 for (int i = v.size() - 2; i >= 0; --i) { 124 v.set(i, v.get(i) / sum); 125 } 126 } 127 128 /** 129 * @see DiscreteRNG#nextInt() 130 */ 131 @Override 132 public int nextInt() { 133 double d = random.nextDouble(); 134 int idx = Collections.binarySearch(v, d); 135 136 if (idx > 0) { 137 ++idx; 138 } else { 139 idx = -(idx + 1); 140 } 141 142 if (idx >= v.size()) { 143 idx = v.size() - 1; 144 } 145 146 if (idx == 0) { 147 return k.get(0); 148 } 149 150 int ceiling = k.get(idx); 151 int lower = k.get(idx - 1); 152 153 return ceiling - random.nextInt(ceiling - lower); 154 } 155 } 156 157 /** 158 * Binomial distribution. P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n) 159 * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1) 160 */ 161 public static final class Binomial implements DiscreteRNG { 162 private final Random random; 163 private final int min; 164 private final int n; 165 private final double[] v; 166 167 private static double select(int n, int k) { 168 double ret = 1.0; 169 for (int i = k + 1; i <= n; ++i) { 170 ret *= (double) i / (i - k); 171 } 172 return ret; 173 } 174 175 private static double power(double p, int k) { 176 return Math.exp(k * Math.log(p)); 177 } 178 179 /** 180 * Generate random integers from min (inclusive) to max (exclusive) following Binomial 181 * distribution. n * The basic random number generator. n * Minimum integer n * maximum integer 182 * (exclusive). n * parameter. 183 */ 184 public Binomial(Random random, int min, int max, double p) { 185 if (min >= max) { 186 throw new IllegalArgumentException("Invalid range"); 187 } 188 this.random = random; 189 this.min = min; 190 this.n = max - min - 1; 191 if (n > 0) { 192 v = new double[n + 1]; 193 double sum = 0.0; 194 for (int i = 0; i <= n; ++i) { 195 sum += select(n, i) * power(p, i) * power(1 - p, n - i); 196 v[i] = sum; 197 } 198 for (int i = 0; i <= n; ++i) { 199 v[i] /= sum; 200 } 201 } else { 202 v = null; 203 } 204 } 205 206 /** 207 * @see DiscreteRNG#nextInt() 208 */ 209 @Override 210 public int nextInt() { 211 if (v == null) { 212 return min; 213 } 214 double d = random.nextDouble(); 215 int idx = Arrays.binarySearch(v, d); 216 if (idx > 0) { 217 ++idx; 218 } else { 219 idx = -(idx + 1); 220 } 221 222 if (idx >= v.length) { 223 idx = v.length - 1; 224 } 225 return idx + min; 226 } 227 } 228}