001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with this 004 * work for additional information regarding copyright ownership. The ASF 005 * licenses this file to you under the Apache License, Version 2.0 (the 006 * "License"); you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations under 015 * the License. 016 */ 017package org.apache.hadoop.hbase.io.hfile; 018 019import java.util.ArrayList; 020import java.util.Arrays; 021import java.util.Collections; 022import java.util.Random; 023 024/** 025 * A class that generates random numbers that follow some distribution. 026 * <p> 027 * Copied from 028 * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>. 029 * Remove after tfile is committed and use the tfile version of this class 030 * instead.</p> 031 */ 032public class RandomDistribution { 033 /** 034 * Interface for discrete (integer) random distributions. 035 */ 036 public interface DiscreteRNG { 037 /** 038 * Get the next random number 039 * 040 * @return the next random number. 041 */ 042 int nextInt(); 043 } 044 045 /** 046 * P(i)=1/(max-min) 047 */ 048 public static final class Flat implements DiscreteRNG { 049 private final Random random; 050 private final int min; 051 private final int max; 052 053 /** 054 * Generate random integers from min (inclusive) to max (exclusive) 055 * following even distribution. 056 * 057 * @param random 058 * The basic random number generator. 059 * @param min 060 * Minimum integer 061 * @param max 062 * maximum integer (exclusive). 063 * 064 */ 065 public Flat(Random random, int min, int max) { 066 if (min >= max) { 067 throw new IllegalArgumentException("Invalid range"); 068 } 069 this.random = random; 070 this.min = min; 071 this.max = max; 072 } 073 074 /** 075 * @see DiscreteRNG#nextInt() 076 */ 077 @Override 078 public int nextInt() { 079 return random.nextInt(max - min) + min; 080 } 081 } 082 083 /** 084 * Zipf distribution. The ratio of the probabilities of integer i and j is 085 * defined as follows: 086 * 087 * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma. 088 */ 089 public static final class Zipf implements DiscreteRNG { 090 private static final double DEFAULT_EPSILON = 0.001; 091 private final Random random; 092 private final ArrayList<Integer> k; 093 private final ArrayList<Double> v; 094 095 /** 096 * Constructor 097 * 098 * @param r 099 * The random number generator. 100 * @param min 101 * minimum integer (inclusvie) 102 * @param max 103 * maximum integer (exclusive) 104 * @param sigma 105 * parameter sigma. (sigma > 1.0) 106 */ 107 public Zipf(Random r, int min, int max, double sigma) { 108 this(r, min, max, sigma, DEFAULT_EPSILON); 109 } 110 111 /** 112 * Constructor. 113 * 114 * @param r 115 * The random number generator. 116 * @param min 117 * minimum integer (inclusvie) 118 * @param max 119 * maximum integer (exclusive) 120 * @param sigma 121 * parameter sigma. (sigma > 1.0) 122 * @param epsilon 123 * Allowable error percentage (0 < epsilon < 1.0). 124 */ 125 public Zipf(Random r, int min, int max, double sigma, double epsilon) { 126 if ((max <= min) || (sigma <= 1) || (epsilon <= 0) 127 || (epsilon >= 0.5)) { 128 throw new IllegalArgumentException("Invalid arguments"); 129 } 130 random = r; 131 k = new ArrayList<>(); 132 v = new ArrayList<>(); 133 134 double sum = 0; 135 int last = -1; 136 for (int i = min; i < max; ++i) { 137 sum += Math.exp(-sigma * Math.log(i - min + 1)); 138 if ((last == -1) || i * (1 - epsilon) > last) { 139 k.add(i); 140 v.add(sum); 141 last = i; 142 } 143 } 144 145 if (last != max - 1) { 146 k.add(max - 1); 147 v.add(sum); 148 } 149 150 v.set(v.size() - 1, 1.0); 151 152 for (int i = v.size() - 2; i >= 0; --i) { 153 v.set(i, v.get(i) / sum); 154 } 155 } 156 157 /** 158 * @see DiscreteRNG#nextInt() 159 */ 160 @Override 161 public int nextInt() { 162 double d = random.nextDouble(); 163 int idx = Collections.binarySearch(v, d); 164 165 if (idx > 0) { 166 ++idx; 167 } 168 else { 169 idx = -(idx + 1); 170 } 171 172 if (idx >= v.size()) { 173 idx = v.size() - 1; 174 } 175 176 if (idx == 0) { 177 return k.get(0); 178 } 179 180 int ceiling = k.get(idx); 181 int lower = k.get(idx - 1); 182 183 return ceiling - random.nextInt(ceiling - lower); 184 } 185 } 186 187 /** 188 * Binomial distribution. 189 * 190 * P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n) 191 * 192 * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1) 193 */ 194 public static final class Binomial implements DiscreteRNG { 195 private final Random random; 196 private final int min; 197 private final int n; 198 private final double[] v; 199 200 private static double select(int n, int k) { 201 double ret = 1.0; 202 for (int i = k + 1; i <= n; ++i) { 203 ret *= (double) i / (i - k); 204 } 205 return ret; 206 } 207 208 private static double power(double p, int k) { 209 return Math.exp(k * Math.log(p)); 210 } 211 212 /** 213 * Generate random integers from min (inclusive) to max (exclusive) 214 * following Binomial distribution. 215 * 216 * @param random 217 * The basic random number generator. 218 * @param min 219 * Minimum integer 220 * @param max 221 * maximum integer (exclusive). 222 * @param p 223 * parameter. 224 * 225 */ 226 public Binomial(Random random, int min, int max, double p) { 227 if (min >= max) { 228 throw new IllegalArgumentException("Invalid range"); 229 } 230 this.random = random; 231 this.min = min; 232 this.n = max - min - 1; 233 if (n > 0) { 234 v = new double[n + 1]; 235 double sum = 0.0; 236 for (int i = 0; i <= n; ++i) { 237 sum += select(n, i) * power(p, i) * power(1 - p, n - i); 238 v[i] = sum; 239 } 240 for (int i = 0; i <= n; ++i) { 241 v[i] /= sum; 242 } 243 } 244 else { 245 v = null; 246 } 247 } 248 249 /** 250 * @see DiscreteRNG#nextInt() 251 */ 252 @Override 253 public int nextInt() { 254 if (v == null) { 255 return min; 256 } 257 double d = random.nextDouble(); 258 int idx = Arrays.binarySearch(v, d); 259 if (idx > 0) { 260 ++idx; 261 } else { 262 idx = -(idx + 1); 263 } 264 265 if (idx >= v.length) { 266 idx = v.length - 1; 267 } 268 return idx + min; 269 } 270 } 271}