@InterfaceAudience.Private public final class BloomFilterUtil extends Object
Modifier and Type | Field and Description |
---|---|
static byte[] |
bitvals
Bit-value lookup array to prevent doing the same work over and over
|
static double |
LOG2_SQUARED
Used in computing the optimal Bloom filter size.
|
private static Random |
randomGeneratorForTest
A random number generator to use for "fake lookups" when testing to
estimate the ideal false positive rate.
|
static String |
STATS_RECORD_SEP
Record separator for the Bloom filter statistics human-readable string
|
Modifier | Constructor and Description |
---|---|
private |
BloomFilterUtil()
Private constructor to keep this class from being instantiated.
|
Modifier and Type | Method and Description |
---|---|
static double |
actualErrorRate(long maxKeys,
long bitSize,
int functionCount)
Computes the actual error rate for the given number of elements, number
of bits, and number of hash functions.
|
(package private) static boolean |
checkBit(int pos,
ByteBuff bloomBuf,
int bloomOffset)
Check if bit at specified index is 1.
|
static long |
computeBitSize(long maxKeys,
double errorRate) |
static int |
computeFoldableByteSize(long bitSize,
int foldFactor)
Increases the given byte size of a Bloom filter until it can be folded by
the given factor.
|
static long |
computeMaxKeys(long bitSize,
double errorRate,
int hashCount)
The maximum number of keys we can put into a Bloom filter of a certain
size to get the given error rate, with the given number of hash functions.
|
static boolean |
contains(byte[] buf,
int offset,
int length,
ByteBuff bloomBuf,
int bloomOffset,
int bloomSize,
Hash hash,
int hashCount) |
private static <T> boolean |
contains(ByteBuff bloomBuf,
int bloomOffset,
int bloomSize,
Hash hash,
int hashCount,
HashKey<T> hashKey) |
static boolean |
contains(Cell cell,
ByteBuff bloomBuf,
int bloomOffset,
int bloomSize,
Hash hash,
int hashCount,
BloomType type) |
static BloomFilterChunk |
createBySize(int byteSizeHint,
double errorRate,
int hashType,
int foldFactor,
BloomType bloomType)
Creates a Bloom filter chunk of the given size.
|
static String |
formatStats(BloomFilterBase bloomFilter)
A human-readable string with statistics for the given Bloom filter.
|
static long |
idealMaxKeys(long bitSize,
double errorRate)
The maximum number of keys we can put into a Bloom filter of a certain
size to maintain the given error rate, assuming the number of hash
functions is chosen optimally and does not even have to be an integer
(hence the "ideal" in the function name).
|
static int |
optimalFunctionCount(int maxKeys,
long bitSize) |
static void |
setFakeLookupMode(boolean enabled) |
static String |
toString(BloomFilterChunk bloomFilter) |
public static final String STATS_RECORD_SEP
public static final double LOG2_SQUARED
private static Random randomGeneratorForTest
public static final byte[] bitvals
private BloomFilterUtil()
public static long computeBitSize(long maxKeys, double errorRate)
maxKeys
- errorRate
- public static void setFakeLookupMode(boolean enabled)
public static long idealMaxKeys(long bitSize, double errorRate)
bitSize
- errorRate
- for a more precise estimate
public static long computeMaxKeys(long bitSize, double errorRate, int hashCount)
bitSize
- errorRate
- hashCount
- public static double actualErrorRate(long maxKeys, long bitSize, int functionCount)
maxKeys
- bitSize
- functionCount
- public static int computeFoldableByteSize(long bitSize, int foldFactor)
bitSize
- foldFactor
- public static int optimalFunctionCount(int maxKeys, long bitSize)
public static BloomFilterChunk createBySize(int byteSizeHint, double errorRate, int hashType, int foldFactor, BloomType bloomType)
byteSizeHint
- the desired number of bytes for the Bloom filter bit
array. Will be increased so that folding is possible.errorRate
- target false positive rate of the Bloom filterhashType
- Bloom filter hash function typefoldFactor
- bloomType
- public static boolean contains(byte[] buf, int offset, int length, ByteBuff bloomBuf, int bloomOffset, int bloomSize, Hash hash, int hashCount)
private static <T> boolean contains(ByteBuff bloomBuf, int bloomOffset, int bloomSize, Hash hash, int hashCount, HashKey<T> hashKey)
public static boolean contains(Cell cell, ByteBuff bloomBuf, int bloomOffset, int bloomSize, Hash hash, int hashCount, BloomType type)
static boolean checkBit(int pos, ByteBuff bloomBuf, int bloomOffset)
pos
- index of bitpublic static String formatStats(BloomFilterBase bloomFilter)
bloomFilter
- the Bloom filter to output statistics for;STATS_RECORD_SEP
.public static String toString(BloomFilterChunk bloomFilter)
Copyright © 2007–2019 The Apache Software Foundation. All rights reserved.