001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.ByteArrayOutputStream;
021import java.io.DataOutputStream;
022import java.nio.ByteBuffer;
023import junit.framework.TestCase;
024import org.apache.hadoop.hbase.HBaseClassTestRule;
025import org.apache.hadoop.hbase.nio.MultiByteBuff;
026import org.apache.hadoop.hbase.testclassification.MiscTests;
027import org.apache.hadoop.hbase.testclassification.SmallTests;
028import org.junit.ClassRule;
029import org.junit.experimental.categories.Category;
030
031@Category({MiscTests.class, SmallTests.class})
032public class TestBloomFilterChunk extends TestCase {
033
034  @ClassRule
035  public static final HBaseClassTestRule CLASS_RULE =
036      HBaseClassTestRule.forClass(TestBloomFilterChunk.class);
037
038  public void testBasicBloom() throws Exception {
039    BloomFilterChunk bf1 = new BloomFilterChunk(1000, (float)0.01, Hash.MURMUR_HASH, 0);
040    BloomFilterChunk bf2 = new BloomFilterChunk(1000, (float)0.01, Hash.MURMUR_HASH, 0);
041    bf1.allocBloom();
042    bf2.allocBloom();
043
044    // test 1: verify no fundamental false negatives or positives
045    byte[] key1 = {1,2,3,4,5,6,7,8,9};
046    byte[] key2 = {1,2,3,4,5,6,7,8,7};
047
048    bf1.add(key1, 0, key1.length);
049    bf2.add(key2, 0, key2.length);
050
051    assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf1.bloom), 0,
052        (int) bf1.byteSize, bf1.hash, bf1.hashCount));
053    assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf1.bloom), 0,
054        (int) bf1.byteSize, bf1.hash, bf1.hashCount));
055    assertFalse(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf2.bloom), 0,
056        (int) bf2.byteSize, bf2.hash, bf2.hashCount));
057    assertTrue(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf2.bloom), 0,
058        (int) bf2.byteSize, bf2.hash, bf2.hashCount));
059
060    byte [] bkey = {1,2,3,4};
061    byte [] bval = "this is a much larger byte array".getBytes();
062
063    bf1.add(bkey, 0, bkey.length);
064    bf1.add(bval, 1, bval.length-1);
065
066    assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bf1.bloom), 0,
067        (int) bf1.byteSize, bf1.hash, bf1.hashCount));
068    assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bf1.bloom),
069        0, (int) bf1.byteSize, bf1.hash, bf1.hashCount));
070    assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bf1.bloom), 0,
071        (int) bf1.byteSize, bf1.hash, bf1.hashCount));
072
073    // test 2: serialization & deserialization.
074    // (convert bloom to byte array & read byte array back in as input)
075    ByteArrayOutputStream bOut = new ByteArrayOutputStream();
076    bf1.writeBloom(new DataOutputStream(bOut));
077    ByteBuffer bb = ByteBuffer.wrap(bOut.toByteArray());
078    BloomFilterChunk newBf1 = new BloomFilterChunk(1000, (float)0.01,
079        Hash.MURMUR_HASH, 0);
080    assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bb), 0,
081        (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
082    assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bb), 0,
083        (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
084    assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bb), 0,
085        (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
086    assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bb), 0,
087        (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
088    assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0,
089        (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
090    assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0,
091        (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
092
093    System.out.println("Serialized as " + bOut.size() + " bytes");
094    assertTrue(bOut.size() - bf1.byteSize < 10); //... allow small padding
095  }
096
097  public void testBloomFold() throws Exception {
098    // test: foldFactor < log(max/actual)
099    BloomFilterChunk b = new BloomFilterChunk(1003, (float) 0.01,
100        Hash.MURMUR_HASH, 2);
101    b.allocBloom();
102    long origSize = b.getByteSize();
103    assertEquals(1204, origSize);
104    for (int i = 0; i < 12; ++i) {
105      byte[] ib = Bytes.toBytes(i);
106      b.add(ib, 0, ib.length);
107    }
108    b.compactBloom();
109    assertEquals(origSize>>2, b.getByteSize());
110    int falsePositives = 0;
111    for (int i = 0; i < 25; ++i) {
112      byte[] bytes = Bytes.toBytes(i);
113      if (BloomFilterUtil.contains(bytes, 0, bytes.length, new MultiByteBuff(b.bloom), 0,
114          (int) b.byteSize, b.hash, b.hashCount)) {
115        if (i >= 12)
116          falsePositives++;
117      } else {
118        assertFalse(i < 12);
119      }
120    }
121    assertTrue(falsePositives <= 1);
122
123    // test: foldFactor > log(max/actual)
124  }
125
126  public void testBloomPerf() throws Exception {
127    // add
128    float err = (float)0.01;
129    BloomFilterChunk b = new BloomFilterChunk(10*1000*1000, (float)err, Hash.MURMUR_HASH, 3);
130    b.allocBloom();
131    long startTime =  System.currentTimeMillis();
132    long origSize = b.getByteSize();
133    for (int i = 0; i < 1*1000*1000; ++i) {
134      byte[] ib = Bytes.toBytes(i);
135      b.add(ib, 0, ib.length);
136    }
137    long endTime = System.currentTimeMillis();
138    System.out.println("Total Add time = " + (endTime - startTime) + "ms");
139
140    // fold
141    startTime = System.currentTimeMillis();
142    b.compactBloom();
143    endTime = System.currentTimeMillis();
144    System.out.println("Total Fold time = " + (endTime - startTime) + "ms");
145    assertTrue(origSize >= b.getByteSize()<<3);
146
147    // test
148    startTime = System.currentTimeMillis();
149    int falsePositives = 0;
150    for (int i = 0; i < 2*1000*1000; ++i) {
151
152      byte[] bytes = Bytes.toBytes(i);
153      if (BloomFilterUtil.contains(bytes, 0, bytes.length, new MultiByteBuff(b.bloom), 0,
154          (int) b.byteSize, b.hash, b.hashCount)) {
155        if (i >= 1 * 1000 * 1000)
156          falsePositives++;
157      } else {
158        assertFalse(i < 1*1000*1000);
159      }
160    }
161    endTime = System.currentTimeMillis();
162    System.out.println("Total Contains time = " + (endTime - startTime) + "ms");
163    System.out.println("False Positive = " + falsePositives);
164    assertTrue(falsePositives <= (1*1000*1000)*err);
165
166    // test: foldFactor > log(max/actual)
167  }
168
169  public void testSizing() {
170    int bitSize = 8 * 128 * 1024; // 128 KB
171    double errorRate = 0.025; // target false positive rate
172
173    // How many keys can we store in a Bloom filter of this size maintaining
174    // the given false positive rate, not taking into account that the n
175    long maxKeys = BloomFilterUtil.idealMaxKeys(bitSize, errorRate);
176    assertEquals(136570, maxKeys);
177
178    // A reverse operation: how many bits would we need to store this many keys
179    // and keep the same low false positive rate?
180    long bitSize2 = BloomFilterUtil.computeBitSize(maxKeys, errorRate);
181
182    // The bit size comes out a little different due to rounding.
183    assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5);
184  }
185
186  public void testFoldableByteSize() {
187    assertEquals(128, BloomFilterUtil.computeFoldableByteSize(1000, 5));
188    assertEquals(640, BloomFilterUtil.computeFoldableByteSize(5001, 4));
189  }
190
191
192}
193