View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.DataInput;
23  import java.io.IOException;
24  import java.nio.ByteBuffer;
25  
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.io.hfile.BlockType;
28  import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
29  import org.apache.hadoop.hbase.io.hfile.HFile;
30  import org.apache.hadoop.hbase.io.hfile.HFileBlock;
31  import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
32  
33  /**
34   * A Bloom filter implementation built on top of {@link ByteBloomFilter},
35   * encapsulating a set of fixed-size Bloom filters written out at the time of
36   * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
37   * block stream, and loaded on demand at query time. This class only provides
38   * reading capabilities.
39   */
40  @InterfaceAudience.Private
41  public class CompoundBloomFilter extends CompoundBloomFilterBase
42      implements BloomFilter {
43  
44    /** Used to load chunks on demand */
45    private HFile.Reader reader;
46  
47    private HFileBlockIndex.BlockIndexReader index;
48  
49    private int hashCount;
50    private Hash hash;
51  
52    private long[] numQueriesPerChunk;
53    private long[] numPositivesPerChunk;
54  
55    /**
56     * De-serialization for compound Bloom filter metadata. Must be consistent
57     * with what {@link CompoundBloomFilterWriter} does.
58     *
59     * @param meta serialized Bloom filter metadata without any magic blocks
60     * @throws IOException
61     */
62    public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
63        throws IOException {
64      this.reader = reader;
65  
66      totalByteSize = meta.readLong();
67      hashCount = meta.readInt();
68      hashType = meta.readInt();
69      totalKeyCount = meta.readLong();
70      totalMaxKeys = meta.readLong();
71      numChunks = meta.readInt();
72      byte[] comparatorClassName = Bytes.readByteArray(meta);
73      // The writer would have return 0 as the vint length for the case of 
74      // Bytes.BYTES_RAWCOMPARATOR.  In such cases do not initialize comparator, it can be
75      // null
76      if (comparatorClassName.length != 0) {
77        comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
78      }
79  
80      hash = Hash.getInstance(hashType);
81      if (hash == null) {
82        throw new IllegalArgumentException("Invalid hash type: " + hashType);
83      }
84  
85      index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
86      index.readRootIndex(meta, numChunks);
87    }
88  
89    @Override
90    public boolean contains(byte[] key, int keyOffset, int keyLength,
91        ByteBuffer bloom) {
92      // We try to store the result in this variable so we can update stats for
93      // testing, but when an error happens, we log a message and return.
94      boolean result;
95  
96      int block = index.rootBlockContainingKey(key, keyOffset,
97          keyLength);
98      if (block < 0) {
99        result = false; // This key is not in the file.
100     } else {
101       HFileBlock bloomBlock;
102       try {
103         // We cache the block and use a positional read.
104         bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
105             index.getRootBlockDataSize(block), true, true, false, true,
106             BlockType.BLOOM_CHUNK, null);
107       } catch (IOException ex) {
108         // The Bloom filter is broken, turn it off.
109         throw new IllegalArgumentException(
110             "Failed to load Bloom block for key "
111                 + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
112       }
113 
114       ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
115       result = ByteBloomFilter.contains(key, keyOffset, keyLength,
116           bloomBuf, bloomBlock.headerSize(),
117           bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
118     }
119 
120     if (numQueriesPerChunk != null && block >= 0) {
121       // Update statistics. Only used in unit tests.
122       ++numQueriesPerChunk[block];
123       if (result)
124         ++numPositivesPerChunk[block];
125     }
126 
127     return result;
128   }
129 
130   public boolean supportsAutoLoading() {
131     return true;
132   }
133 
134   public int getNumChunks() {
135     return numChunks;
136   }
137 
138   public void enableTestingStats() {
139     numQueriesPerChunk = new long[numChunks];
140     numPositivesPerChunk = new long[numChunks];
141   }
142 
143   public String formatTestingStats() {
144     StringBuilder sb = new StringBuilder();
145     for (int i = 0; i < numChunks; ++i) {
146       sb.append("chunk #");
147       sb.append(i);
148       sb.append(": queries=");
149       sb.append(numQueriesPerChunk[i]);
150       sb.append(", positives=");
151       sb.append(numPositivesPerChunk[i]);
152       sb.append(", positiveRatio=");
153       sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
154       sb.append(";\n");
155     }
156     return sb.toString();
157   }
158 
159   public long getNumQueriesForTesting(int chunk) {
160     return numQueriesPerChunk[chunk];
161   }
162 
163   public long getNumPositivesForTesting(int chunk) {
164     return numPositivesPerChunk[chunk];
165   }
166 
167   @Override
168   public String toString() {
169     StringBuilder sb = new StringBuilder();
170     sb.append(ByteBloomFilter.formatStats(this));
171     sb.append(ByteBloomFilter.STATS_RECORD_SEP + 
172         "Number of chunks: " + numChunks);
173     sb.append(ByteBloomFilter.STATS_RECORD_SEP + 
174         ((comparator != null) ? "Comparator: "
175         + comparator.getClass().getSimpleName() : "Comparator: "
176         + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
177     return sb.toString();
178   }
179 
180 }