/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util;

import java.io.DataInput;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;

/**
 * A Bloom filter implementation built on top of {@link ByteBloomFilter},
 * encapsulating a set of fixed-size Bloom filters written out at the time of
 * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
 * block stream, and loaded on demand at query time. This class only provides
 * reading capabilities.
 */
@InterfaceAudience.Private
public class CompoundBloomFilter extends CompoundBloomFilterBase
    implements BloomFilter {

  /** Used to load chunks on demand */
  private HFile.Reader reader;

  /** Root-level index over the Bloom filter chunk blocks */
  private HFileBlockIndex.BlockIndexReader index;

  /** Hash function count and implementation shared by all chunks */
  private int hashCount;
  private Hash hash;

  /** Per-chunk query/positive counters; allocated only for unit tests */
  private long[] numQueriesPerChunk;
  private long[] numPositivesPerChunk;

  /**
   * De-serialization for compound Bloom filter metadata. Must be consistent
   * with what {@link CompoundBloomFilterWriter} does.
   *
   * @param meta serialized Bloom filter metadata without any magic blocks
   * @param reader the HFile reader used to load Bloom filter chunks on demand
   * @throws IOException if the Bloom filter metadata cannot be read
   */
  public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
      throws IOException {
    this.reader = reader;

    totalByteSize = meta.readLong();
    hashCount = meta.readInt();
    hashType = meta.readInt();
    totalKeyCount = meta.readLong();
    totalMaxKeys = meta.readLong();
    numChunks = meta.readInt();
    byte[] comparatorClassName = Bytes.readByteArray(meta);
    if (comparatorClassName.length != 0) {
      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
    } else {
      // Fallback: in 2.0 the RAW_COMPARATOR class name is not written out, so when reading
      // back such metadata we default to RAW_COMPARATOR. See HBASE-16189.
      comparator = KeyValue.RAW_COMPARATOR;
    }

    hash = Hash.getInstance(hashType);
    if (hash == null) {
      throw new IllegalArgumentException("Invalid hash type: " + hashType);
    }

    index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
    index.readRootIndex(meta, numChunks);
  }

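  /**
   * Checks whether the given key may be present in the file. Locates the Bloom
   * chunk covering the key via the root block index, loads that chunk through
   * the {@link HFile.Reader} (cached, positional read), and tests the key
   * against it. The {@code bloom} argument is ignored here, since chunks are
   * loaded on demand; see {@link #supportsAutoLoading()}.
   */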
  @Override
  public boolean contains(byte[] key, int keyOffset, int keyLength,
      ByteBuffer bloom) {
    // Store the result in a local variable so that the per-chunk statistics
    // used in testing can be updated before returning. If loading a chunk
    // fails, an exception is thrown instead.
    boolean result;

    int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
    if (block < 0) {
      result = false; // This key is not in the file.
    } else {
      HFileBlock bloomBlock;
      try {
        // We cache the block and use a positional read.
        bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
            index.getRootBlockDataSize(block), true, true, false, true,
            BlockType.BLOOM_CHUNK, null);
      } catch (IOException ex) {
        // The Bloom filter block failed to load. Treat this as a hard error
        // rather than silently assuming the key may be present.
        throw new IllegalArgumentException(
            "Failed to load Bloom block for key "
                + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
      }

      ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
      result = ByteBloomFilter.contains(key, keyOffset, keyLength,
          bloomBuf, bloomBlock.headerSize(),
          bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
    }

    if (numQueriesPerChunk != null && block >= 0) {
      // Update statistics. Only used in unit tests.
      ++numQueriesPerChunk[block];
      if (result) {
        ++numPositivesPerChunk[block];
      }
    }

    return result;
  }

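  /**
   * @return true, because Bloom chunks are loaded on demand through the HFile
   *         reader, so callers do not need to pass Bloom data to
   *         {@link #contains(byte[], int, int, ByteBuffer)}
   */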
  @Override
  public boolean supportsAutoLoading() {
    return true;
  }

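  /** @return the number of Bloom filter chunks backing this compound filter */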
  public int getNumChunks() {
    return numChunks;
  }

  @Override
  public KVComparator getComparator() {
    return comparator;
  }

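  /**
   * Allocates the per-chunk query and positive-result counters. Statistics
   * are only collected after this is called; intended for unit tests.
   */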
  public void enableTestingStats() {
    numQueriesPerChunk = new long[numChunks];
    numPositivesPerChunk = new long[numChunks];
  }

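  /**
   * Formats the per-chunk statistics collected after
   * {@link #enableTestingStats()}, one line per chunk. The positive ratio is
   * NaN for chunks that have never been queried.
   */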
  public String formatTestingStats() {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < numChunks; ++i) {
      sb.append("chunk #");
      sb.append(i);
      sb.append(": queries=");
      sb.append(numQueriesPerChunk[i]);
      sb.append(", positives=");
      sb.append(numPositivesPerChunk[i]);
      sb.append(", positiveRatio=");
      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
      sb.append(";\n");
    }
    return sb.toString();
  }

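  /** @return the number of queries against the given chunk; testing only */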
  public long getNumQueriesForTesting(int chunk) {
    return numQueriesPerChunk[chunk];
  }

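  /** @return the number of positive results from the given chunk; testing only */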
  public long getNumPositivesForTesting(int chunk) {
    return numPositivesPerChunk[chunk];
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(ByteBloomFilter.formatStats(this));
    sb.append(ByteBloomFilter.STATS_RECORD_SEP +
        "Number of chunks: " + numChunks);
    sb.append(ByteBloomFilter.STATS_RECORD_SEP +
        "Comparator: " + comparator.getClass().getSimpleName());
    return sb.toString();
  }

}