/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util;

import java.io.DataInput;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;

/**
 * A Bloom filter implementation built on top of {@link ByteBloomFilter},
 * encapsulating a set of fixed-size Bloom filters written out at the time of
 * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
 * block stream, and loaded on demand at query time. This class only provides
 * reading capabilities.
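 *
 * <p>Sketch of typical read-path usage, assuming the serialized Bloom filter
 * metadata and an {@link org.apache.hadoop.hbase.io.hfile.HFile.Reader} for
 * the same file are already available (variable names are illustrative):
 *
 * <pre>
 * DataInput bloomMeta = ...;       // general Bloom filter metadata block
 * HFile.Reader fileReader = ...;   // reader for the enclosing HFile
 * BloomFilter bloom = new CompoundBloomFilter(bloomMeta, fileReader);
 *
 * // The matching Bloom chunk is located through the chunk-level block index
 * // and loaded on demand, so no pre-loaded bit array is passed in; the last
 * // argument is not used by this implementation.
 * boolean mightContain = bloom.contains(key, 0, key.length, null);
 * </pre>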
 */
@InterfaceAudience.Private
public class CompoundBloomFilter extends CompoundBloomFilterBase
    implements BloomFilter {

  /** Used to load chunks on demand */
  private HFile.Reader reader;

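  /** Block index over the Bloom filter chunks, used to find the chunk for a given key */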
  private HFileBlockIndex.BlockIndexReader index;

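  /** Hash function and number of hash evaluations per key, as read from the metadata */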
  private int hashCount;
  private Hash hash;

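  /** Per-chunk query and positive-result counters; allocated only by {@link #enableTestingStats()} */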
  private long[] numQueriesPerChunk;
  private long[] numPositivesPerChunk;

  /**
   * De-serialization for compound Bloom filter metadata. Must be consistent
   * with what {@link CompoundBloomFilterWriter} does.
   *
   * @param meta serialized Bloom filter metadata without any magic blocks
   * @param reader the {@link HFile.Reader} used to load Bloom filter chunks on demand
   * @throws IOException if the metadata or the chunk-level block index cannot be read
   */
  public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
      throws IOException {
    this.reader = reader;

    totalByteSize = meta.readLong();
    hashCount = meta.readInt();
    hashType = meta.readInt();
    totalKeyCount = meta.readLong();
    totalMaxKeys = meta.readLong();
    numChunks = meta.readInt();
    comparator = FixedFileTrailer.createComparator(
        Bytes.toString(Bytes.readByteArray(meta)));

    hash = Hash.getInstance(hashType);
    if (hash == null) {
      throw new IllegalArgumentException("Invalid hash type: " + hashType);
    }

    index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
    index.readRootIndex(meta, numChunks);
  }

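  /**
   * Checks whether the given key may be present in this file. The chunk that
   * could contain the key is located via the chunk-level block index, loaded
   * through the {@link HFile.Reader} (with block caching enabled), and the key
   * is then tested against that chunk's bit array. The {@code bloom} argument
   * is not used by this implementation.
   *
   * @return false if the key is definitely absent, true if it may be present
   */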
  @Override
  public boolean contains(byte[] key, int keyOffset, int keyLength,
      ByteBuffer bloom) {
    // Store the result in a local variable so that per-chunk statistics can be
    // updated below when testing stats are enabled.
    boolean result;

    int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
    if (block < 0) {
      result = false; // This key is not in the file.
    } else {
      HFileBlock bloomBlock;
      try {
        // We cache the block and use a positional read.
        bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
            index.getRootBlockDataSize(block), true, true, false,
            BlockType.BLOOM_CHUNK);
      } catch (IOException ex) {
        // Loading the Bloom chunk failed; surface the error as an unchecked
        // exception so the caller can treat this Bloom filter as broken and
        // turn it off.
        throw new IllegalArgumentException(
            "Failed to load Bloom block for key "
                + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
      }

      ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
      result = ByteBloomFilter.contains(key, keyOffset, keyLength,
          bloomBuf.array(), bloomBuf.arrayOffset() + bloomBlock.headerSize(),
          bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
    }

    if (numQueriesPerChunk != null && block >= 0) {
      // Update statistics. Only used in unit tests.
      ++numQueriesPerChunk[block];
      if (result) {
        ++numPositivesPerChunk[block];
      }
    }

    return result;
  }

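  /**
   * @return true, because Bloom filter chunks are loaded on demand from the
   *         underlying HFile rather than supplied by the caller
   */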
  public boolean supportsAutoLoading() {
    return true;
  }

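  /** @return the number of fixed-size Bloom filter chunks in this compound filter */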
  public int getNumChunks() {
    return numChunks;
  }

  @Override
  public KVComparator getComparator() {
    return comparator;
  }

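  /**
   * Allocates the per-chunk query and positive-result counters. Intended for
   * unit tests only; statistics are not collected unless this is called.
   */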
  public void enableTestingStats() {
    numQueriesPerChunk = new long[numChunks];
    numPositivesPerChunk = new long[numChunks];
  }

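  /**
   * @return a human-readable, per-chunk summary of query counts, positive
   *         counts, and positive ratios; requires {@link #enableTestingStats()}
   *         to have been called first
   */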
  public String formatTestingStats() {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < numChunks; ++i) {
      sb.append("chunk #");
      sb.append(i);
      sb.append(": queries=");
      sb.append(numQueriesPerChunk[i]);
      sb.append(", positives=");
      sb.append(numPositivesPerChunk[i]);
      sb.append(", positiveRatio=");
      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
      sb.append(";\n");
    }
    return sb.toString();
  }

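  /** @return the number of Bloom filter queries issued against the given chunk (testing only) */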
  public long getNumQueriesForTesting(int chunk) {
    return numQueriesPerChunk[chunk];
  }

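  /** @return the number of positive Bloom filter results for the given chunk (testing only) */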
  public long getNumPositivesForTesting(int chunk) {
    return numPositivesPerChunk[chunk];
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(ByteBloomFilter.formatStats(this));
    sb.append(ByteBloomFilter.STATS_RECORD_SEP +
        "Number of chunks: " + numChunks);
    sb.append(ByteBloomFilter.STATS_RECORD_SEP +
        "Comparator: " + comparator.getClass().getSimpleName());
    return sb.toString();
  }

}