View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.DataInput;
23  import java.io.IOException;
24  import java.nio.ByteBuffer;
25  
26  import org.apache.hadoop.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.KeyValue.KVComparator;
28  import org.apache.hadoop.hbase.io.hfile.BlockType;
29  import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
30  import org.apache.hadoop.hbase.io.hfile.HFile;
31  import org.apache.hadoop.hbase.io.hfile.HFileBlock;
32  import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
33  
34  /**
35   * A Bloom filter implementation built on top of {@link ByteBloomFilter},
36   * encapsulating a set of fixed-size Bloom filters written out at the time of
37   * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
38   * block stream, and loaded on demand at query time. This class only provides
39   * reading capabilities.
40   */
41  @InterfaceAudience.Private
42  public class CompoundBloomFilter extends CompoundBloomFilterBase
43      implements BloomFilter {
44  
45    /** Used to load chunks on demand */
46    private HFile.Reader reader;
47  
48    private HFileBlockIndex.BlockIndexReader index;
49  
50    private int hashCount;
51    private Hash hash;
52  
53    private long[] numQueriesPerChunk;
54    private long[] numPositivesPerChunk;
55  
56    /**
57     * De-serialization for compound Bloom filter metadata. Must be consistent
58     * with what {@link CompoundBloomFilterWriter} does.
59     *
60     * @param meta serialized Bloom filter metadata without any magic blocks
61     * @throws IOException
62     */
63    public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
64        throws IOException {
65      this.reader = reader;
66  
67      totalByteSize = meta.readLong();
68      hashCount = meta.readInt();
69      hashType = meta.readInt();
70      totalKeyCount = meta.readLong();
71      totalMaxKeys = meta.readLong();
72      numChunks = meta.readInt();
73      comparator = FixedFileTrailer.createComparator(
74          Bytes.toString(Bytes.readByteArray(meta)));
75  
76      hash = Hash.getInstance(hashType);
77      if (hash == null) {
78        throw new IllegalArgumentException("Invalid hash type: " + hashType);
79      }
80  
81      index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
82      index.readRootIndex(meta, numChunks);
83    }
84  
85    @Override
86    public boolean contains(byte[] key, int keyOffset, int keyLength,
87        ByteBuffer bloom) {
88      // We try to store the result in this variable so we can update stats for
89      // testing, but when an error happens, we log a message and return.
90      boolean result;
91  
92      int block = index.rootBlockContainingKey(key, keyOffset,
93          keyLength);
94      if (block < 0) {
95        result = false; // This key is not in the file.
96      } else {
97        HFileBlock bloomBlock;
98        try {
99          // We cache the block and use a positional read.
100         bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
101             index.getRootBlockDataSize(block), true, true, false, true,
102             BlockType.BLOOM_CHUNK, null);
103       } catch (IOException ex) {
104         // The Bloom filter is broken, turn it off.
105         throw new IllegalArgumentException(
106             "Failed to load Bloom block for key "
107                 + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
108       }
109 
110       ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
111       result = ByteBloomFilter.contains(key, keyOffset, keyLength,
112           bloomBuf.array(), bloomBuf.arrayOffset() + bloomBlock.headerSize(),
113           bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
114     }
115 
116     if (numQueriesPerChunk != null && block >= 0) {
117       // Update statistics. Only used in unit tests.
118       ++numQueriesPerChunk[block];
119       if (result)
120         ++numPositivesPerChunk[block];
121     }
122 
123     return result;
124   }
125 
126   public boolean supportsAutoLoading() {
127     return true;
128   }
129 
130   public int getNumChunks() {
131     return numChunks;
132   }
133 
134   @Override
135   public KVComparator getComparator() {
136     return comparator;
137   }
138 
139   public void enableTestingStats() {
140     numQueriesPerChunk = new long[numChunks];
141     numPositivesPerChunk = new long[numChunks];
142   }
143 
144   public String formatTestingStats() {
145     StringBuilder sb = new StringBuilder();
146     for (int i = 0; i < numChunks; ++i) {
147       sb.append("chunk #");
148       sb.append(i);
149       sb.append(": queries=");
150       sb.append(numQueriesPerChunk[i]);
151       sb.append(", positives=");
152       sb.append(numPositivesPerChunk[i]);
153       sb.append(", positiveRatio=");
154       sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
155       sb.append(";\n");
156     }
157     return sb.toString();
158   }
159 
160   public long getNumQueriesForTesting(int chunk) {
161     return numQueriesPerChunk[chunk];
162   }
163 
164   public long getNumPositivesForTesting(int chunk) {
165     return numPositivesPerChunk[chunk];
166   }
167 
168   @Override
169   public String toString() {
170     StringBuilder sb = new StringBuilder();
171     sb.append(ByteBloomFilter.formatStats(this));
172     sb.append(ByteBloomFilter.STATS_RECORD_SEP + 
173         "Number of chunks: " + numChunks);
174     sb.append(ByteBloomFilter.STATS_RECORD_SEP + 
175         "Comparator: " + comparator.getClass().getSimpleName());
176     return sb.toString();
177   }
178 
179 }