View Javadoc

1   /*
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.util;
22  
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.nio.ByteBuffer;
26  
27  import org.apache.hadoop.hbase.io.hfile.BlockType;
28  import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
29  import org.apache.hadoop.hbase.io.hfile.HFile;
30  import org.apache.hadoop.hbase.io.hfile.HFileBlock;
31  import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
32  import org.apache.hadoop.io.RawComparator;
33  
34  /**
35   * A Bloom filter implementation built on top of {@link ByteBloomFilter},
36   * encapsulating a set of fixed-size Bloom filters written out at the time of
37   * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
38   * block stream, and loaded on demand at query time. This class only provides
39   * reading capabilities.
40   */
41  public class CompoundBloomFilter extends CompoundBloomFilterBase
42      implements BloomFilter {
43  
44    /** Used to load chunks on demand */
45    private HFile.Reader reader;
46  
47    private HFileBlockIndex.BlockIndexReader index;
48  
49    private int hashCount;
50    private Hash hash;
51  
52    private long[] numQueriesPerChunk;
53    private long[] numPositivesPerChunk;
54  
55    /**
56     * De-serialization for compound Bloom filter metadata. Must be consistent
57     * with what {@link CompoundBloomFilterWriter} does.
58     *
59     * @param meta serialized Bloom filter metadata without any magic blocks
60     * @throws IOException
61     */
62    public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
63        throws IOException {
64      this.reader = reader;
65  
66      totalByteSize = meta.readLong();
67      hashCount = meta.readInt();
68      hashType = meta.readInt();
69      totalKeyCount = meta.readLong();
70      totalMaxKeys = meta.readLong();
71      numChunks = meta.readInt();
72      comparator = FixedFileTrailer.createComparator(
73          Bytes.toString(Bytes.readByteArray(meta)));
74  
75      hash = Hash.getInstance(hashType);
76      if (hash == null) {
77        throw new IllegalArgumentException("Invalid hash type: " + hashType);
78      }
79  
80      index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
81      index.readRootIndex(meta, numChunks);
82    }
83  
84    @Override
85    public boolean contains(byte[] key, int keyOffset, int keyLength,
86        ByteBuffer bloom) {
87      // We try to store the result in this variable so we can update stats for
88      // testing, but when an error happens, we log a message and return.
89      boolean result;
90  
91      int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
92      if (block < 0) {
93        result = false; // This key is not in the file.
94      } else {
95        HFileBlock bloomBlock;
96        try {
97          // We cache the block and use a positional read.
98          bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
99              index.getRootBlockDataSize(block), true, true, false,
100             BlockType.BLOOM_CHUNK);
101       } catch (IOException ex) {
102         // The Bloom filter is broken, turn it off.
103         throw new IllegalArgumentException(
104             "Failed to load Bloom block for key "
105                 + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
106       }
107 
108       ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
109       result = ByteBloomFilter.contains(key, keyOffset, keyLength,
110           bloomBuf.array(), bloomBuf.arrayOffset() + bloomBlock.headerSize(),
111           bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
112     }
113 
114     if (numQueriesPerChunk != null && block >= 0) {
115       // Update statistics. Only used in unit tests.
116       ++numQueriesPerChunk[block];
117       if (result)
118         ++numPositivesPerChunk[block];
119     }
120 
121     return result;
122   }
123 
124   public boolean supportsAutoLoading() {
125     return true;
126   }
127 
128   public int getNumChunks() {
129     return numChunks;
130   }
131 
132   @Override
133   public RawComparator<byte[]> getComparator() {
134     return comparator;
135   }
136 
137   public void enableTestingStats() {
138     numQueriesPerChunk = new long[numChunks];
139     numPositivesPerChunk = new long[numChunks];
140   }
141 
142   public String formatTestingStats() {
143     StringBuilder sb = new StringBuilder();
144     for (int i = 0; i < numChunks; ++i) {
145       sb.append("chunk #");
146       sb.append(i);
147       sb.append(": queries=");
148       sb.append(numQueriesPerChunk[i]);
149       sb.append(", positives=");
150       sb.append(numPositivesPerChunk[i]);
151       sb.append(", positiveRatio=");
152       sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
153       sb.append(";\n");
154     }
155     return sb.toString();
156   }
157 
158   public long getNumQueriesForTesting(int chunk) {
159     return numQueriesPerChunk[chunk];
160   }
161 
162   public long getNumPositivesForTesting(int chunk) {
163     return numPositivesPerChunk[chunk];
164   }
165 
166   @Override
167   public String toString() {
168     StringBuilder sb = new StringBuilder();
169     sb.append(ByteBloomFilter.formatStats(this));
170     sb.append(ByteBloomFilter.STATS_RECORD_SEP + 
171         "Number of chunks: " + numChunks);
172     sb.append(ByteBloomFilter.STATS_RECORD_SEP + 
173         "Comparator: " + comparator.getClass().getSimpleName());
174     return sb.toString();
175   }
176 
177 }