Source code

001/*
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.io.hfile;
021
022import java.io.DataInput;
023import java.io.IOException;
024
025import org.apache.hadoop.hbase.Cell;
026import org.apache.yetus.audience.InterfaceAudience;
027import org.apache.hadoop.hbase.nio.ByteBuff;
028import org.apache.hadoop.hbase.regionserver.BloomType;
029import org.apache.hadoop.hbase.util.BloomFilter;
030import org.apache.hadoop.hbase.util.BloomFilterUtil;
031import org.apache.hadoop.hbase.util.Bytes;
032import org.apache.hadoop.hbase.util.Hash;
033
034/**
035 * A Bloom filter implementation built on top of 
036 * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating
037 * a set of fixed-size Bloom filters written out at the time of
038 * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
039 * block stream, and loaded on demand at query time. This class only provides
040 * reading capabilities.
041 */
042@InterfaceAudience.Private
043public class CompoundBloomFilter extends CompoundBloomFilterBase
044    implements BloomFilter {
045
046  /** Used to load chunks on demand */
047  private HFile.Reader reader;
048
049  private HFileBlockIndex.BlockIndexReader index;
050
051  private int hashCount;
052  private Hash hash;
053
054  private long[] numQueriesPerChunk;
055  private long[] numPositivesPerChunk;
056
057  /**
058   * De-serialization for compound Bloom filter metadata. Must be consistent
059   * with what {@link CompoundBloomFilterWriter} does.
060   *
061   * @param meta serialized Bloom filter metadata without any magic blocks
062   * @throws IOException
063   */
064  public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
065      throws IOException {
066    this.reader = reader;
067
068    totalByteSize = meta.readLong();
069    hashCount = meta.readInt();
070    hashType = meta.readInt();
071    totalKeyCount = meta.readLong();
072    totalMaxKeys = meta.readLong();
073    numChunks = meta.readInt();
074    byte[] comparatorClassName = Bytes.readByteArray(meta);
075    // The writer would have return 0 as the vint length for the case of 
076    // Bytes.BYTES_RAWCOMPARATOR.  In such cases do not initialize comparator, it can be
077    // null
078    if (comparatorClassName.length != 0) {
079      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
080    }
081
082    hash = Hash.getInstance(hashType);
083    if (hash == null) {
084      throw new IllegalArgumentException("Invalid hash type: " + hashType);
085    }
086    // We will pass null for ROW block
087    if(comparator == null) {
088      index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
089    } else {
090      index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1);
091    }
092    index.readRootIndex(meta, numChunks);
093  }
094
095  @Override
096  public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) {
097    int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
098    if (block < 0) {
099      return false; // This key is not in the file.
100    }
101    boolean result;
102    HFileBlock bloomBlock = getBloomBlock(block);
103    try {
104      ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
105      result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf,
106          bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
107    } finally {
108      // After the use, should release the block to deallocate byte buffers.
109      bloomBlock.release();
110    }
111    if (numPositivesPerChunk != null && result) {
112      // Update statistics. Only used in unit tests.
113      ++numPositivesPerChunk[block];
114    }
115    return result;
116  }
117
118  private HFileBlock getBloomBlock(int block) {
119    HFileBlock bloomBlock;
120    try {
121      // We cache the block and use a positional read.
122      bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
123          index.getRootBlockDataSize(block), true, true, false, true, BlockType.BLOOM_CHUNK, null);
124    } catch (IOException ex) {
125      // The Bloom filter is broken, turn it off.
126      throw new IllegalArgumentException("Failed to load Bloom block", ex);
127    }
128
129    if (numQueriesPerChunk != null) {
130      // Update statistics. Only used in unit tests.
131      ++numQueriesPerChunk[block];
132    }
133    return bloomBlock;
134  }
135
136  @Override
137  public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) {
138    int block = index.rootBlockContainingKey(keyCell);
139    if (block < 0) {
140      return false; // This key is not in the file.
141    }
142    boolean result;
143    HFileBlock bloomBlock = getBloomBlock(block);
144    try {
145      ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
146      result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(),
147        bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type);
148    } finally {
149      // After the use, should release the block to deallocate the byte buffers.
150      bloomBlock.release();
151    }
152    if (numPositivesPerChunk != null && result) {
153      // Update statistics. Only used in unit tests.
154      ++numPositivesPerChunk[block];
155    }
156    return result;
157  }
158
159  @Override
160  public boolean supportsAutoLoading() {
161    return true;
162  }
163
164  public int getNumChunks() {
165    return numChunks;
166  }
167
168  public void enableTestingStats() {
169    numQueriesPerChunk = new long[numChunks];
170    numPositivesPerChunk = new long[numChunks];
171  }
172
173  public String formatTestingStats() {
174    StringBuilder sb = new StringBuilder();
175    for (int i = 0; i < numChunks; ++i) {
176      sb.append("chunk #");
177      sb.append(i);
178      sb.append(": queries=");
179      sb.append(numQueriesPerChunk[i]);
180      sb.append(", positives=");
181      sb.append(numPositivesPerChunk[i]);
182      sb.append(", positiveRatio=");
183      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
184      sb.append(";\n");
185    }
186    return sb.toString();
187  }
188
189  public long getNumQueriesForTesting(int chunk) {
190    return numQueriesPerChunk[chunk];
191  }
192
193  public long getNumPositivesForTesting(int chunk) {
194    return numPositivesPerChunk[chunk];
195  }
196
197  @Override
198  public String toString() {
199    StringBuilder sb = new StringBuilder();
200    sb.append(BloomFilterUtil.formatStats(this));
201    sb.append(BloomFilterUtil.STATS_RECORD_SEP + 
202        "Number of chunks: " + numChunks);
203    sb.append(BloomFilterUtil.STATS_RECORD_SEP + 
204        ((comparator != null) ? "Comparator: "
205        + comparator.getClass().getSimpleName() : "Comparator: "
206        + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
207    return sb.toString();
208  }
209
210}