001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.io.DataInput;
021import java.io.IOException;
022import org.apache.hadoop.hbase.Cell;
023import org.apache.hadoop.hbase.HBaseInterfaceAudience;
024import org.apache.hadoop.hbase.nio.ByteBuff;
025import org.apache.hadoop.hbase.regionserver.BloomType;
026import org.apache.hadoop.hbase.util.BloomFilter;
027import org.apache.hadoop.hbase.util.BloomFilterUtil;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.hadoop.hbase.util.Hash;
030import org.apache.yetus.audience.InterfaceAudience;
031
032/**
033 * A Bloom filter implementation built on top of
034 * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating a set of fixed-size Bloom
035 * filters written out at the time of {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into
036 * the data block stream, and loaded on demand at query time. This class only provides reading
037 * capabilities.
038 */
039@InterfaceAudience.Private
040public class CompoundBloomFilter extends CompoundBloomFilterBase implements BloomFilter {
041
042  /** Used to load chunks on demand */
043  private HFile.Reader reader;
044  private final BloomFilterMetrics metrics;
045
046  private HFileBlockIndex.BlockIndexReader index;
047
048  private int hashCount;
049  private Hash hash;
050
051  private long[] numQueriesPerChunk;
052  private long[] numPositivesPerChunk;
053
054  /**
055   * De-serialization for compound Bloom filter metadata. Must be consistent with what
056   * {@link CompoundBloomFilterWriter} does.
057   * @param meta    serialized Bloom filter metadata without any magic blocks
058   * @param reader  reader for hfile
059   * @param metrics for collecting bloom filter metrics. may be null
060   */
061  public CompoundBloomFilter(DataInput meta, HFile.Reader reader, BloomFilterMetrics metrics)
062    throws IOException {
063    this.reader = reader;
064    this.metrics = metrics;
065
066    totalByteSize = meta.readLong();
067    hashCount = meta.readInt();
068    hashType = meta.readInt();
069    totalKeyCount = meta.readLong();
070    totalMaxKeys = meta.readLong();
071    numChunks = meta.readInt();
072    byte[] comparatorClassName = Bytes.readByteArray(meta);
073    // The writer would have return 0 as the vint length for the case of
074    // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be
075    // null
076    if (comparatorClassName.length != 0) {
077      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
078    }
079
080    hash = Hash.getInstance(hashType);
081    if (hash == null) {
082      throw new IllegalArgumentException("Invalid hash type: " + hashType);
083    }
084    // We will pass null for ROW block
085    if (comparator == null) {
086      index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
087    } else {
088      index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1);
089    }
090    index.readRootIndex(meta, numChunks);
091  }
092
093  @Override
094  public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) {
095    boolean result = containsInternal(key, keyOffset, keyLength, bloom);
096    if (metrics != null) {
097      metrics.incrementRequests(result);
098    }
099    return result;
100  }
101
102  private boolean containsInternal(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) {
103    int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
104    if (block < 0) {
105      return false; // This key is not in the file.
106    }
107    boolean result;
108    HFileBlock bloomBlock = getBloomBlock(block);
109    try {
110      ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
111      result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf,
112        bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
113    } finally {
114      // After the use, should release the block to deallocate byte buffers.
115      bloomBlock.release();
116    }
117    if (numPositivesPerChunk != null && result) {
118      // Update statistics. Only used in unit tests.
119      ++numPositivesPerChunk[block];
120    }
121    return result;
122  }
123
124  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
125  public HFileBlock getBloomBlock(int block) {
126    HFileBlock bloomBlock;
127    try {
128      // We cache the block and use a positional read.
129      bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
130        index.getRootBlockDataSize(block), true, true, false, true, BlockType.BLOOM_CHUNK, null);
131    } catch (IOException ex) {
132      // The Bloom filter is broken, turn it off.
133      throw new IllegalArgumentException("Failed to load Bloom block", ex);
134    }
135
136    if (numQueriesPerChunk != null) {
137      // Update statistics. Only used in unit tests.
138      ++numQueriesPerChunk[block];
139    }
140    return bloomBlock;
141  }
142
143  @Override
144  public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) {
145    boolean result = containsInternal(keyCell, bloom, type);
146    if (metrics != null) {
147      metrics.incrementRequests(result);
148    }
149    return result;
150  }
151
152  private boolean containsInternal(Cell keyCell, ByteBuff bloom, BloomType type) {
153    int block = index.rootBlockContainingKey(keyCell);
154    if (block < 0) {
155      return false; // This key is not in the file.
156    }
157    boolean result;
158    HFileBlock bloomBlock = getBloomBlock(block);
159    try {
160      ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
161      result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(),
162        bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type);
163    } finally {
164      // After the use, should release the block to deallocate the byte buffers.
165      bloomBlock.release();
166    }
167    if (numPositivesPerChunk != null && result) {
168      // Update statistics. Only used in unit tests.
169      ++numPositivesPerChunk[block];
170    }
171    return result;
172  }
173
174  @Override
175  public boolean supportsAutoLoading() {
176    return true;
177  }
178
179  public int getNumChunks() {
180    return numChunks;
181  }
182
183  public void enableTestingStats() {
184    numQueriesPerChunk = new long[numChunks];
185    numPositivesPerChunk = new long[numChunks];
186  }
187
188  public String formatTestingStats() {
189    StringBuilder sb = new StringBuilder();
190    for (int i = 0; i < numChunks; ++i) {
191      sb.append("chunk #");
192      sb.append(i);
193      sb.append(": queries=");
194      sb.append(numQueriesPerChunk[i]);
195      sb.append(", positives=");
196      sb.append(numPositivesPerChunk[i]);
197      sb.append(", positiveRatio=");
198      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
199      sb.append(";\n");
200    }
201    return sb.toString();
202  }
203
204  public long getNumQueriesForTesting(int chunk) {
205    return numQueriesPerChunk[chunk];
206  }
207
208  public long getNumPositivesForTesting(int chunk) {
209    return numPositivesPerChunk[chunk];
210  }
211
212  @Override
213  public String toString() {
214    StringBuilder sb = new StringBuilder();
215    sb.append(BloomFilterUtil.formatStats(this));
216    sb.append(BloomFilterUtil.STATS_RECORD_SEP + "Number of chunks: " + numChunks);
217    sb.append(BloomFilterUtil.STATS_RECORD_SEP + ((comparator != null)
218      ? "Comparator: " + comparator.getClass().getSimpleName()
219      : "Comparator: " + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
220    return sb.toString();
221  }
222
223  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
224  public HFileBlockIndex.BlockIndexReader getBloomIndex() {
225    return index;
226  }
227
228  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
229  public int getHashCount() {
230    return hashCount;
231  }
232
233  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
234  public Hash getHash() {
235    return hash;
236  }
237}