Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.io.DataInput;
021import java.io.IOException;
022import org.apache.hadoop.hbase.Cell;
023import org.apache.hadoop.hbase.nio.ByteBuff;
024import org.apache.hadoop.hbase.regionserver.BloomType;
025import org.apache.hadoop.hbase.util.BloomFilter;
026import org.apache.hadoop.hbase.util.BloomFilterUtil;
027import org.apache.hadoop.hbase.util.Bytes;
028import org.apache.hadoop.hbase.util.Hash;
029import org.apache.yetus.audience.InterfaceAudience;
030
031/**
032 * A Bloom filter implementation built on top of
033 * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating a set of fixed-size Bloom
034 * filters written out at the time of {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into
035 * the data block stream, and loaded on demand at query time. This class only provides reading
036 * capabilities.
037 */
038@InterfaceAudience.Private
039public class CompoundBloomFilter extends CompoundBloomFilterBase implements BloomFilter {
040
041  /** Used to load chunks on demand */
042  private HFile.Reader reader;
043  private final BloomFilterMetrics metrics;
044
045  private HFileBlockIndex.BlockIndexReader index;
046
047  private int hashCount;
048  private Hash hash;
049
050  private long[] numQueriesPerChunk;
051  private long[] numPositivesPerChunk;
052
053  /**
054   * De-serialization for compound Bloom filter metadata. Must be consistent with what
055   * {@link CompoundBloomFilterWriter} does.
056   * @param meta    serialized Bloom filter metadata without any magic blocks
057   * @param reader  reader for hfile
058   * @param metrics for collecting bloom filter metrics. may be null
059   */
060  public CompoundBloomFilter(DataInput meta, HFile.Reader reader, BloomFilterMetrics metrics)
061    throws IOException {
062    this.reader = reader;
063    this.metrics = metrics;
064
065    totalByteSize = meta.readLong();
066    hashCount = meta.readInt();
067    hashType = meta.readInt();
068    totalKeyCount = meta.readLong();
069    totalMaxKeys = meta.readLong();
070    numChunks = meta.readInt();
071    byte[] comparatorClassName = Bytes.readByteArray(meta);
072    // The writer would have return 0 as the vint length for the case of
073    // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be
074    // null
075    if (comparatorClassName.length != 0) {
076      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
077    }
078
079    hash = Hash.getInstance(hashType);
080    if (hash == null) {
081      throw new IllegalArgumentException("Invalid hash type: " + hashType);
082    }
083    // We will pass null for ROW block
084    if (comparator == null) {
085      index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
086    } else {
087      index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1);
088    }
089    index.readRootIndex(meta, numChunks);
090  }
091
092  @Override
093  public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) {
094    boolean result = containsInternal(key, keyOffset, keyLength, bloom);
095    if (metrics != null) {
096      metrics.incrementRequests(result);
097    }
098    return result;
099  }
100
101  private boolean containsInternal(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) {
102    int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
103    if (block < 0) {
104      return false; // This key is not in the file.
105    }
106    boolean result;
107    HFileBlock bloomBlock = getBloomBlock(block);
108    try {
109      ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
110      result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf,
111        bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
112    } finally {
113      // After the use, should release the block to deallocate byte buffers.
114      bloomBlock.release();
115    }
116    if (numPositivesPerChunk != null && result) {
117      // Update statistics. Only used in unit tests.
118      ++numPositivesPerChunk[block];
119    }
120    return result;
121  }
122
123  private HFileBlock getBloomBlock(int block) {
124    HFileBlock bloomBlock;
125    try {
126      // We cache the block and use a positional read.
127      bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
128        index.getRootBlockDataSize(block), true, true, false, true, BlockType.BLOOM_CHUNK, null);
129    } catch (IOException ex) {
130      // The Bloom filter is broken, turn it off.
131      throw new IllegalArgumentException("Failed to load Bloom block", ex);
132    }
133
134    if (numQueriesPerChunk != null) {
135      // Update statistics. Only used in unit tests.
136      ++numQueriesPerChunk[block];
137    }
138    return bloomBlock;
139  }
140
141  @Override
142  public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) {
143    boolean result = containsInternal(keyCell, bloom, type);
144    if (metrics != null) {
145      metrics.incrementRequests(result);
146    }
147    return result;
148  }
149
150  private boolean containsInternal(Cell keyCell, ByteBuff bloom, BloomType type) {
151    int block = index.rootBlockContainingKey(keyCell);
152    if (block < 0) {
153      return false; // This key is not in the file.
154    }
155    boolean result;
156    HFileBlock bloomBlock = getBloomBlock(block);
157    try {
158      ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
159      result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(),
160        bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type);
161    } finally {
162      // After the use, should release the block to deallocate the byte buffers.
163      bloomBlock.release();
164    }
165    if (numPositivesPerChunk != null && result) {
166      // Update statistics. Only used in unit tests.
167      ++numPositivesPerChunk[block];
168    }
169    return result;
170  }
171
172  @Override
173  public boolean supportsAutoLoading() {
174    return true;
175  }
176
177  public int getNumChunks() {
178    return numChunks;
179  }
180
181  public void enableTestingStats() {
182    numQueriesPerChunk = new long[numChunks];
183    numPositivesPerChunk = new long[numChunks];
184  }
185
186  public String formatTestingStats() {
187    StringBuilder sb = new StringBuilder();
188    for (int i = 0; i < numChunks; ++i) {
189      sb.append("chunk #");
190      sb.append(i);
191      sb.append(": queries=");
192      sb.append(numQueriesPerChunk[i]);
193      sb.append(", positives=");
194      sb.append(numPositivesPerChunk[i]);
195      sb.append(", positiveRatio=");
196      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
197      sb.append(";\n");
198    }
199    return sb.toString();
200  }
201
202  public long getNumQueriesForTesting(int chunk) {
203    return numQueriesPerChunk[chunk];
204  }
205
206  public long getNumPositivesForTesting(int chunk) {
207    return numPositivesPerChunk[chunk];
208  }
209
210  @Override
211  public String toString() {
212    StringBuilder sb = new StringBuilder();
213    sb.append(BloomFilterUtil.formatStats(this));
214    sb.append(BloomFilterUtil.STATS_RECORD_SEP + "Number of chunks: " + numChunks);
215    sb.append(BloomFilterUtil.STATS_RECORD_SEP + ((comparator != null)
216      ? "Comparator: " + comparator.getClass().getSimpleName()
217      : "Comparator: " + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
218    return sb.toString();
219  }
220
221}