001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.io.DataInput; 021import java.io.IOException; 022import org.apache.hadoop.hbase.Cell; 023import org.apache.hadoop.hbase.HBaseInterfaceAudience; 024import org.apache.hadoop.hbase.nio.ByteBuff; 025import org.apache.hadoop.hbase.regionserver.BloomType; 026import org.apache.hadoop.hbase.util.BloomFilter; 027import org.apache.hadoop.hbase.util.BloomFilterUtil; 028import org.apache.hadoop.hbase.util.Bytes; 029import org.apache.hadoop.hbase.util.Hash; 030import org.apache.yetus.audience.InterfaceAudience; 031 032/** 033 * A Bloom filter implementation built on top of 034 * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating a set of fixed-size Bloom 035 * filters written out at the time of {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into 036 * the data block stream, and loaded on demand at query time. This class only provides reading 037 * capabilities. 038 */ 039@InterfaceAudience.Private 040public class CompoundBloomFilter extends CompoundBloomFilterBase implements BloomFilter { 041 042 /** Used to load chunks on demand */ 043 private HFile.Reader reader; 044 private final BloomFilterMetrics metrics; 045 046 private HFileBlockIndex.BlockIndexReader index; 047 048 private int hashCount; 049 private Hash hash; 050 051 private long[] numQueriesPerChunk; 052 private long[] numPositivesPerChunk; 053 054 /** 055 * De-serialization for compound Bloom filter metadata. Must be consistent with what 056 * {@link CompoundBloomFilterWriter} does. 057 * @param meta serialized Bloom filter metadata without any magic blocks 058 * @param reader reader for hfile 059 * @param metrics for collecting bloom filter metrics. may be null 060 */ 061 public CompoundBloomFilter(DataInput meta, HFile.Reader reader, BloomFilterMetrics metrics) 062 throws IOException { 063 this.reader = reader; 064 this.metrics = metrics; 065 066 totalByteSize = meta.readLong(); 067 hashCount = meta.readInt(); 068 hashType = meta.readInt(); 069 totalKeyCount = meta.readLong(); 070 totalMaxKeys = meta.readLong(); 071 numChunks = meta.readInt(); 072 byte[] comparatorClassName = Bytes.readByteArray(meta); 073 // The writer would have return 0 as the vint length for the case of 074 // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be 075 // null 076 if (comparatorClassName.length != 0) { 077 comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName)); 078 } 079 080 hash = Hash.getInstance(hashType); 081 if (hash == null) { 082 throw new IllegalArgumentException("Invalid hash type: " + hashType); 083 } 084 // We will pass null for ROW block 085 if (comparator == null) { 086 index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1); 087 } else { 088 index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1); 089 } 090 index.readRootIndex(meta, numChunks); 091 } 092 093 @Override 094 public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) { 095 boolean result = containsInternal(key, keyOffset, keyLength, bloom); 096 if (metrics != null) { 097 metrics.incrementRequests(result); 098 } 099 return result; 100 } 101 102 private boolean containsInternal(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) { 103 int block = index.rootBlockContainingKey(key, keyOffset, keyLength); 104 if (block < 0) { 105 return false; // This key is not in the file. 106 } 107 boolean result; 108 HFileBlock bloomBlock = getBloomBlock(block); 109 try { 110 ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); 111 result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf, 112 bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount); 113 } finally { 114 // After the use, should release the block to deallocate byte buffers. 115 bloomBlock.release(); 116 } 117 if (numPositivesPerChunk != null && result) { 118 // Update statistics. Only used in unit tests. 119 ++numPositivesPerChunk[block]; 120 } 121 return result; 122 } 123 124 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST) 125 public HFileBlock getBloomBlock(int block) { 126 HFileBlock bloomBlock; 127 try { 128 // We cache the block and use a positional read. 129 bloomBlock = reader.readBlock(index.getRootBlockOffset(block), 130 index.getRootBlockDataSize(block), true, true, false, true, BlockType.BLOOM_CHUNK, null); 131 } catch (IOException ex) { 132 // The Bloom filter is broken, turn it off. 133 throw new IllegalArgumentException("Failed to load Bloom block", ex); 134 } 135 136 if (numQueriesPerChunk != null) { 137 // Update statistics. Only used in unit tests. 138 ++numQueriesPerChunk[block]; 139 } 140 return bloomBlock; 141 } 142 143 @Override 144 public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) { 145 boolean result = containsInternal(keyCell, bloom, type); 146 if (metrics != null) { 147 metrics.incrementRequests(result); 148 } 149 return result; 150 } 151 152 private boolean containsInternal(Cell keyCell, ByteBuff bloom, BloomType type) { 153 int block = index.rootBlockContainingKey(keyCell); 154 if (block < 0) { 155 return false; // This key is not in the file. 156 } 157 boolean result; 158 HFileBlock bloomBlock = getBloomBlock(block); 159 try { 160 ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); 161 result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(), 162 bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type); 163 } finally { 164 // After the use, should release the block to deallocate the byte buffers. 165 bloomBlock.release(); 166 } 167 if (numPositivesPerChunk != null && result) { 168 // Update statistics. Only used in unit tests. 169 ++numPositivesPerChunk[block]; 170 } 171 return result; 172 } 173 174 @Override 175 public boolean supportsAutoLoading() { 176 return true; 177 } 178 179 public int getNumChunks() { 180 return numChunks; 181 } 182 183 public void enableTestingStats() { 184 numQueriesPerChunk = new long[numChunks]; 185 numPositivesPerChunk = new long[numChunks]; 186 } 187 188 public String formatTestingStats() { 189 StringBuilder sb = new StringBuilder(); 190 for (int i = 0; i < numChunks; ++i) { 191 sb.append("chunk #"); 192 sb.append(i); 193 sb.append(": queries="); 194 sb.append(numQueriesPerChunk[i]); 195 sb.append(", positives="); 196 sb.append(numPositivesPerChunk[i]); 197 sb.append(", positiveRatio="); 198 sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]); 199 sb.append(";\n"); 200 } 201 return sb.toString(); 202 } 203 204 public long getNumQueriesForTesting(int chunk) { 205 return numQueriesPerChunk[chunk]; 206 } 207 208 public long getNumPositivesForTesting(int chunk) { 209 return numPositivesPerChunk[chunk]; 210 } 211 212 @Override 213 public String toString() { 214 StringBuilder sb = new StringBuilder(); 215 sb.append(BloomFilterUtil.formatStats(this)); 216 sb.append(BloomFilterUtil.STATS_RECORD_SEP + "Number of chunks: " + numChunks); 217 sb.append(BloomFilterUtil.STATS_RECORD_SEP + ((comparator != null) 218 ? "Comparator: " + comparator.getClass().getSimpleName() 219 : "Comparator: " + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName())); 220 return sb.toString(); 221 } 222 223 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST) 224 public HFileBlockIndex.BlockIndexReader getBloomIndex() { 225 return index; 226 } 227 228 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST) 229 public int getHashCount() { 230 return hashCount; 231 } 232 233 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST) 234 public Hash getHash() { 235 return hash; 236 } 237}