001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.io.DataInput; 021import java.io.IOException; 022import org.apache.hadoop.hbase.Cell; 023import org.apache.hadoop.hbase.nio.ByteBuff; 024import org.apache.hadoop.hbase.regionserver.BloomType; 025import org.apache.hadoop.hbase.util.BloomFilter; 026import org.apache.hadoop.hbase.util.BloomFilterUtil; 027import org.apache.hadoop.hbase.util.Bytes; 028import org.apache.hadoop.hbase.util.Hash; 029import org.apache.yetus.audience.InterfaceAudience; 030 031/** 032 * A Bloom filter implementation built on top of 033 * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating a set of fixed-size Bloom 034 * filters written out at the time of {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into 035 * the data block stream, and loaded on demand at query time. This class only provides reading 036 * capabilities. 037 */ 038@InterfaceAudience.Private 039public class CompoundBloomFilter extends CompoundBloomFilterBase implements BloomFilter { 040 041 /** Used to load chunks on demand */ 042 private HFile.Reader reader; 043 private final BloomFilterMetrics metrics; 044 045 private HFileBlockIndex.BlockIndexReader index; 046 047 private int hashCount; 048 private Hash hash; 049 050 private long[] numQueriesPerChunk; 051 private long[] numPositivesPerChunk; 052 053 /** 054 * De-serialization for compound Bloom filter metadata. Must be consistent with what 055 * {@link CompoundBloomFilterWriter} does. 056 * @param meta serialized Bloom filter metadata without any magic blocks 057 * @param reader reader for hfile 058 * @param metrics for collecting bloom filter metrics. may be null 059 */ 060 public CompoundBloomFilter(DataInput meta, HFile.Reader reader, BloomFilterMetrics metrics) 061 throws IOException { 062 this.reader = reader; 063 this.metrics = metrics; 064 065 totalByteSize = meta.readLong(); 066 hashCount = meta.readInt(); 067 hashType = meta.readInt(); 068 totalKeyCount = meta.readLong(); 069 totalMaxKeys = meta.readLong(); 070 numChunks = meta.readInt(); 071 byte[] comparatorClassName = Bytes.readByteArray(meta); 072 // The writer would have return 0 as the vint length for the case of 073 // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be 074 // null 075 if (comparatorClassName.length != 0) { 076 comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName)); 077 } 078 079 hash = Hash.getInstance(hashType); 080 if (hash == null) { 081 throw new IllegalArgumentException("Invalid hash type: " + hashType); 082 } 083 // We will pass null for ROW block 084 if (comparator == null) { 085 index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1); 086 } else { 087 index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1); 088 } 089 index.readRootIndex(meta, numChunks); 090 } 091 092 @Override 093 public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) { 094 boolean result = containsInternal(key, keyOffset, keyLength, bloom); 095 if (metrics != null) { 096 metrics.incrementRequests(result); 097 } 098 return result; 099 } 100 101 private boolean containsInternal(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) { 102 int block = index.rootBlockContainingKey(key, keyOffset, keyLength); 103 if (block < 0) { 104 return false; // This key is not in the file. 105 } 106 boolean result; 107 HFileBlock bloomBlock = getBloomBlock(block); 108 try { 109 ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); 110 result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf, 111 bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount); 112 } finally { 113 // After the use, should release the block to deallocate byte buffers. 114 bloomBlock.release(); 115 } 116 if (numPositivesPerChunk != null && result) { 117 // Update statistics. Only used in unit tests. 118 ++numPositivesPerChunk[block]; 119 } 120 return result; 121 } 122 123 private HFileBlock getBloomBlock(int block) { 124 HFileBlock bloomBlock; 125 try { 126 // We cache the block and use a positional read. 127 bloomBlock = reader.readBlock(index.getRootBlockOffset(block), 128 index.getRootBlockDataSize(block), true, true, false, true, BlockType.BLOOM_CHUNK, null); 129 } catch (IOException ex) { 130 // The Bloom filter is broken, turn it off. 131 throw new IllegalArgumentException("Failed to load Bloom block", ex); 132 } 133 134 if (numQueriesPerChunk != null) { 135 // Update statistics. Only used in unit tests. 136 ++numQueriesPerChunk[block]; 137 } 138 return bloomBlock; 139 } 140 141 @Override 142 public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) { 143 boolean result = containsInternal(keyCell, bloom, type); 144 if (metrics != null) { 145 metrics.incrementRequests(result); 146 } 147 return result; 148 } 149 150 private boolean containsInternal(Cell keyCell, ByteBuff bloom, BloomType type) { 151 int block = index.rootBlockContainingKey(keyCell); 152 if (block < 0) { 153 return false; // This key is not in the file. 154 } 155 boolean result; 156 HFileBlock bloomBlock = getBloomBlock(block); 157 try { 158 ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); 159 result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(), 160 bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type); 161 } finally { 162 // After the use, should release the block to deallocate the byte buffers. 163 bloomBlock.release(); 164 } 165 if (numPositivesPerChunk != null && result) { 166 // Update statistics. Only used in unit tests. 167 ++numPositivesPerChunk[block]; 168 } 169 return result; 170 } 171 172 @Override 173 public boolean supportsAutoLoading() { 174 return true; 175 } 176 177 public int getNumChunks() { 178 return numChunks; 179 } 180 181 public void enableTestingStats() { 182 numQueriesPerChunk = new long[numChunks]; 183 numPositivesPerChunk = new long[numChunks]; 184 } 185 186 public String formatTestingStats() { 187 StringBuilder sb = new StringBuilder(); 188 for (int i = 0; i < numChunks; ++i) { 189 sb.append("chunk #"); 190 sb.append(i); 191 sb.append(": queries="); 192 sb.append(numQueriesPerChunk[i]); 193 sb.append(", positives="); 194 sb.append(numPositivesPerChunk[i]); 195 sb.append(", positiveRatio="); 196 sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]); 197 sb.append(";\n"); 198 } 199 return sb.toString(); 200 } 201 202 public long getNumQueriesForTesting(int chunk) { 203 return numQueriesPerChunk[chunk]; 204 } 205 206 public long getNumPositivesForTesting(int chunk) { 207 return numPositivesPerChunk[chunk]; 208 } 209 210 @Override 211 public String toString() { 212 StringBuilder sb = new StringBuilder(); 213 sb.append(BloomFilterUtil.formatStats(this)); 214 sb.append(BloomFilterUtil.STATS_RECORD_SEP + "Number of chunks: " + numChunks); 215 sb.append(BloomFilterUtil.STATS_RECORD_SEP + ((comparator != null) 216 ? "Comparator: " + comparator.getClass().getSimpleName() 217 : "Comparator: " + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName())); 218 return sb.toString(); 219 } 220 221}