001/* 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.io.hfile; 021 022import java.io.DataInput; 023import java.io.IOException; 024 025import org.apache.hadoop.hbase.Cell; 026import org.apache.yetus.audience.InterfaceAudience; 027import org.apache.hadoop.hbase.nio.ByteBuff; 028import org.apache.hadoop.hbase.regionserver.BloomType; 029import org.apache.hadoop.hbase.util.BloomFilter; 030import org.apache.hadoop.hbase.util.BloomFilterUtil; 031import org.apache.hadoop.hbase.util.Bytes; 032import org.apache.hadoop.hbase.util.Hash; 033 034/** 035 * A Bloom filter implementation built on top of 036 * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating 037 * a set of fixed-size Bloom filters written out at the time of 038 * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data 039 * block stream, and loaded on demand at query time. This class only provides 040 * reading capabilities. 041 */ 042@InterfaceAudience.Private 043public class CompoundBloomFilter extends CompoundBloomFilterBase 044 implements BloomFilter { 045 046 /** Used to load chunks on demand */ 047 private HFile.Reader reader; 048 049 private HFileBlockIndex.BlockIndexReader index; 050 051 private int hashCount; 052 private Hash hash; 053 054 private long[] numQueriesPerChunk; 055 private long[] numPositivesPerChunk; 056 057 /** 058 * De-serialization for compound Bloom filter metadata. Must be consistent 059 * with what {@link CompoundBloomFilterWriter} does. 060 * 061 * @param meta serialized Bloom filter metadata without any magic blocks 062 * @throws IOException 063 */ 064 public CompoundBloomFilter(DataInput meta, HFile.Reader reader) 065 throws IOException { 066 this.reader = reader; 067 068 totalByteSize = meta.readLong(); 069 hashCount = meta.readInt(); 070 hashType = meta.readInt(); 071 totalKeyCount = meta.readLong(); 072 totalMaxKeys = meta.readLong(); 073 numChunks = meta.readInt(); 074 byte[] comparatorClassName = Bytes.readByteArray(meta); 075 // The writer would have return 0 as the vint length for the case of 076 // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be 077 // null 078 if (comparatorClassName.length != 0) { 079 comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName)); 080 } 081 082 hash = Hash.getInstance(hashType); 083 if (hash == null) { 084 throw new IllegalArgumentException("Invalid hash type: " + hashType); 085 } 086 // We will pass null for ROW block 087 if(comparator == null) { 088 index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1); 089 } else { 090 index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1); 091 } 092 index.readRootIndex(meta, numChunks); 093 } 094 095 @Override 096 public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) { 097 int block = index.rootBlockContainingKey(key, keyOffset, keyLength); 098 if (block < 0) { 099 return false; // This key is not in the file. 100 } 101 boolean result; 102 HFileBlock bloomBlock = getBloomBlock(block); 103 try { 104 ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); 105 result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf, 106 bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount); 107 } finally { 108 // After the use, should release the block to deallocate byte buffers. 109 bloomBlock.release(); 110 } 111 if (numPositivesPerChunk != null && result) { 112 // Update statistics. Only used in unit tests. 113 ++numPositivesPerChunk[block]; 114 } 115 return result; 116 } 117 118 private HFileBlock getBloomBlock(int block) { 119 HFileBlock bloomBlock; 120 try { 121 // We cache the block and use a positional read. 122 bloomBlock = reader.readBlock(index.getRootBlockOffset(block), 123 index.getRootBlockDataSize(block), true, true, false, true, BlockType.BLOOM_CHUNK, null); 124 } catch (IOException ex) { 125 // The Bloom filter is broken, turn it off. 126 throw new IllegalArgumentException("Failed to load Bloom block", ex); 127 } 128 129 if (numQueriesPerChunk != null) { 130 // Update statistics. Only used in unit tests. 131 ++numQueriesPerChunk[block]; 132 } 133 return bloomBlock; 134 } 135 136 @Override 137 public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) { 138 int block = index.rootBlockContainingKey(keyCell); 139 if (block < 0) { 140 return false; // This key is not in the file. 141 } 142 boolean result; 143 HFileBlock bloomBlock = getBloomBlock(block); 144 try { 145 ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); 146 result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(), 147 bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type); 148 } finally { 149 // After the use, should release the block to deallocate the byte buffers. 150 bloomBlock.release(); 151 } 152 if (numPositivesPerChunk != null && result) { 153 // Update statistics. Only used in unit tests. 154 ++numPositivesPerChunk[block]; 155 } 156 return result; 157 } 158 159 @Override 160 public boolean supportsAutoLoading() { 161 return true; 162 } 163 164 public int getNumChunks() { 165 return numChunks; 166 } 167 168 public void enableTestingStats() { 169 numQueriesPerChunk = new long[numChunks]; 170 numPositivesPerChunk = new long[numChunks]; 171 } 172 173 public String formatTestingStats() { 174 StringBuilder sb = new StringBuilder(); 175 for (int i = 0; i < numChunks; ++i) { 176 sb.append("chunk #"); 177 sb.append(i); 178 sb.append(": queries="); 179 sb.append(numQueriesPerChunk[i]); 180 sb.append(", positives="); 181 sb.append(numPositivesPerChunk[i]); 182 sb.append(", positiveRatio="); 183 sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]); 184 sb.append(";\n"); 185 } 186 return sb.toString(); 187 } 188 189 public long getNumQueriesForTesting(int chunk) { 190 return numQueriesPerChunk[chunk]; 191 } 192 193 public long getNumPositivesForTesting(int chunk) { 194 return numPositivesPerChunk[chunk]; 195 } 196 197 @Override 198 public String toString() { 199 StringBuilder sb = new StringBuilder(); 200 sb.append(BloomFilterUtil.formatStats(this)); 201 sb.append(BloomFilterUtil.STATS_RECORD_SEP + 202 "Number of chunks: " + numChunks); 203 sb.append(BloomFilterUtil.STATS_RECORD_SEP + 204 ((comparator != null) ? "Comparator: " 205 + comparator.getClass().getSimpleName() : "Comparator: " 206 + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName())); 207 return sb.toString(); 208 } 209 210}