001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import java.io.DataInput; 021import java.io.IOException; 022import org.apache.hadoop.conf.Configuration; 023import org.apache.hadoop.hbase.CellComparatorImpl; 024import org.apache.hadoop.hbase.io.hfile.BloomFilterMetrics; 025import org.apache.hadoop.hbase.io.hfile.CacheConfig; 026import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter; 027import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterBase; 028import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter; 029import org.apache.hadoop.hbase.io.hfile.HFile; 030import org.apache.hadoop.hbase.regionserver.BloomType; 031import org.apache.yetus.audience.InterfaceAudience; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035/** 036 * Handles Bloom filter initialization based on configuration and serialized metadata in the reader 037 * and writer of {@link org.apache.hadoop.hbase.regionserver.HStoreFile}. 038 */ 039@InterfaceAudience.Private 040public final class BloomFilterFactory { 041 042 private static final Logger LOG = LoggerFactory.getLogger(BloomFilterFactory.class.getName()); 043 044 /** This class should not be instantiated. */ 045 private BloomFilterFactory() { 046 } 047 048 /** 049 * Specifies the target error rate to use when selecting the number of keys per Bloom filter. 050 */ 051 public static final String IO_STOREFILE_BLOOM_ERROR_RATE = "io.storefile.bloom.error.rate"; 052 053 /** 054 * Maximum folding factor allowed. The Bloom filter will be shrunk by the factor of up to 2 ** 055 * this times if we oversize it initially. 056 */ 057 public static final String IO_STOREFILE_BLOOM_MAX_FOLD = "io.storefile.bloom.max.fold"; 058 059 /** 060 * For default (single-block) Bloom filters this specifies the maximum number of keys. 061 */ 062 public static final String IO_STOREFILE_BLOOM_MAX_KEYS = "io.storefile.bloom.max.keys"; 063 064 /** Master switch to enable Bloom filters */ 065 public static final String IO_STOREFILE_BLOOM_ENABLED = "io.storefile.bloom.enabled"; 066 067 /** Master switch to enable Delete Family Bloom filters */ 068 public static final String IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED = 069 "io.storefile.delete.family.bloom.enabled"; 070 071 /** 072 * Target Bloom block size. Bloom filter blocks of approximately this size are interleaved with 073 * data blocks. 074 */ 075 public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE = "io.storefile.bloom.block.size"; 076 077 /** Maximum number of times a Bloom filter can be "folded" if oversized */ 078 private static final int MAX_ALLOWED_FOLD_FACTOR = 7; 079 080 /** 081 * Instantiates the correct Bloom filter class based on the version provided in the meta block 082 * data. 083 * @param meta the byte array holding the Bloom filter's metadata, including version information 084 * @param reader the {@link HFile} reader to use to lazily load Bloom filter blocks 085 * @return an instance of the correct type of Bloom filter 086 */ 087 public static BloomFilter createFromMeta(DataInput meta, HFile.Reader reader) 088 throws IllegalArgumentException, IOException { 089 return createFromMeta(meta, reader, null); 090 } 091 092 public static BloomFilter createFromMeta(DataInput meta, HFile.Reader reader, 093 BloomFilterMetrics metrics) throws IllegalArgumentException, IOException { 094 int version = meta.readInt(); 095 switch (version) { 096 case CompoundBloomFilterBase.VERSION: 097 return new CompoundBloomFilter(meta, reader, metrics); 098 099 default: 100 throw new IllegalArgumentException("Bad bloom filter format version " + version); 101 } 102 } 103 104 /** 105 * Returns true if general Bloom (Row or RowCol) filters are enabled in the given configuration 106 */ 107 public static boolean isGeneralBloomEnabled(Configuration conf) { 108 return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true); 109 } 110 111 /** Returns true if Delete Family Bloom filters are enabled in the given configuration */ 112 public static boolean isDeleteFamilyBloomEnabled(Configuration conf) { 113 return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true); 114 } 115 116 /** Returns the Bloom filter error rate in the given configuration */ 117 public static float getErrorRate(Configuration conf) { 118 return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); 119 } 120 121 /** Returns the value for Bloom filter max fold in the given configuration */ 122 public static int getMaxFold(Configuration conf) { 123 return conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR); 124 } 125 126 /** Returns the compound Bloom filter block size from the configuration */ 127 public static int getBloomBlockSize(Configuration conf) { 128 return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024); 129 } 130 131 /** Returns max key for the Bloom filter from the configuration */ 132 public static int getMaxKeys(Configuration conf) { 133 return conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128 * 1000 * 1000); 134 } 135 136 /** 137 * Creates a new general (Row or RowCol) Bloom filter at the time of 138 * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing. 139 * @param maxKeys an estimate of the number of keys we expect to insert. Irrelevant if compound 140 * Bloom filters are enabled. 141 * @param writer the HFile writer 142 * @return the new Bloom filter, or null in case Bloom filters are disabled or when failed to 143 * create one. 144 */ 145 public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf, 146 CacheConfig cacheConf, BloomType bloomType, int maxKeys, HFile.Writer writer) { 147 if (!isGeneralBloomEnabled(conf)) { 148 LOG.trace("Bloom filters are disabled by configuration for " + writer.getPath() 149 + (conf == null ? " (configuration is null)" : "")); 150 return null; 151 } else if (bloomType == BloomType.NONE) { 152 LOG.trace("Bloom filter is turned off for the column family"); 153 return null; 154 } 155 156 float err = getErrorRate(conf); 157 158 // In case of row/column Bloom filter lookups, each lookup is an OR if two 159 // separate lookups. Therefore, if each lookup's false positive rate is p, 160 // the resulting false positive rate is err = 1 - (1 - p)^2, and 161 // p = 1 - sqrt(1 - err). 162 if (bloomType == BloomType.ROWCOL) { 163 err = (float) (1 - Math.sqrt(1 - err)); 164 } 165 166 int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR); 167 168 // Do we support compound bloom filters? 169 // In case of compound Bloom filters we ignore the maxKeys hint. 170 CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf), 171 err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), 172 bloomType == BloomType.ROWCOL ? CellComparatorImpl.COMPARATOR : null, bloomType); 173 writer.addInlineBlockWriter(bloomWriter); 174 return bloomWriter; 175 } 176 177 /** 178 * Creates a new Delete Family Bloom filter at the time of 179 * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing. 180 * @param maxKeys an estimate of the number of keys we expect to insert. Irrelevant if compound 181 * Bloom filters are enabled. 182 * @param writer the HFile writer 183 * @return the new Bloom filter, or null in case Bloom filters are disabled or when failed to 184 * create one. 185 */ 186 public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf, 187 CacheConfig cacheConf, int maxKeys, HFile.Writer writer) { 188 if (!isDeleteFamilyBloomEnabled(conf)) { 189 LOG.info("Delete Bloom filters are disabled by configuration for " + writer.getPath() 190 + (conf == null ? " (configuration is null)" : "")); 191 return null; 192 } 193 194 float err = getErrorRate(conf); 195 196 int maxFold = getMaxFold(conf); 197 // In case of compound Bloom filters we ignore the maxKeys hint. 198 CompoundBloomFilterWriter bloomWriter = 199 new CompoundBloomFilterWriter(getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, 200 cacheConf.shouldCacheBloomsOnWrite(), null, BloomType.ROW); 201 writer.addInlineBlockWriter(bloomWriter); 202 return bloomWriter; 203 } 204}