View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.DataInput;
23  import java.io.IOException;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.CellComparator;
30  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
31  import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter;
32  import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterBase;
33  import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter;
34  import org.apache.hadoop.hbase.io.hfile.HFile;
35  import org.apache.hadoop.hbase.regionserver.BloomType;
36  
37  /**
38   * Handles Bloom filter initialization based on configuration and serialized
39   * metadata in the reader and writer of {@link org.apache.hadoop.hbase.regionserver.StoreFile}.
40   */
41  @InterfaceAudience.Private
42  public final class BloomFilterFactory {
43  
44    private static final Log LOG =
45        LogFactory.getLog(BloomFilterFactory.class.getName());
46  
47    /** This class should not be instantiated. */
48    private BloomFilterFactory() {}
49  
50    /**
51     * Specifies the target error rate to use when selecting the number of keys
52     * per Bloom filter.
53     */
54    public static final String IO_STOREFILE_BLOOM_ERROR_RATE =
55        "io.storefile.bloom.error.rate";
56  
57    /**
58     * Maximum folding factor allowed. The Bloom filter will be shrunk by
59     * the factor of up to 2 ** this times if we oversize it initially.
60     */
61    public static final String IO_STOREFILE_BLOOM_MAX_FOLD =
62        "io.storefile.bloom.max.fold";
63  
64    /**
65     * For default (single-block) Bloom filters this specifies the maximum number
66     * of keys.
67     */
68    public static final String IO_STOREFILE_BLOOM_MAX_KEYS =
69        "io.storefile.bloom.max.keys";
70  
71    /** Master switch to enable Bloom filters */
72    public static final String IO_STOREFILE_BLOOM_ENABLED =
73        "io.storefile.bloom.enabled";
74  
75    /** Master switch to enable Delete Family Bloom filters */
76    public static final String IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED =
77        "io.storefile.delete.family.bloom.enabled";
78  
79    /**
80     * Target Bloom block size. Bloom filter blocks of approximately this size
81     * are interleaved with data blocks.
82     */
83    public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE =
84        "io.storefile.bloom.block.size";
85  
86    /** Maximum number of times a Bloom filter can be "folded" if oversized */
87    private static final int MAX_ALLOWED_FOLD_FACTOR = 7;
88  
89    /**
90     * Instantiates the correct Bloom filter class based on the version provided
91     * in the meta block data.
92     *
93     * @param meta the byte array holding the Bloom filter's metadata, including
94     *          version information
95     * @param reader the {@link HFile} reader to use to lazily load Bloom filter
96     *          blocks
97     * @return an instance of the correct type of Bloom filter
98     * @throws IllegalArgumentException
99     */
100   public static BloomFilter
101       createFromMeta(DataInput meta, HFile.Reader reader)
102       throws IllegalArgumentException, IOException {
103     int version = meta.readInt();
104     switch (version) {
105       case CompoundBloomFilterBase.VERSION:
106         return new CompoundBloomFilter(meta, reader);
107 
108       default:
109         throw new IllegalArgumentException(
110           "Bad bloom filter format version " + version
111         );
112     }
113   }
114 
115   /**
116    * @return true if general Bloom (Row or RowCol) filters are enabled in the
117    * given configuration
118    */
119   public static boolean isGeneralBloomEnabled(Configuration conf) {
120     return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true);
121   }
122 
123   /**
124    * @return true if Delete Family Bloom filters are enabled in the given configuration
125    */
126   public static boolean isDeleteFamilyBloomEnabled(Configuration conf) {
127     return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
128   }
129 
130   /**
131    * @return the Bloom filter error rate in the given configuration
132    */
133   public static float getErrorRate(Configuration conf) {
134     return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
135   }
136 
137   /**
138    * @return the value for Bloom filter max fold in the given configuration
139    */
140   public static int getMaxFold(Configuration conf) {
141     return conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR);
142   }
143 
144   /** @return the compound Bloom filter block size from the configuration */
145   public static int getBloomBlockSize(Configuration conf) {
146     return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024);
147   }
148 
149   /**
150   * @return max key for the Bloom filter from the configuration
151   */
152   public static int getMaxKeys(Configuration conf) {
153     return conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128 * 1000 * 1000);
154   }
155 
156   /**
157    * Creates a new general (Row or RowCol) Bloom filter at the time of
158    * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
159    *
160    * @param conf
161    * @param cacheConf
162    * @param bloomType
163    * @param maxKeys an estimate of the number of keys we expect to insert.
164    *        Irrelevant if compound Bloom filters are enabled.
165    * @param writer the HFile writer
166    * @return the new Bloom filter, or null in case Bloom filters are disabled
167    *         or when failed to create one.
168    */
169   public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
170       CacheConfig cacheConf, BloomType bloomType, int maxKeys,
171       HFile.Writer writer) {
172     if (!isGeneralBloomEnabled(conf)) {
173       LOG.trace("Bloom filters are disabled by configuration for "
174           + writer.getPath()
175           + (conf == null ? " (configuration is null)" : ""));
176       return null;
177     } else if (bloomType == BloomType.NONE) {
178       LOG.trace("Bloom filter is turned off for the column family");
179       return null;
180     }
181 
182     float err = getErrorRate(conf);
183 
184     // In case of row/column Bloom filter lookups, each lookup is an OR if two
185     // separate lookups. Therefore, if each lookup's false positive rate is p,
186     // the resulting false positive rate is err = 1 - (1 - p)^2, and
187     // p = 1 - sqrt(1 - err).
188     if (bloomType == BloomType.ROWCOL) {
189       err = (float) (1 - Math.sqrt(1 - err));
190     }
191 
192     int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD,
193         MAX_ALLOWED_FOLD_FACTOR);
194 
195     // Do we support compound bloom filters?
196     // In case of compound Bloom filters we ignore the maxKeys hint.
197     CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
198         err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
199         bloomType == BloomType.ROWCOL ? CellComparator.COMPARATOR : null);
200     writer.addInlineBlockWriter(bloomWriter);
201     return bloomWriter;
202   }
203 
204   /**
205    * Creates a new Delete Family Bloom filter at the time of
206    * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
207    * @param conf
208    * @param cacheConf
209    * @param maxKeys an estimate of the number of keys we expect to insert.
210    *        Irrelevant if compound Bloom filters are enabled.
211    * @param writer the HFile writer
212    * @return the new Bloom filter, or null in case Bloom filters are disabled
213    *         or when failed to create one.
214    */
215   public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf,
216       CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
217     if (!isDeleteFamilyBloomEnabled(conf)) {
218       LOG.info("Delete Bloom filters are disabled by configuration for "
219           + writer.getPath()
220           + (conf == null ? " (configuration is null)" : ""));
221       return null;
222     }
223 
224     float err = getErrorRate(conf);
225 
226     int maxFold = getMaxFold(conf);
227     // In case of compound Bloom filters we ignore the maxKeys hint.
228     CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
229         err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
230         null);
231     writer.addInlineBlockWriter(bloomWriter);
232     return bloomWriter;
233   }
234 };