/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util;

import java.io.DataInput;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.BloomType;

/**
 * Handles Bloom filter initialization based on configuration and serialized
 * metadata in the reader and writer of {@link org.apache.hadoop.hbase.regionserver.StoreFile}.
 */
@InterfaceAudience.Private
public final class BloomFilterFactory {

  private static final Log LOG =
      LogFactory.getLog(BloomFilterFactory.class.getName());

  /** This class should not be instantiated. */
  private BloomFilterFactory() {}

  /**
   * Specifies the target error rate to use when selecting the number of keys
   * per Bloom filter.
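   *
   * <p>For illustration only, a sketch of tightening the default 1% target
   * via a client-side {@code Configuration} (this snippet is an example, not
   * part of this class; {@code HBaseConfiguration} is the usual factory):
   * <pre>{@code
   * Configuration conf = HBaseConfiguration.create();
   * conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0.005f); // 0.5% false positives
   * }</pre>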
   */
  public static final String IO_STOREFILE_BLOOM_ERROR_RATE =
      "io.storefile.bloom.error.rate";

  /**
   * Maximum folding factor allowed. The Bloom filter will be shrunk by a
   * factor of up to 2 to the power of this value if it was oversized initially.
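   * For example, with the default maximum fold factor of 7 this allows an
   * oversized filter to shrink by up to a factor of 2^7 = 128 (a worked
   * consequence of the definition above, offered for illustration).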
   */
  public static final String IO_STOREFILE_BLOOM_MAX_FOLD =
      "io.storefile.bloom.max.fold";

  /**
   * For default (single-block) Bloom filters this specifies the maximum number
   * of keys.
   */
  public static final String IO_STOREFILE_BLOOM_MAX_KEYS =
      "io.storefile.bloom.max.keys";

  /** Master switch to enable Bloom filters */
  public static final String IO_STOREFILE_BLOOM_ENABLED =
      "io.storefile.bloom.enabled";

  /** Master switch to enable Delete Family Bloom filters */
  public static final String IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED =
      "io.storefile.delete.family.bloom.enabled";

  /**
   * Target Bloom block size. Bloom filter blocks of approximately this size
   * are interleaved with data blocks.
   */
  public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE =
      "io.storefile.bloom.block.size";

  /** Maximum number of times a Bloom filter can be "folded" if oversized */
  private static final int MAX_ALLOWED_FOLD_FACTOR = 7;

  /**
   * Instantiates the correct Bloom filter class based on the version provided
   * in the meta block data.
   *
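   * <p>A minimal read-side sketch, under the assumption that the metadata
   * {@code DataInput} comes from
   * {@code reader.getGeneralBloomFilterMetadata()} as on the store-file read
   * path (illustrative, not the reader's exact code):
   * <pre>{@code
   * DataInput meta = reader.getGeneralBloomFilterMetadata();
   * if (meta != null) {
   *   BloomFilter bloom = BloomFilterFactory.createFromMeta(meta, reader);
   * }
   * }</pre>
   *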
   * @param meta the {@link DataInput} positioned at the serialized Bloom
   *          filter metadata, including version information
   * @param reader the {@link HFile} reader to use to lazily load Bloom filter
   *          blocks
   * @return an instance of the correct type of Bloom filter
   * @throws IllegalArgumentException if the Bloom filter format version is not
   *           recognized
   * @throws IOException if the metadata cannot be read
   */
  public static BloomFilter createFromMeta(DataInput meta, HFile.Reader reader)
      throws IllegalArgumentException, IOException {
    int version = meta.readInt();
    switch (version) {
      case ByteBloomFilter.VERSION:
        // This is only possible in a version 1 HFile. Version 1 Bloom filters
        // always use raw byte comparators, so no comparator is needed here.
        return new ByteBloomFilter(meta);

      case CompoundBloomFilterBase.VERSION:
        return new CompoundBloomFilter(meta, reader);

      default:
        throw new IllegalArgumentException(
          "Bad bloom filter format version " + version
        );
    }
  }

  /**
   * @return true if general Bloom (Row or RowCol) filters are enabled in the
   *         given configuration
   */
  public static boolean isGeneralBloomEnabled(Configuration conf) {
    return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true);
  }

  /**
   * @return true if Delete Family Bloom filters are enabled in the given configuration
   */
  public static boolean isDeleteFamilyBloomEnabled(Configuration conf) {
    return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
  }

  /**
   * @return the Bloom filter error rate in the given configuration
   */
  public static float getErrorRate(Configuration conf) {
    return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, 0.01f);
  }

  /**
   * @return the maximum Bloom filter folding factor from the given configuration
   */
  public static int getMaxFold(Configuration conf) {
    return conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR);
  }

  /** @return the compound Bloom filter block size from the configuration */
  public static int getBloomBlockSize(Configuration conf) {
    return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024);
  }

  /**
   * @return the maximum number of keys for a single-block Bloom filter from
   *         the given configuration
   */
  public static int getMaxKeys(Configuration conf) {
    return conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128 * 1000 * 1000);
  }

  /**
   * Creates a new general (Row or RowCol) Bloom filter at the time of
   * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
   *
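   * <p>A hedged write-path sketch (an illustrative example, not the store-file
   * writer's exact code; {@code conf}, {@code cacheConf} and an open
   * {@code HFile.Writer} named {@code writer} are assumed to exist, and keys
   * are fed to the returned writer via {@code BloomFilterWriter#add}):
   * <pre>{@code
   * BloomFilterWriter bloomWriter = BloomFilterFactory.createGeneralBloomAtWrite(
   *     conf, cacheConf, BloomType.ROW, -1, writer);
   * if (bloomWriter != null) {
   *   byte[] row = Bytes.toBytes("row1");
   *   bloomWriter.add(row, 0, row.length); // one add per row key written
   * }
   * }</pre>
   *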
   * @param conf the configuration to read Bloom filter settings from
   * @param cacheConf the cache configuration for the store file
   * @param bloomType the type of Bloom filter to create (Row or RowCol)
   * @param maxKeys an estimate of the number of keys we expect to insert.
   *        Irrelevant if compound Bloom filters are enabled.
   * @param writer the HFile writer
   * @return the new Bloom filter, or null if Bloom filters are disabled
   *         or one could not be created
   */
  public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
      CacheConfig cacheConf, BloomType bloomType, int maxKeys,
      HFile.Writer writer) {
    if (conf == null || !isGeneralBloomEnabled(conf)) {
      LOG.trace("Bloom filters are disabled by configuration for "
          + writer.getPath()
          + (conf == null ? " (configuration is null)" : ""));
      return null;
    } else if (bloomType == BloomType.NONE) {
      LOG.trace("Bloom filter is turned off for the column family");
      return null;
    }

    float err = getErrorRate(conf);

    // In case of row/column Bloom filter lookups, each lookup is an OR of two
    // separate lookups. Therefore, if each lookup's false positive rate is p,
    // the resulting false positive rate is err = 1 - (1 - p)^2, and
    // p = 1 - sqrt(1 - err).
    if (bloomType == BloomType.ROWCOL) {
      err = (float) (1 - Math.sqrt(1 - err));
    }
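
    // Worked example (illustrative): with the default err = 0.01, the
    // per-lookup rate becomes p = 1 - sqrt(0.99) ~= 0.00501, and indeed
    // 1 - (1 - 0.00501)^2 ~= 0.01, matching the combined target rate.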

    int maxFold = getMaxFold(conf);

    // A compound Bloom filter is always used here, so the maxKeys hint is
    // ignored.
    CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
        err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
        bloomType == BloomType.ROWCOL ? KeyValue.COMPARATOR : KeyValue.RAW_COMPARATOR);
    writer.addInlineBlockWriter(bloomWriter);
    return bloomWriter;
  }

  /**
   * Creates a new Delete Family Bloom filter at the time of
   * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
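   *
   * <p>Usage mirrors {@link #createGeneralBloomAtWrite} minus the
   * {@code BloomType} argument; a hedged sketch with the same assumed
   * {@code conf}, {@code cacheConf} and {@code writer} as above:
   * <pre>{@code
   * BloomFilterWriter deleteBloom =
   *     BloomFilterFactory.createDeleteBloomAtWrite(conf, cacheConf, -1, writer);
   * }</pre>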
   * @param conf the configuration to read Bloom filter settings from
   * @param cacheConf the cache configuration for the store file
   * @param maxKeys an estimate of the number of keys we expect to insert.
   *        Irrelevant if compound Bloom filters are enabled.
   * @param writer the HFile writer
   * @return the new Bloom filter, or null if Bloom filters are disabled
   *         or one could not be created
   */
  public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf,
      CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
    if (conf == null || !isDeleteFamilyBloomEnabled(conf)) {
      LOG.info("Delete Bloom filters are disabled by configuration for "
          + writer.getPath()
          + (conf == null ? " (configuration is null)" : ""));
      return null;
    }

    float err = getErrorRate(conf);

    int maxFold = getMaxFold(conf);
    // A compound Bloom filter is always used here, so the maxKeys hint is
    // ignored.
    CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
        err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
        KeyValue.RAW_COMPARATOR);
    writer.addInlineBlockWriter(bloomWriter);
    return bloomWriter;
  }
}