001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.DataInput;
021import java.io.IOException;
022import org.apache.hadoop.conf.Configuration;
023import org.apache.hadoop.hbase.CellComparatorImpl;
024import org.apache.hadoop.hbase.io.hfile.BloomFilterMetrics;
025import org.apache.hadoop.hbase.io.hfile.CacheConfig;
026import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter;
027import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterBase;
028import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter;
029import org.apache.hadoop.hbase.io.hfile.HFile;
030import org.apache.hadoop.hbase.regionserver.BloomType;
031import org.apache.yetus.audience.InterfaceAudience;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035/**
036 * Handles Bloom filter initialization based on configuration and serialized metadata in the reader
037 * and writer of {@link org.apache.hadoop.hbase.regionserver.HStoreFile}.
038 */
039@InterfaceAudience.Private
040public final class BloomFilterFactory {
041
042  private static final Logger LOG = LoggerFactory.getLogger(BloomFilterFactory.class.getName());
043
044  /** This class should not be instantiated. */
045  private BloomFilterFactory() {
046  }
047
048  /**
049   * Specifies the target error rate to use when selecting the number of keys per Bloom filter.
050   */
051  public static final String IO_STOREFILE_BLOOM_ERROR_RATE = "io.storefile.bloom.error.rate";
052
053  /**
054   * Maximum folding factor allowed. The Bloom filter will be shrunk by the factor of up to 2 **
055   * this times if we oversize it initially.
056   */
057  public static final String IO_STOREFILE_BLOOM_MAX_FOLD = "io.storefile.bloom.max.fold";
058
059  /**
060   * For default (single-block) Bloom filters this specifies the maximum number of keys.
061   */
062  public static final String IO_STOREFILE_BLOOM_MAX_KEYS = "io.storefile.bloom.max.keys";
063
064  /** Master switch to enable Bloom filters */
065  public static final String IO_STOREFILE_BLOOM_ENABLED = "io.storefile.bloom.enabled";
066
067  /** Master switch to enable Delete Family Bloom filters */
068  public static final String IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED =
069    "io.storefile.delete.family.bloom.enabled";
070
071  /**
072   * Target Bloom block size. Bloom filter blocks of approximately this size are interleaved with
073   * data blocks.
074   */
075  public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE = "io.storefile.bloom.block.size";
076
077  /** Maximum number of times a Bloom filter can be "folded" if oversized */
078  private static final int MAX_ALLOWED_FOLD_FACTOR = 7;
079
080  /**
081   * Instantiates the correct Bloom filter class based on the version provided in the meta block
082   * data.
083   * @param meta   the byte array holding the Bloom filter's metadata, including version information
084   * @param reader the {@link HFile} reader to use to lazily load Bloom filter blocks
085   * @return an instance of the correct type of Bloom filter
086   */
087  public static BloomFilter createFromMeta(DataInput meta, HFile.Reader reader)
088    throws IllegalArgumentException, IOException {
089    return createFromMeta(meta, reader, null);
090  }
091
092  public static BloomFilter createFromMeta(DataInput meta, HFile.Reader reader,
093    BloomFilterMetrics metrics) throws IllegalArgumentException, IOException {
094    int version = meta.readInt();
095    switch (version) {
096      case CompoundBloomFilterBase.VERSION:
097        return new CompoundBloomFilter(meta, reader, metrics);
098
099      default:
100        throw new IllegalArgumentException("Bad bloom filter format version " + version);
101    }
102  }
103
104  /**
105   * Returns true if general Bloom (Row or RowCol) filters are enabled in the given configuration
106   */
107  public static boolean isGeneralBloomEnabled(Configuration conf) {
108    return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true);
109  }
110
111  /** Returns true if Delete Family Bloom filters are enabled in the given configuration */
112  public static boolean isDeleteFamilyBloomEnabled(Configuration conf) {
113    return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
114  }
115
116  /** Returns the Bloom filter error rate in the given configuration */
117  public static float getErrorRate(Configuration conf) {
118    return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
119  }
120
121  /** Returns the value for Bloom filter max fold in the given configuration */
122  public static int getMaxFold(Configuration conf) {
123    return conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR);
124  }
125
126  /** Returns the compound Bloom filter block size from the configuration */
127  public static int getBloomBlockSize(Configuration conf) {
128    return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024);
129  }
130
131  /** Returns max key for the Bloom filter from the configuration */
132  public static int getMaxKeys(Configuration conf) {
133    return conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128 * 1000 * 1000);
134  }
135
136  /**
137   * Creates a new general (Row or RowCol) Bloom filter at the time of
138   * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing.
139   * @param maxKeys an estimate of the number of keys we expect to insert. Irrelevant if compound
140   *                Bloom filters are enabled.
141   * @param writer  the HFile writer
142   * @return the new Bloom filter, or null in case Bloom filters are disabled or when failed to
143   *         create one.
144   */
145  public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
146    CacheConfig cacheConf, BloomType bloomType, int maxKeys, HFile.Writer writer) {
147    if (!isGeneralBloomEnabled(conf)) {
148      LOG.trace("Bloom filters are disabled by configuration for " + writer.getPath()
149        + (conf == null ? " (configuration is null)" : ""));
150      return null;
151    } else if (bloomType == BloomType.NONE) {
152      LOG.trace("Bloom filter is turned off for the column family");
153      return null;
154    }
155
156    float err = getErrorRate(conf);
157
158    // In case of row/column Bloom filter lookups, each lookup is an OR if two
159    // separate lookups. Therefore, if each lookup's false positive rate is p,
160    // the resulting false positive rate is err = 1 - (1 - p)^2, and
161    // p = 1 - sqrt(1 - err).
162    if (bloomType == BloomType.ROWCOL) {
163      err = (float) (1 - Math.sqrt(1 - err));
164    }
165
166    int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR);
167
168    // Do we support compound bloom filters?
169    // In case of compound Bloom filters we ignore the maxKeys hint.
170    CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
171      err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
172      bloomType == BloomType.ROWCOL ? CellComparatorImpl.COMPARATOR : null, bloomType);
173    writer.addInlineBlockWriter(bloomWriter);
174    return bloomWriter;
175  }
176
177  /**
178   * Creates a new Delete Family Bloom filter at the time of
179   * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing.
180   * @param maxKeys an estimate of the number of keys we expect to insert. Irrelevant if compound
181   *                Bloom filters are enabled.
182   * @param writer  the HFile writer
183   * @return the new Bloom filter, or null in case Bloom filters are disabled or when failed to
184   *         create one.
185   */
186  public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf,
187    CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
188    if (!isDeleteFamilyBloomEnabled(conf)) {
189      LOG.info("Delete Bloom filters are disabled by configuration for " + writer.getPath()
190        + (conf == null ? " (configuration is null)" : ""));
191      return null;
192    }
193
194    float err = getErrorRate(conf);
195
196    int maxFold = getMaxFold(conf);
197    // In case of compound Bloom filters we ignore the maxKeys hint.
198    CompoundBloomFilterWriter bloomWriter =
199      new CompoundBloomFilterWriter(getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold,
200        cacheConf.shouldCacheBloomsOnWrite(), null, BloomType.ROW);
201    writer.addInlineBlockWriter(bloomWriter);
202    return bloomWriter;
203  }
204}