/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver.compactions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class to pick which files if any to compact together.
 *
 * This class will search all possible combinations of candidate files and, if it gets stuck,
 * it will choose the smallest set of files to compact.
 */
@InterfaceAudience.Private
public class ExploringCompactionPolicy extends RatioBasedCompactionPolicy {
  private static final Logger LOG = LoggerFactory.getLogger(ExploringCompactionPolicy.class);

  /**
   * Constructor for ExploringCompactionPolicy.
   * @param conf The configuration object
   * @param storeConfigInfo An object to provide info about the store.
   */
  public ExploringCompactionPolicy(final Configuration conf,
                                   final StoreConfigInformation storeConfigInfo) {
    super(conf, storeConfigInfo);
  }

  @Override
  protected final ArrayList<HStoreFile> applyCompactionPolicy(ArrayList<HStoreFile> candidates,
      boolean mayUseOffPeak, boolean mightBeStuck) throws IOException {
    return new ArrayList<>(applyCompactionPolicy(candidates, mightBeStuck, mayUseOffPeak,
      comConf.getMinFilesToCompact(), comConf.getMaxFilesToCompact()));
  }

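  /**
   * Search every contiguous sub-list of the candidate files and return the best one to compact.
   * @param candidates candidate files, in the order they will be examined.
   * @param mightBeStuck true if the store might be stuck; enables the smallest-set fallback
   *          used when no selection passes the ratio and size checks.
   * @param mayUseOffPeak true if the off-peak compaction ratio and maximum compaction size
   *          may be used.
   * @param minFiles minimum number of files in a selection.
   * @param maxFiles maximum number of files in a selection.
   * @return the files selected for compaction; may be empty.
   */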
  public List<HStoreFile> applyCompactionPolicy(List<HStoreFile> candidates, boolean mightBeStuck,
      boolean mayUseOffPeak, int minFiles, int maxFiles) {
    final double currentRatio = mayUseOffPeak
        ? comConf.getCompactionRatioOffPeak() : comConf.getCompactionRatio();

    // Start off choosing nothing.
    List<HStoreFile> bestSelection = new ArrayList<>(0);
    List<HStoreFile> smallest = mightBeStuck ? new ArrayList<>(0) : null;
    long bestSize = 0;
    long smallestSize = Long.MAX_VALUE;

    int opts = 0, optsInRatio = 0, bestStart = -1; // for debug logging
    // Consider every starting place.
    for (int start = 0; start < candidates.size(); start++) {
      // Consider every contiguous sub-list between start and the end of the candidates
      // that contains at least minFiles files.
      for (int currentEnd = start + minFiles - 1;
          currentEnd < candidates.size(); currentEnd++) {
        List<HStoreFile> potentialMatchFiles = candidates.subList(start, currentEnd + 1);

        // Sanity checks
        if (potentialMatchFiles.size() < minFiles) {
          continue;
        }
        if (potentialMatchFiles.size() > maxFiles) {
          continue;
        }

        // Compute the total size of files that will
        // have to be read if this set of files is compacted.
        long size = getTotalStoreSize(potentialMatchFiles);

        // Store the smallest set of files.  This stored set of files will be used
        // if it looks like the algorithm is stuck.
        if (mightBeStuck && size < smallestSize) {
          smallest = potentialMatchFiles;
          smallestSize = size;
        }

        if (size > comConf.getMaxCompactSize(mayUseOffPeak)) {
          continue;
        }

        ++opts;
        if (size >= comConf.getMinCompactSize()
            && !filesInRatio(potentialMatchFiles, currentRatio)) {
          continue;
        }

        ++optsInRatio;
        if (isBetterSelection(bestSelection, bestSize, potentialMatchFiles, size, mightBeStuck)) {
          bestSelection = potentialMatchFiles;
          bestSize = size;
          bestStart = start;
        }
      }
    }
    if (bestSelection.isEmpty() && mightBeStuck) {
      LOG.debug("Exploring compaction algorithm has selected {} files of size {} because the "
          + "store might be stuck", smallest.size(), smallestSize);
      return new ArrayList<>(smallest);
    }
    LOG.debug("Exploring compaction algorithm has selected {} files of size {} starting at " +
      "candidate #{} after considering {} permutations with {} in ratio", bestSelection.size(),
      bestSize, bestStart, opts, optsInRatio);
    return new ArrayList<>(bestSelection);
  }

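  /**
   * Compare a new selection against the current best one.
   * @param bestSelection the best selection found so far.
   * @param bestSize total size of the files in {@code bestSelection}.
   * @param selection the new selection to evaluate.
   * @param size total size of the files in {@code selection}.
   * @param mightBeStuck true if the store might be stuck; switches the comparison to a
   *          files-removed-per-byte-read quality measure.
   * @return true if {@code selection} should replace {@code bestSelection}.
   */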
  private boolean isBetterSelection(List<HStoreFile> bestSelection, long bestSize,
      List<HStoreFile> selection, long size, boolean mightBeStuck) {
    if (mightBeStuck && bestSize > 0 && size > 0) {
      // Keep the selection that removes the most files for the least size. That penalizes adding
      // large files to a compaction, but not small files, so we don't become totally inefficient
      // (might want to tweak that in future). Also, given the current order of looking at
      // permutations, prefer earlier files and smaller selection if the difference is small.
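      // For example, five files totalling 100 MB score 0.05 files per MB, which beats two files
      // totalling the same 100 MB at 0.02 files per MB.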
      final double REPLACE_IF_BETTER_BY = 1.05;
      double thresholdQuality = ((double)bestSelection.size() / bestSize) * REPLACE_IF_BETTER_BY;
      return thresholdQuality < ((double)selection.size() / size);
    }
    // Keep if this gets rid of more files.  Or the same number of files for less io.
    return selection.size() > bestSelection.size()
      || (selection.size() == bestSelection.size() && size < bestSize);
  }

  /**
   * Find the total size of a list of store files.
   * @param potentialMatchFiles StoreFile list.
   * @return Sum of StoreFile.getReader().length();
   */
  private long getTotalStoreSize(List<HStoreFile> potentialMatchFiles) {
    return potentialMatchFiles.stream().mapToLong(sf -> sf.getReader().length()).sum();
  }

  /**
   * Check that all files satisfy the constraint
   *      FileSize(i) <= ( Sum(0,N,FileSize(_)) - FileSize(i) ) * Ratio.
   *
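   * For example, with a ratio of 1.2 and files of sizes 100, 50 and 23, the constraint fails
   * for the file of size 100 because 100 > (50 + 23) * 1.2.
   *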
   * @param files List of store files to consider as a compaction candidate.
   * @param currentRatio The ratio to use.
   * @return true if all files satisfy the ratio constraint.
   */
  private boolean filesInRatio(List<HStoreFile> files, double currentRatio) {
    if (files.size() < 2) {
      return true;
    }

    long totalFileSize = getTotalStoreSize(files);

    for (HStoreFile file : files) {
      long singleFileSize = file.getReader().length();
      long sumAllOtherFileSizes = totalFileSize - singleFileSize;

      if (singleFileSize > sumAllOtherFileSizes * currentRatio) {
        return false;
      }
    }
    return true;
  }
}