View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver.compactions;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.List;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
31  import org.apache.hadoop.hbase.regionserver.StoreFile;
32  
33  /**
34   * Class to pick which files if any to compact together.
35   *
36   * This class will search all possibilities for different and if it gets stuck it will choose
37   * the smallest set of files to compact.
38   */
39  @InterfaceAudience.Private
40  public class ExploringCompactionPolicy extends RatioBasedCompactionPolicy {
41    private static final Log LOG = LogFactory.getLog(ExploringCompactionPolicy.class);
42  
43    /**
44     * Constructor for ExploringCompactionPolicy.
45     * @param conf The configuration object
46     * @param storeConfigInfo An object to provide info about the store.
47     */
48    public ExploringCompactionPolicy(final Configuration conf,
49                                     final StoreConfigInformation storeConfigInfo) {
50      super(conf, storeConfigInfo);
51    }
52  
53    @Override
54    final ArrayList<StoreFile> applyCompactionPolicy(final ArrayList<StoreFile> candidates,
55      final boolean mayUseOffPeak, final boolean mightBeStuck) throws IOException {
56      return new ArrayList<StoreFile>(applyCompactionPolicy(candidates, mightBeStuck,
57          mayUseOffPeak, comConf.getMinFilesToCompact(), comConf.getMaxFilesToCompact()));
58    }
59  
60    public List<StoreFile> applyCompactionPolicy(final List<StoreFile> candidates,
61         boolean mightBeStuck, boolean mayUseOffPeak, int minFiles, int maxFiles) {
62  
63      final double currentRatio = mayUseOffPeak
64          ? comConf.getCompactionRatioOffPeak() : comConf.getCompactionRatio();
65  
66      // Start off choosing nothing.
67      List<StoreFile> bestSelection = new ArrayList<StoreFile>(0);
68      List<StoreFile> smallest = mightBeStuck ? new ArrayList<StoreFile>(0) : null;
69      long bestSize = 0;
70      long smallestSize = Long.MAX_VALUE;
71  
72      int opts = 0, optsInRatio = 0, bestStart = -1; // for debug logging
73      // Consider every starting place.
74      for (int start = 0; start < candidates.size(); start++) {
75        // Consider every different sub list permutation in between start and end with min files.
76        for (int currentEnd = start + minFiles - 1;
77            currentEnd < candidates.size(); currentEnd++) {
78          List<StoreFile> potentialMatchFiles = candidates.subList(start, currentEnd + 1);
79  
80          // Sanity checks
81          if (potentialMatchFiles.size() < minFiles) {
82            continue;
83          }
84          if (potentialMatchFiles.size() > maxFiles) {
85            continue;
86          }
87  
88          // Compute the total size of files that will
89          // have to be read if this set of files is compacted.
90          long size = getTotalStoreSize(potentialMatchFiles);
91  
92          // Store the smallest set of files.  This stored set of files will be used
93          // if it looks like the algorithm is stuck.
94          if (mightBeStuck && size < smallestSize) {
95            smallest = potentialMatchFiles;
96            smallestSize = size;
97          }
98  
99          if (size > comConf.getMaxCompactSize()) {
100           continue;
101         }
102 
103         ++opts;
104         if (size >= comConf.getMinCompactSize()
105             && !filesInRatio(potentialMatchFiles, currentRatio)) {
106           continue;
107         }
108 
109         ++optsInRatio;
110         if (isBetterSelection(bestSelection, bestSize, potentialMatchFiles, size, mightBeStuck)) {
111           bestSelection = potentialMatchFiles;
112           bestSize = size;
113           bestStart = start;
114         }
115       }
116     }
117     if (bestSelection.size() == 0 && mightBeStuck) {
118       LOG.debug("Exploring compaction algorithm has selected " + smallest.size()
119           + " files of size "+ smallestSize + " because the store might be stuck");
120       return new ArrayList<StoreFile>(smallest);
121     }
122     LOG.debug("Exploring compaction algorithm has selected " + bestSelection.size()
123         + " files of size " + bestSize + " starting at candidate #" + bestStart +
124         " after considering " + opts + " permutations with " + optsInRatio + " in ratio");
125     return new ArrayList<StoreFile>(bestSelection);
126   }
127 
128   private boolean isBetterSelection(List<StoreFile> bestSelection,
129       long bestSize, List<StoreFile> selection, long size, boolean mightBeStuck) {
130     if (mightBeStuck && bestSize > 0 && size > 0) {
131       // Keep the selection that removes most files for least size. That penaltizes adding
132       // large files to compaction, but not small files, so we don't become totally inefficient
133       // (might want to tweak that in future). Also, given the current order of looking at
134       // permutations, prefer earlier files and smaller selection if the difference is small.
135       final double REPLACE_IF_BETTER_BY = 1.05;
136       double thresholdQuality = ((double)bestSelection.size() / bestSize) * REPLACE_IF_BETTER_BY;
137       return thresholdQuality < ((double)selection.size() / size);
138     }
139     // Keep if this gets rid of more files.  Or the same number of files for less io.
140     return selection.size() > bestSelection.size()
141       || (selection.size() == bestSelection.size() && size < bestSize);
142   }
143 
144   /**
145    * Find the total size of a list of store files.
146    * @param potentialMatchFiles StoreFile list.
147    * @return Sum of StoreFile.getReader().length();
148    */
149   private long getTotalStoreSize(final List<StoreFile> potentialMatchFiles) {
150     long size = 0;
151 
152     for (StoreFile s:potentialMatchFiles) {
153       size += s.getReader().length();
154     }
155     return size;
156   }
157 
158   /**
159    * Check that all files satisfy the constraint
160    *      FileSize(i) <= ( Sum(0,N,FileSize(_)) - FileSize(i) ) * Ratio.
161    *
162    * @param files List of store files to consider as a compaction candidate.
163    * @param currentRatio The ratio to use.
164    * @return a boolean if these files satisfy the ratio constraints.
165    */
166   private boolean filesInRatio(final List<StoreFile> files, final double currentRatio) {
167     if (files.size() < 2) {
168       return true;
169     }
170 
171     long totalFileSize = getTotalStoreSize(files);
172 
173     for (StoreFile file : files) {
174       long singleFileSize = file.getReader().length();
175       long sumAllOtherFileSizes = totalFileSize - singleFileSize;
176 
177       if (singleFileSize > sumAllOtherFileSizes * currentRatio) {
178         return false;
179       }
180     }
181     return true;
182   }
183 }