/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.compactions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class to pick which files, if any, to compact together.
 *
 * This class searches all possible compaction selections and, if it gets stuck, it will
 * choose the smallest set of files to compact.
 */
@InterfaceAudience.Private
public class ExploringCompactionPolicy extends RatioBasedCompactionPolicy {
  private static final Logger LOG = LoggerFactory.getLogger(ExploringCompactionPolicy.class);

  /**
   * Constructor for ExploringCompactionPolicy.
   * @param conf The configuration object
   * @param storeConfigInfo An object to provide info about the store.
   */
  public ExploringCompactionPolicy(final Configuration conf,
      final StoreConfigInformation storeConfigInfo) {
    super(conf, storeConfigInfo);
  }

  @Override
  protected final ArrayList<HStoreFile> applyCompactionPolicy(ArrayList<HStoreFile> candidates,
      boolean mayUseOffPeak, boolean mightBeStuck) throws IOException {
    return new ArrayList<>(applyCompactionPolicy(candidates, mightBeStuck, mayUseOffPeak,
      comConf.getMinFilesToCompact(), comConf.getMaxFilesToCompact()));
  }

  public List<HStoreFile> applyCompactionPolicy(List<HStoreFile> candidates, boolean mightBeStuck,
      boolean mayUseOffPeak, int minFiles, int maxFiles) {
    final double currentRatio = mayUseOffPeak
        ? comConf.getCompactionRatioOffPeak() : comConf.getCompactionRatio();

    // Start off choosing nothing.
    List<HStoreFile> bestSelection = new ArrayList<>(0);
    List<HStoreFile> smallest = mightBeStuck ? new ArrayList<>(0) : null;
    long bestSize = 0;
    long smallestSize = Long.MAX_VALUE;

    int opts = 0, optsInRatio = 0, bestStart = -1; // for debug logging
    // Consider every starting place.
    for (int start = 0; start < candidates.size(); start++) {
      // Consider every different contiguous sub-list between start and end that holds at
      // least minFiles files.
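      // Illustration (assumed example, not from the source): with candidates [a, b, c, d]
      // and minFiles = 2, the windows examined are [a,b], [a,b,c], [a,b,c,d], [b,c],
      // [b,c,d] and [c,d].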
      for (int currentEnd = start + minFiles - 1;
          currentEnd < candidates.size(); currentEnd++) {
        List<HStoreFile> potentialMatchFiles = candidates.subList(start, currentEnd + 1);

        // Sanity checks
        if (potentialMatchFiles.size() < minFiles) {
          continue;
        }
        if (potentialMatchFiles.size() > maxFiles) {
          continue;
        }

        // Compute the total size of files that will
        // have to be read if this set of files is compacted.
        long size = getTotalStoreSize(potentialMatchFiles);

        // Store the smallest set of files. This stored set of files will be used
        // if it looks like the algorithm is stuck.
        if (mightBeStuck && size < smallestSize) {
          smallest = potentialMatchFiles;
          smallestSize = size;
        }

        if (size > comConf.getMaxCompactSize(mayUseOffPeak)) {
          continue;
        }

        ++opts;
        if (size >= comConf.getMinCompactSize()
            && !filesInRatio(potentialMatchFiles, currentRatio)) {
          continue;
        }

        ++optsInRatio;
        if (isBetterSelection(bestSelection, bestSize, potentialMatchFiles, size, mightBeStuck)) {
          bestSelection = potentialMatchFiles;
          bestSize = size;
          bestStart = start;
        }
      }
    }
    if (bestSelection.isEmpty() && mightBeStuck) {
      LOG.debug("Exploring compaction algorithm has selected {} files of size {} because the "
          + "store might be stuck", smallest.size(), smallestSize);
      return new ArrayList<>(smallest);
    }
    LOG.debug("Exploring compaction algorithm has selected {} files of size {} starting at "
        + "candidate #{} after considering {} permutations with {} in ratio",
        bestSelection.size(), bestSize, bestStart, opts, optsInRatio);
    return new ArrayList<>(bestSelection);
  }

  private boolean isBetterSelection(List<HStoreFile> bestSelection, long bestSize,
      List<HStoreFile> selection, long size, boolean mightBeStuck) {
    if (mightBeStuck && bestSize > 0 && size > 0) {
      // Keep the selection that removes the most files for the least size. That penalizes adding
      // large files to a compaction, but not small files, so we don't become totally inefficient
      // (might want to tweak that in future). Also, given the current order of looking at
      // permutations, prefer earlier files and a smaller selection if the difference is small.
      // For example (illustrative numbers): a best selection of 5 files totaling 100 has quality
      // 5 / 100 = 0.05; a challenger of 3 files totaling 50 has quality 3 / 50 = 0.06 and wins
      // because 0.06 > 0.05 * 1.05.
      final double REPLACE_IF_BETTER_BY = 1.05;
      double thresholdQuality = ((double) bestSelection.size() / bestSize) * REPLACE_IF_BETTER_BY;
      return thresholdQuality < ((double) selection.size() / size);
    }
    // Keep if this gets rid of more files. Or the same number of files for less io.
    return selection.size() > bestSelection.size()
        || (selection.size() == bestSelection.size() && size < bestSize);
  }

  /**
   * Find the total size of a list of store files.
   * @param potentialMatchFiles StoreFile list.
   * @return Sum of StoreFile.getReader().length();
   */
  private long getTotalStoreSize(List<HStoreFile> potentialMatchFiles) {
    return potentialMatchFiles.stream().mapToLong(sf -> sf.getReader().length()).sum();
  }

  /**
   * Check that all files satisfy the constraint
   *   FileSize(i) <= ( Sum(0,N,FileSize(_)) - FileSize(i) ) * Ratio.
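   * <p>
   * A worked example (illustrative numbers, assumed for this comment): with file sizes
   * [100, 50, 30] and ratio 1.2, the largest file fails its check because
   * 100 > (50 + 30) * 1.2 = 96, so the set is rejected, while sizes [60, 50, 30] pass
   * because every file is at most 1.2 times the sum of the others.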
   *
   * @param files List of store files to consider as a compaction candidate.
   * @param currentRatio The ratio to use.
   * @return Whether all files satisfy the ratio constraint.
   */
  private boolean filesInRatio(List<HStoreFile> files, double currentRatio) {
    if (files.size() < 2) {
      return true;
    }

    long totalFileSize = getTotalStoreSize(files);

    for (HStoreFile file : files) {
      long singleFileSize = file.getReader().length();
      long sumAllOtherFileSizes = totalFileSize - singleFileSize;

      if (singleFileSize > sumAllOtherFileSizes * currentRatio) {
        return false;
      }
    }
    return true;
  }
}
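// A minimal usage sketch (assumed wiring; in a running cluster the store engine creates and
// invokes the policy rather than user code, and candidateFiles/conf/storeConfigInfo below are
// hypothetical names):
//
//   ExploringCompactionPolicy policy =
//       new ExploringCompactionPolicy(conf, storeConfigInfo);
//   List<HStoreFile> filesToCompact =
//       policy.applyCompactionPolicy(candidateFiles, /* mightBeStuck */ false,
//           /* mayUseOffPeak */ false, /* minFiles */ 3, /* maxFiles */ 10);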