/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
 * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
 * for the specific language governing permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.compactions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.OptionalInt;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * An abstract compaction policy that selects files in seq id order.
 */
@InterfaceAudience.Private
public abstract class SortedCompactionPolicy extends CompactionPolicy {

  private static final Logger LOG = LoggerFactory.getLogger(SortedCompactionPolicy.class);

  public SortedCompactionPolicy(Configuration conf, StoreConfigInformation storeConfigInfo) {
    super(conf, storeConfigInfo);
  }

  public List<HStoreFile> preSelectCompactionForCoprocessor(Collection<HStoreFile> candidates,
      List<HStoreFile> filesCompacting) {
    return getCurrentEligibleFiles(new ArrayList<>(candidates), filesCompacting);
  }

  /**
   * @param candidateFiles candidate files, ordered from oldest to newest by seqId. We rely on
   *          DefaultStoreFileManager to sort the files by seqId to guarantee contiguous compaction
   *          based on seqId for data consistency.
   * @return subset copy of candidate list that meets compaction criteria
   */
  public CompactionRequestImpl selectCompaction(Collection<HStoreFile> candidateFiles,
      List<HStoreFile> filesCompacting, boolean isUserCompaction, boolean mayUseOffPeak,
      boolean forceMajor) throws IOException {
    // Preliminary compaction subject to filters
    ArrayList<HStoreFile> candidateSelection = new ArrayList<>(candidateFiles);
    // Stuck and not compacting enough (estimate). It is not guaranteed that we will be
    // able to compact more if stuck and compacting, because ratio policy excludes some
    // non-compacting files from consideration during compaction (see getCurrentEligibleFiles).
    int futureFiles = filesCompacting.isEmpty() ? 0 : 1;
    boolean mayBeStuck = (candidateFiles.size() - filesCompacting.size() + futureFiles)
        >= storeConfigInfo.getBlockingFileCount();
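    // Editor's note, a worked example with hypothetical numbers: with 12 candidate files, 2 of
    // them already compacting, and a blocking file count of 10, futureFiles is 1 and
    // 12 - 2 + 1 = 11 >= 10, so mayBeStuck is true; subclasses may use this flag to relax their
    // selection so the store can make progress.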

    candidateSelection = getCurrentEligibleFiles(candidateSelection, filesCompacting);
    LOG.debug("Selecting compaction from " + candidateFiles.size() + " store files, "
        + filesCompacting.size() + " compacting, " + candidateSelection.size() + " eligible, "
        + storeConfigInfo.getBlockingFileCount() + " blocking");

    // If we can't have all files, we cannot do major anyway
    boolean isAllFiles = candidateFiles.size() == candidateSelection.size();
    if (!(forceMajor && isAllFiles)) {
      candidateSelection = skipLargeFiles(candidateSelection, mayUseOffPeak);
      isAllFiles = candidateFiles.size() == candidateSelection.size();
    }

    // Try a major compaction if this is a user-requested major compaction,
    // or if we do not have too many files to compact and this was requested as a major compaction
    boolean isTryingMajor = (forceMajor && isAllFiles && isUserCompaction)
        || (((forceMajor && isAllFiles) || shouldPerformMajorCompaction(candidateSelection))
            && (candidateSelection.size() < comConf.getMaxFilesToCompact()));
    // Or, if there are any references among the candidates.
    boolean isAfterSplit = StoreUtils.hasReferences(candidateSelection);

    CompactionRequestImpl result = createCompactionRequest(candidateSelection,
        isTryingMajor || isAfterSplit, mayUseOffPeak, mayBeStuck);
    result.setAfterSplit(isAfterSplit);

    ArrayList<HStoreFile> filesToCompact = Lists.newArrayList(result.getFiles());
    removeExcessFiles(filesToCompact, isUserCompaction, isTryingMajor);
    result.updateFiles(filesToCompact);

    isAllFiles = (candidateFiles.size() == filesToCompact.size());
    result.setOffPeak(!filesToCompact.isEmpty() && !isAllFiles && mayUseOffPeak);
    result.setIsMajor(isTryingMajor && isAllFiles, isAllFiles);

    return result;
  }

  protected abstract CompactionRequestImpl createCompactionRequest(
      ArrayList<HStoreFile> candidateSelection, boolean tryingMajor, boolean mayUseOffPeak,
      boolean mayBeStuck) throws IOException;

  /**
   * @param filesToCompact Files to compact. Can be null.
   * @return True if we should run a major compaction.
   */
  @Override
  public abstract boolean shouldPerformMajorCompaction(Collection<HStoreFile> filesToCompact)
      throws IOException;

  /**
   * Random number generator used to calculate the compaction jitter.
   */
  private final Random random = new Random();

  /**
   * @param filesToCompact the files being considered for the next major compaction
   * @return When to run next major compaction
   */
  public long getNextMajorCompactTime(Collection<HStoreFile> filesToCompact) {
    /** Default to {@link org.apache.hadoop.hbase.HConstants#DEFAULT_MAJOR_COMPACTION_PERIOD}. */
    long period = comConf.getMajorCompactionPeriod();
    if (period <= 0) {
      return period;
    }

    /**
     * Default to {@link org.apache.hadoop.hbase.HConstants#DEFAULT_MAJOR_COMPACTION_JITTER}, that
     * is, +/- 3.5 days (7 days * 0.5).
     */
    double jitterPct = comConf.getMajorCompactionJitter();
    if (jitterPct <= 0) {
      return period;
    }

    // deterministic jitter avoids a major compaction storm on restart
    OptionalInt seed = StoreUtils.getDeterministicRandomSeed(filesToCompact);
    if (seed.isPresent()) {
      // Synchronized to ensure one user of random instance at a time.
      double rnd;
      synchronized (this) {
        this.random.setSeed(seed.getAsInt());
        rnd = this.random.nextDouble();
      }
      long jitter = Math.round(period * jitterPct);
      return period + jitter - Math.round(2L * jitter * rnd);
    } else {
      return 0L;
    }
  }
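
  // Editor's note, a worked example with hypothetical defaults: with a 7-day period and 0.5
  // jitter, jitter = Math.round(period * 0.5) = 3.5 days, and since rnd lies in [0, 1) the
  // returned delay falls in (period - jitter, period + jitter], i.e. between 3.5 and 10.5 days.
  // Seeding the RNG from the store files keeps the schedule stable across restarts.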

  /**
   * @param compactionSize Total size of some compaction
   * @return whether this should be a large or small compaction
   */
  @Override
  public boolean throttleCompaction(long compactionSize) {
    return compactionSize > comConf.getThrottlePoint();
  }

  public abstract boolean needsCompaction(Collection<HStoreFile> storeFiles,
      List<HStoreFile> filesCompacting);

  protected ArrayList<HStoreFile> getCurrentEligibleFiles(ArrayList<HStoreFile> candidateFiles,
      final List<HStoreFile> filesCompacting) {
    // candidates = all storefiles not already in compaction queue
    if (!filesCompacting.isEmpty()) {
      // exclude all files older than the newest file we're currently
      // compacting. this allows us to preserve contiguity (HBASE-2856)
      HStoreFile last = filesCompacting.get(filesCompacting.size() - 1);
      int idx = candidateFiles.indexOf(last);
      Preconditions.checkArgument(idx != -1);
      candidateFiles.subList(0, idx + 1).clear();
    }
    return candidateFiles;
  }

  /**
   * @param candidates pre-filtrate
   * @return filtered subset excluding all files above maxCompactSize. References are always kept,
   *         since we MUST compact them.
   */
  protected ArrayList<HStoreFile> skipLargeFiles(ArrayList<HStoreFile> candidates,
      boolean mayUseOffpeak) {
    int pos = 0;
    while (pos < candidates.size() && !candidates.get(pos).isReference()
        && (candidates.get(pos).getReader().length() > comConf.getMaxCompactSize(mayUseOffpeak))) {
      ++pos;
    }
    if (pos > 0) {
      LOG.debug("Some files are too large. Excluding " + pos
          + " files from compaction candidates");
      candidates.subList(0, pos).clear();
    }
    return candidates;
  }

  /**
   * @param candidates pre-filtrate
   */
  protected void filterBulk(ArrayList<HStoreFile> candidates) {
    candidates.removeIf(HStoreFile::excludeFromMinorCompaction);
  }

  /**
   * @param candidates pre-filtrate
   */
  protected void removeExcessFiles(ArrayList<HStoreFile> candidates,
      boolean isUserCompaction, boolean isMajorCompaction) {
    int excess = candidates.size() - comConf.getMaxFilesToCompact();
    if (excess > 0) {
      if (isMajorCompaction && isUserCompaction) {
        LOG.debug("Warning, compacting more than " + comConf.getMaxFilesToCompact()
            + " files because of a user-requested major compaction");
      } else {
        LOG.debug("Too many admissible files. Excluding " + excess
            + " files from compaction candidates");
        candidates.subList(comConf.getMaxFilesToCompact(), candidates.size()).clear();
      }
    }
  }

  /**
   * @param candidates pre-filtrate
   * @return filtered subset; the compaction selection is discarded if we don't have enough files
   */
  protected ArrayList<HStoreFile> checkMinFilesCriteria(ArrayList<HStoreFile> candidates,
      int minFiles) {
    if (candidates.size() < minFiles) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Not compacting files because we only have " + candidates.size()
            + " files ready for compaction. Need " + minFiles + " to initiate.");
      }
      candidates.clear();
    }
    return candidates;
  }
}
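
// Editor's sketch (hypothetical; not part of the original file): how a concrete subclass of this
// policy is typically driven. RatioBasedCompactionPolicy is one shipped subclass; the "store"
// accessor below is an assumption for illustration only.
//
//   SortedCompactionPolicy policy = new RatioBasedCompactionPolicy(conf, storeConfigInfo);
//   CompactionRequestImpl request = policy.selectCompaction(store.getStorefiles(),
//     filesCompacting, /* isUserCompaction */ false, /* mayUseOffPeak */ true,
//     /* forceMajor */ false);
//   if (!request.getFiles().isEmpty()) {
//     // hand the request off to the store's compaction executor
//   }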