/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.compactions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.OptionalInt;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * An abstract compaction policy that selects files to compact in seqId order.
 */
@InterfaceAudience.Private
public abstract class SortedCompactionPolicy extends CompactionPolicy {

  private static final Logger LOG = LoggerFactory.getLogger(SortedCompactionPolicy.class);

  private static final Random RNG = new Random();

  public SortedCompactionPolicy(Configuration conf, StoreConfigInformation storeConfigInfo) {
    super(conf, storeConfigInfo);
  }

  public List<HStoreFile> preSelectCompactionForCoprocessor(Collection<HStoreFile> candidates,
    List<HStoreFile> filesCompacting) {
    return getCurrentEligibleFiles(new ArrayList<>(candidates), filesCompacting);
  }

  /**
   * @param candidateFiles candidate files, ordered from oldest to newest by seqId. We rely on
   *                       DefaultStoreFileManager to sort the files by seqId to guarantee
   *                       contiguous compaction based on seqId for data consistency.
   * @return subset copy of candidate list that meets compaction criteria
   */
  public CompactionRequestImpl selectCompaction(Collection<HStoreFile> candidateFiles,
    List<HStoreFile> filesCompacting, boolean isUserCompaction, boolean mayUseOffPeak,
    boolean forceMajor) throws IOException {
    // Preliminary compaction subject to filters
    ArrayList<HStoreFile> candidateSelection = new ArrayList<>(candidateFiles);
    // Stuck and not compacting enough (estimate). It is not guaranteed that we will be
    // able to compact more if stuck and compacting, because ratio policy excludes some
    // non-compacting files from consideration during compaction (see getCurrentEligibleFiles).
    int futureFiles = filesCompacting.isEmpty() ? 0 : 1;
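    // An in-flight compaction will eventually produce one new output file; the store is
    // considered "stuck" when the files left over after that compaction finishes would
    // still reach the blocking file count.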
    boolean mayBeStuck = (candidateFiles.size() - filesCompacting.size() + futureFiles)
      >= storeConfigInfo.getBlockingFileCount();

    candidateSelection = getCurrentEligibleFiles(candidateSelection, filesCompacting);
    LOG.debug("Selecting compaction from " + candidateFiles.size() + " store files, "
      + filesCompacting.size() + " compacting, " + candidateSelection.size() + " eligible, "
      + storeConfigInfo.getBlockingFileCount() + " blocking");

    // If we can't have all files, we cannot do major anyway
    boolean isAllFiles = candidateFiles.size() == candidateSelection.size();
    if (!(forceMajor && isAllFiles)) {
      candidateSelection = skipLargeFiles(candidateSelection, mayUseOffPeak);
      isAllFiles = candidateFiles.size() == candidateSelection.size();
    }

    // Try a major compaction if this is a user-requested major compaction,
    // or if we do not have too many files to compact and this was requested as a major compaction
    boolean isTryingMajor = (forceMajor && isAllFiles && isUserCompaction)
      || (((forceMajor && isAllFiles) || shouldPerformMajorCompaction(candidateSelection))
        && (candidateSelection.size() < comConf.getMaxFilesToCompact()));
    // Or, if there are any references among the candidates.
    boolean isAfterSplit = StoreUtils.hasReferences(candidateSelection);

    CompactionRequestImpl result = createCompactionRequest(candidateSelection,
      isTryingMajor || isAfterSplit, mayUseOffPeak, mayBeStuck);
    result.setAfterSplit(isAfterSplit);

    ArrayList<HStoreFile> filesToCompact = Lists.newArrayList(result.getFiles());
    removeExcessFiles(filesToCompact, isUserCompaction, isTryingMajor);
    result.updateFiles(filesToCompact);

    isAllFiles = (candidateFiles.size() == filesToCompact.size());
    result.setOffPeak(!filesToCompact.isEmpty() && !isAllFiles && mayUseOffPeak);
    result.setIsMajor(isTryingMajor && isAllFiles, isAllFiles);

    return result;
  }

  protected abstract CompactionRequestImpl createCompactionRequest(
    ArrayList<HStoreFile> candidateSelection, boolean tryingMajor, boolean mayUseOffPeak,
    boolean mayBeStuck) throws IOException;

  /**
   * @param filesToCompact Files to compact. Can be null.
   * @return True if we should run a major compaction.
   */
  @Override
  public abstract boolean shouldPerformMajorCompaction(Collection<HStoreFile> filesToCompact)
    throws IOException;

  /**
   * @param filesToCompact the store files used to derive the deterministic jitter seed
   * @return When to run next major compaction
   */
  public long getNextMajorCompactTime(Collection<HStoreFile> filesToCompact) {
    /** Default to {@link org.apache.hadoop.hbase.HConstants#DEFAULT_MAJOR_COMPACTION_PERIOD}. */
    long period = comConf.getMajorCompactionPeriod();
    if (period <= 0) {
      return period;
    }

    /**
     * Default to {@link org.apache.hadoop.hbase.HConstants#DEFAULT_MAJOR_COMPACTION_JITTER}, that
     * is, +/- 3.5 days (7 days * 0.5).
     */
    double jitterPct = comConf.getMajorCompactionJitter();
    if (jitterPct <= 0) {
      return period;
    }

    // deterministic jitter avoids a major compaction storm on restart
    OptionalInt seed = StoreUtils.getDeterministicRandomSeed(filesToCompact);
    if (seed.isPresent()) {
      long jitter = Math.round(period * jitterPct);
      // Synchronized to ensure one user of random instance at a time.
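      // The result is spread roughly uniformly over [period - jitter, period + jitter]; since
      // the seed is derived from the store files, the same store keeps the same offset until
      // its files change.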
      synchronized (RNG) {
        RNG.setSeed(seed.getAsInt());
        return period + jitter - Math.round(2L * jitter * RNG.nextDouble());
      }
    } else {
      return 0L;
    }
  }

  /**
   * @param compactionSize Total size of some compaction
   * @return whether this should be a large or small compaction
   */
  @Override
  public boolean throttleCompaction(long compactionSize) {
    return compactionSize > comConf.getThrottlePoint();
  }

  public abstract boolean needsCompaction(Collection<HStoreFile> storeFiles,
    List<HStoreFile> filesCompacting);

  protected ArrayList<HStoreFile> getCurrentEligibleFiles(ArrayList<HStoreFile> candidateFiles,
    final List<HStoreFile> filesCompacting) {
    // candidates = all storefiles not already in compaction queue
    if (!filesCompacting.isEmpty()) {
      // exclude all files older than the newest file we're currently
      // compacting. this allows us to preserve contiguity (HBASE-2856)
      HStoreFile last = filesCompacting.get(filesCompacting.size() - 1);
      int idx = candidateFiles.indexOf(last);
      Preconditions.checkArgument(idx != -1);
      candidateFiles.subList(0, idx + 1).clear();
    }
    return candidateFiles;
  }

  /**
   * @param candidates pre-filtered candidate files
   * @return filtered subset that excludes all files above maxCompactSize. Reference files are
   *         always kept because we MUST compact them.
   */
  protected ArrayList<HStoreFile> skipLargeFiles(ArrayList<HStoreFile> candidates,
    boolean mayUseOffpeak) {
    int pos = 0;
    while (
      pos < candidates.size() && !candidates.get(pos).isReference()
        && (candidates.get(pos).getReader().length() > comConf.getMaxCompactSize(mayUseOffpeak))
    ) {
      ++pos;
    }
    if (pos > 0) {
      LOG.debug("Some files are too large. Excluding " + pos + " files from compaction candidates");
      candidates.subList(0, pos).clear();
    }
    return candidates;
  }

  /**
   * @param candidates pre-filtered candidate files
   */
  protected void filterBulk(ArrayList<HStoreFile> candidates) {
    candidates.removeIf(HStoreFile::excludeFromMinorCompaction);
  }

  /**
   * @param candidates pre-filtered candidate files
   */
  protected void removeExcessFiles(ArrayList<HStoreFile> candidates, boolean isUserCompaction,
    boolean isMajorCompaction) {
    int excess = candidates.size() - comConf.getMaxFilesToCompact();
    if (excess > 0) {
      if (isMajorCompaction && isUserCompaction) {
        LOG.debug("Warning, compacting more than " + comConf.getMaxFilesToCompact()
          + " files because of a user-requested major compaction");
      } else {
        LOG.debug(
          "Too many admissible files. Excluding " + excess + " files from compaction candidates");
        candidates.subList(comConf.getMaxFilesToCompact(), candidates.size()).clear();
      }
    }
  }

  /**
   * @param candidates pre-filtered candidate files
   * @return the filtered candidates; the selection is cleared if we don't have enough files
   */
  protected ArrayList<HStoreFile> checkMinFilesCriteria(ArrayList<HStoreFile> candidates,
    int minFiles) {
    if (candidates.size() < minFiles) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Not compacting files because we only have " + candidates.size()
          + " files ready for compaction. Need " + minFiles + " to initiate.");
      }
      candidates.clear();
    }
    return candidates;
  }
}