001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.janitor; 019 020import java.io.IOException; 021import java.io.InputStream; 022import java.util.Comparator; 023import java.util.HashSet; 024import java.util.List; 025import java.util.Map; 026import java.util.Properties; 027import java.util.concurrent.atomic.AtomicBoolean; 028import java.util.stream.Collectors; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.HBaseConfiguration; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.MetaTableAccessor; 035import org.apache.hadoop.hbase.ScheduledChore; 036import org.apache.hadoop.hbase.TableName; 037import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 038import org.apache.hadoop.hbase.client.Connection; 039import org.apache.hadoop.hbase.client.ConnectionFactory; 040import org.apache.hadoop.hbase.client.Get; 041import org.apache.hadoop.hbase.client.Put; 042import org.apache.hadoop.hbase.client.RegionInfo; 043import org.apache.hadoop.hbase.client.Result; 044import org.apache.hadoop.hbase.client.Table; 045import org.apache.hadoop.hbase.client.TableDescriptor; 046import org.apache.hadoop.hbase.master.MasterServices; 047import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 048import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; 049import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; 050import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 051import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 052import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 053import org.apache.hadoop.hbase.util.Bytes; 054import org.apache.hadoop.hbase.util.CommonFSUtils; 055import org.apache.hadoop.hbase.util.Pair; 056import org.apache.hadoop.hbase.util.PairOfSameType; 057import org.apache.hadoop.hbase.util.Threads; 058import org.apache.yetus.audience.InterfaceAudience; 059import org.slf4j.Logger; 060import org.slf4j.LoggerFactory; 061 062/** 063 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog table on a period. 064 * Makes a lastReport on state of hbase:meta. Looks for unused regions to garbage collect. Scan of 065 * hbase:meta runs if we are NOT in maintenance mode, if we are NOT shutting down, AND if the 066 * assignmentmanager is loaded. Playing it safe, we will garbage collect no-longer needed region 067 * references only if there are no regions-in-transition (RIT). 068 */ 069// TODO: Only works with single hbase:meta region currently. Fix. 070// TODO: Should it start over every time? Could it continue if runs into problem? Only if 071// problem does not mess up 'results'. 072// TODO: Do more by way of 'repair'; see note on unknownServers below. 073@InterfaceAudience.Private 074public class CatalogJanitor extends ScheduledChore { 075 076 private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName()); 077 078 private final AtomicBoolean alreadyRunning = new AtomicBoolean(false); 079 private final AtomicBoolean enabled = new AtomicBoolean(true); 080 private final MasterServices services; 081 082 /** 083 * Saved report from last hbase:meta scan to completion. May be stale if having trouble completing 084 * scan. Check its date. 085 */ 086 private volatile Report lastReport; 087 088 public CatalogJanitor(final MasterServices services) { 089 super("CatalogJanitor-" + services.getServerName().toShortString(), services, 090 services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000)); 091 this.services = services; 092 } 093 094 @Override 095 protected boolean initialChore() { 096 try { 097 if (getEnabled()) { 098 scan(); 099 } 100 } catch (IOException e) { 101 LOG.warn("Failed initial janitorial scan of hbase:meta table", e); 102 return false; 103 } 104 return true; 105 } 106 107 public boolean setEnabled(final boolean enabled) { 108 boolean alreadyEnabled = this.enabled.getAndSet(enabled); 109 // If disabling is requested on an already enabled chore, we could have an active 110 // scan still going on, callers might not be aware of that and do further action thinkng 111 // that no action would be from this chore. In this case, the right action is to wait for 112 // the active scan to complete before exiting this function. 113 if (!enabled && alreadyEnabled) { 114 while (alreadyRunning.get()) { 115 Threads.sleepWithoutInterrupt(100); 116 } 117 } 118 return alreadyEnabled; 119 } 120 121 public boolean getEnabled() { 122 return this.enabled.get(); 123 } 124 125 @Override 126 protected void chore() { 127 try { 128 AssignmentManager am = this.services.getAssignmentManager(); 129 if (getEnabled() && !this.services.isInMaintenanceMode() && 130 !this.services.getServerManager().isClusterShutdown() && isMetaLoaded(am)) { 131 scan(); 132 } else { 133 LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + ", maintenanceMode=" + 134 this.services.isInMaintenanceMode() + ", am=" + am + ", metaLoaded=" + isMetaLoaded(am) + 135 ", hasRIT=" + isRIT(am) + " clusterShutDown=" + 136 this.services.getServerManager().isClusterShutdown()); 137 } 138 } catch (IOException e) { 139 LOG.warn("Failed janitorial scan of hbase:meta table", e); 140 } 141 } 142 143 private static boolean isMetaLoaded(AssignmentManager am) { 144 return am != null && am.isMetaLoaded(); 145 } 146 147 private static boolean isRIT(AssignmentManager am) { 148 return isMetaLoaded(am) && am.hasRegionsInTransition(); 149 } 150 151 /** 152 * Run janitorial scan of catalog <code>hbase:meta</code> table looking for garbage to collect. 153 * @return How many items gc'd whether for merge or split. Returns -1 if previous scan is in 154 * progress. 155 */ 156 public int scan() throws IOException { 157 int gcs = 0; 158 try { 159 if (!alreadyRunning.compareAndSet(false, true)) { 160 LOG.debug("CatalogJanitor already running"); 161 // -1 indicates previous scan is in progress 162 return -1; 163 } 164 this.lastReport = scanForReport(); 165 if (!this.lastReport.isEmpty()) { 166 LOG.warn(this.lastReport.toString()); 167 } 168 169 if (isRIT(this.services.getAssignmentManager())) { 170 LOG.warn("Playing-it-safe skipping merge/split gc'ing of regions from hbase:meta while " + 171 "regions-in-transition (RIT)"); 172 } 173 Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions; 174 for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) { 175 if (this.services.isInMaintenanceMode()) { 176 // Stop cleaning if the master is in maintenance mode 177 break; 178 } 179 180 List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells()); 181 if (parents != null && cleanMergeRegion(e.getKey(), parents)) { 182 gcs++; 183 } 184 } 185 // Clean split parents 186 Map<RegionInfo, Result> splitParents = this.lastReport.splitParents; 187 188 // Now work on our list of found parents. See if any we can clean up. 189 HashSet<String> parentNotCleaned = new HashSet<>(); 190 for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) { 191 if (this.services.isInMaintenanceMode()) { 192 // Stop cleaning if the master is in maintenance mode 193 break; 194 } 195 196 if (!parentNotCleaned.contains(e.getKey().getEncodedName()) && 197 cleanParent(e.getKey(), e.getValue())) { 198 gcs++; 199 } else { 200 // We could not clean the parent, so it's daughters should not be 201 // cleaned either (HBASE-6160) 202 PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(e.getValue()); 203 parentNotCleaned.add(daughters.getFirst().getEncodedName()); 204 parentNotCleaned.add(daughters.getSecond().getEncodedName()); 205 } 206 } 207 return gcs; 208 } finally { 209 alreadyRunning.set(false); 210 } 211 } 212 213 /** 214 * Scan hbase:meta. 215 * @return Return generated {@link Report} 216 */ 217 // will be override in tests. 218 protected Report scanForReport() throws IOException { 219 ReportMakingVisitor visitor = new ReportMakingVisitor(this.services); 220 // Null tablename means scan all of meta. 221 MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null); 222 return visitor.getReport(); 223 } 224 225 /** 226 * @return Returns last published Report that comes of last successful scan of hbase:meta. 227 */ 228 public Report getLastReport() { 229 return this.lastReport; 230 } 231 232 /** 233 * If merged region no longer holds reference to the merge regions, archive merge region on hdfs 234 * and perform deleting references in hbase:meta 235 * @return true if we delete references in merged region on hbase:meta and archive the files on 236 * the file system 237 */ 238 private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents) 239 throws IOException { 240 FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); 241 Path rootdir = this.services.getMasterFileSystem().getRootDir(); 242 Path tabledir = CommonFSUtils.getTableDir(rootdir, mergedRegion.getTable()); 243 TableDescriptor htd = getDescriptor(mergedRegion.getTable()); 244 HRegionFileSystem regionFs = null; 245 try { 246 regionFs = HRegionFileSystem.openRegionFromFileSystem(this.services.getConfiguration(), fs, 247 tabledir, mergedRegion, true); 248 } catch (IOException e) { 249 LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName()); 250 } 251 if (regionFs == null || !regionFs.hasReferences(htd)) { 252 LOG.debug( 253 "Deleting parents ({}) from fs; merged child {} no longer holds references", parents 254 .stream().map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", ")), 255 mergedRegion); 256 ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); 257 pe.submitProcedure( 258 new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), mergedRegion, parents)); 259 for (RegionInfo ri : parents) { 260 // The above scheduled GCMultipleMergedRegionsProcedure does the below. 261 // Do we need this? 262 this.services.getAssignmentManager().getRegionStates().deleteRegion(ri); 263 this.services.getServerManager().removeRegion(ri); 264 } 265 return true; 266 } 267 return false; 268 } 269 270 /** 271 * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters. 272 */ 273 static class SplitParentFirstComparator implements Comparator<RegionInfo> { 274 Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator(); 275 276 @Override 277 public int compare(RegionInfo left, RegionInfo right) { 278 // This comparator differs from the one RegionInfo in that it sorts 279 // parent before daughters. 280 if (left == null) { 281 return -1; 282 } 283 if (right == null) { 284 return 1; 285 } 286 // Same table name. 287 int result = left.getTable().compareTo(right.getTable()); 288 if (result != 0) { 289 return result; 290 } 291 // Compare start keys. 292 result = Bytes.compareTo(left.getStartKey(), right.getStartKey()); 293 if (result != 0) { 294 return result; 295 } 296 // Compare end keys, but flip the operands so parent comes first 297 result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey()); 298 299 return result; 300 } 301 } 302 303 static boolean cleanParent(MasterServices services, RegionInfo parent, Result rowContent) 304 throws IOException { 305 // Check whether it is a merged region and if it is clean of references. 306 if (MetaTableAccessor.hasMergeRegions(rowContent.rawCells())) { 307 // Wait until clean of merge parent regions first 308 return false; 309 } 310 // Run checks on each daughter split. 311 PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent); 312 Pair<Boolean, Boolean> a = checkDaughterInFs(services, parent, daughters.getFirst()); 313 Pair<Boolean, Boolean> b = checkDaughterInFs(services, parent, daughters.getSecond()); 314 if (hasNoReferences(a) && hasNoReferences(b)) { 315 String daughterA = 316 daughters.getFirst() != null ? daughters.getFirst().getShortNameToLog() : "null"; 317 String daughterB = 318 daughters.getSecond() != null ? daughters.getSecond().getShortNameToLog() : "null"; 319 LOG.debug("Deleting region " + parent.getShortNameToLog() + " because daughters -- " + 320 daughterA + ", " + daughterB + " -- no longer hold references"); 321 ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor(); 322 pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent)); 323 // Remove from in-memory states 324 services.getAssignmentManager().getRegionStates().deleteRegion(parent); 325 services.getServerManager().removeRegion(parent); 326 return true; 327 } 328 return false; 329 } 330 331 /** 332 * If daughters no longer hold reference to the parents, delete the parent. 333 * @param parent RegionInfo of split offlined parent 334 * @param rowContent Content of <code>parent</code> row in <code>metaRegionName</code> 335 * @return True if we removed <code>parent</code> from meta table and from the filesystem. 336 */ 337 private boolean cleanParent(final RegionInfo parent, Result rowContent) throws IOException { 338 return cleanParent(services, parent, rowContent); 339 } 340 341 /** 342 * @param p A pair where the first boolean says whether or not the daughter region directory 343 * exists in the filesystem and then the second boolean says whether the daughter has 344 * references to the parent. 345 * @return True the passed <code>p</code> signifies no references. 346 */ 347 private static boolean hasNoReferences(final Pair<Boolean, Boolean> p) { 348 return !p.getFirst() || !p.getSecond(); 349 } 350 351 /** 352 * Checks if a daughter region -- either splitA or splitB -- still holds references to parent. 353 * @param parent Parent region 354 * @param daughter Daughter region 355 * @return A pair where the first boolean says whether or not the daughter region directory exists 356 * in the filesystem and then the second boolean says whether the daughter has references 357 * to the parent. 358 */ 359 private static Pair<Boolean, Boolean> checkDaughterInFs(MasterServices services, 360 final RegionInfo parent, final RegionInfo daughter) throws IOException { 361 if (daughter == null) { 362 return new Pair<>(Boolean.FALSE, Boolean.FALSE); 363 } 364 365 FileSystem fs = services.getMasterFileSystem().getFileSystem(); 366 Path rootdir = services.getMasterFileSystem().getRootDir(); 367 Path tabledir = CommonFSUtils.getTableDir(rootdir, daughter.getTable()); 368 369 Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName()); 370 371 HRegionFileSystem regionFs; 372 373 try { 374 if (!CommonFSUtils.isExists(fs, daughterRegionDir)) { 375 return new Pair<>(Boolean.FALSE, Boolean.FALSE); 376 } 377 } catch (IOException ioe) { 378 LOG.error("Error trying to determine if daughter region exists, " + 379 "assuming exists and has references", ioe); 380 return new Pair<>(Boolean.TRUE, Boolean.TRUE); 381 } 382 383 boolean references = false; 384 TableDescriptor parentDescriptor = services.getTableDescriptors().get(parent.getTable()); 385 try { 386 regionFs = HRegionFileSystem.openRegionFromFileSystem(services.getConfiguration(), fs, 387 tabledir, daughter, true); 388 389 for (ColumnFamilyDescriptor family : parentDescriptor.getColumnFamilies()) { 390 references = regionFs.hasReferences(family.getNameAsString()); 391 if (references) { 392 break; 393 } 394 } 395 } catch (IOException e) { 396 LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName() + 397 ", to: " + parent.getEncodedName() + " assuming has references", e); 398 return new Pair<>(Boolean.TRUE, Boolean.TRUE); 399 } 400 return new Pair<>(Boolean.TRUE, references); 401 } 402 403 private TableDescriptor getDescriptor(final TableName tableName) throws IOException { 404 return this.services.getTableDescriptors().get(tableName); 405 } 406 407 private static void checkLog4jProperties() { 408 String filename = "log4j.properties"; 409 try { 410 final InputStream inStream = 411 CatalogJanitor.class.getClassLoader().getResourceAsStream(filename); 412 if (inStream != null) { 413 new Properties().load(inStream); 414 } else { 415 System.out.println("No " + filename + " on classpath; Add one else no logging output!"); 416 } 417 } catch (IOException e) { 418 LOG.error("Log4j check failed", e); 419 } 420 } 421 422 /** 423 * For testing against a cluster. Doesn't have a MasterServices context so does not report on good 424 * vs bad servers. 425 */ 426 public static void main(String[] args) throws IOException { 427 checkLog4jProperties(); 428 ReportMakingVisitor visitor = new ReportMakingVisitor(null); 429 Configuration configuration = HBaseConfiguration.create(); 430 configuration.setBoolean("hbase.defaults.for.version.skip", true); 431 try (Connection connection = ConnectionFactory.createConnection(configuration)) { 432 /* 433 * Used to generate an overlap. 434 */ 435 Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0.")); 436 g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); 437 try (Table t = connection.getTable(TableName.META_TABLE_NAME)) { 438 Result r = t.get(g); 439 byte[] row = g.getRow(); 440 row[row.length - 2] <<= row[row.length - 2]; 441 Put p = new Put(g.getRow()); 442 p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, 443 r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); 444 t.put(p); 445 } 446 MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null); 447 Report report = visitor.getReport(); 448 LOG.info(report != null ? report.toString() : "empty"); 449 } 450 } 451}