001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import java.io.IOException; 021import java.io.InputStream; 022import java.util.ArrayList; 023import java.util.Comparator; 024import java.util.HashSet; 025import java.util.List; 026import java.util.Map; 027import java.util.Properties; 028import java.util.TreeMap; 029import java.util.concurrent.atomic.AtomicBoolean; 030import java.util.stream.Collectors; 031 032import org.apache.hadoop.conf.Configuration; 033import org.apache.hadoop.fs.FileSystem; 034import org.apache.hadoop.fs.Path; 035import org.apache.hadoop.hbase.HBaseConfiguration; 036import org.apache.hadoop.hbase.HConstants; 037import org.apache.hadoop.hbase.HRegionLocation; 038import org.apache.hadoop.hbase.MetaTableAccessor; 039import org.apache.hadoop.hbase.RegionLocations; 040import org.apache.hadoop.hbase.ScheduledChore; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 044import org.apache.hadoop.hbase.client.Connection; 045import org.apache.hadoop.hbase.client.ConnectionFactory; 046import org.apache.hadoop.hbase.client.Get; 047import org.apache.hadoop.hbase.client.Put; 048import org.apache.hadoop.hbase.client.RegionInfo; 049import org.apache.hadoop.hbase.client.Result; 050import org.apache.hadoop.hbase.client.Table; 051import org.apache.hadoop.hbase.client.TableDescriptor; 052import org.apache.hadoop.hbase.client.TableState; 053import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 054import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; 055import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; 056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 057import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 058import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 059import org.apache.hadoop.hbase.util.Bytes; 060import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 061import org.apache.hadoop.hbase.util.FSUtils; 062import org.apache.hadoop.hbase.util.Pair; 063import org.apache.hadoop.hbase.util.PairOfSameType; 064import org.apache.hadoop.hbase.util.Threads; 065import org.apache.yetus.audience.InterfaceAudience; 066import org.slf4j.Logger; 067import org.slf4j.LoggerFactory; 068 069import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 070 071/** 072 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog 073 * table on a period. Makes a lastReport on state of hbase:meta. Looks for unused 074 * regions to garbage collect. Scan of hbase:meta runs if we are NOT in maintenance 075 * mode, if we are NOT shutting down, AND if the assignmentmanager is loaded. 076 * Playing it safe, we will garbage collect no-longer needed region references 077 * only if there are no regions-in-transition (RIT). 078 */ 079// TODO: Only works with single hbase:meta region currently. Fix. 080// TODO: Should it start over every time? Could it continue if runs into problem? Only if 081// problem does not mess up 'results'. 082// TODO: Do more by way of 'repair'; see note on unknownServers below. 083@InterfaceAudience.Private 084public class CatalogJanitor extends ScheduledChore { 085 private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName()); 086 private final AtomicBoolean alreadyRunning = new AtomicBoolean(false); 087 private final AtomicBoolean enabled = new AtomicBoolean(true); 088 private final MasterServices services; 089 090 /** 091 * Saved report from last hbase:meta scan to completion. May be stale if having trouble 092 * completing scan. Check its date. 093 */ 094 private volatile Report lastReport; 095 096 CatalogJanitor(final MasterServices services) { 097 super("CatalogJanitor-" + services.getServerName().toShortString(), services, 098 services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000)); 099 this.services = services; 100 } 101 102 @Override 103 protected boolean initialChore() { 104 try { 105 if (getEnabled()) { 106 scan(); 107 } 108 } catch (IOException e) { 109 LOG.warn("Failed initial janitorial scan of hbase:meta table", e); 110 return false; 111 } 112 return true; 113 } 114 115 boolean setEnabled(final boolean enabled) { 116 boolean alreadyEnabled = this.enabled.getAndSet(enabled); 117 // If disabling is requested on an already enabled chore, we could have an active 118 // scan still going on, callers might not be aware of that and do further action thinkng 119 // that no action would be from this chore. In this case, the right action is to wait for 120 // the active scan to complete before exiting this function. 121 if (!enabled && alreadyEnabled) { 122 while (alreadyRunning.get()) { 123 Threads.sleepWithoutInterrupt(100); 124 } 125 } 126 return alreadyEnabled; 127 } 128 129 boolean getEnabled() { 130 return this.enabled.get(); 131 } 132 133 @Override 134 protected void chore() { 135 try { 136 AssignmentManager am = this.services.getAssignmentManager(); 137 if (getEnabled() && !this.services.isInMaintenanceMode() && 138 !this.services.getServerManager().isClusterShutdown() && 139 isMetaLoaded(am)) { 140 scan(); 141 } else { 142 LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + 143 ", maintenanceMode=" + this.services.isInMaintenanceMode() + ", am=" + am + 144 ", metaLoaded=" + isMetaLoaded(am) + ", hasRIT=" + isRIT(am) + 145 " clusterShutDown=" + this.services.getServerManager().isClusterShutdown()); 146 } 147 } catch (IOException e) { 148 LOG.warn("Failed janitorial scan of hbase:meta table", e); 149 } 150 } 151 152 private static boolean isMetaLoaded(AssignmentManager am) { 153 return am != null && am.isMetaLoaded(); 154 } 155 156 private static boolean isRIT(AssignmentManager am) { 157 return isMetaLoaded(am) && am.hasRegionsInTransition(); 158 } 159 160 /** 161 * Run janitorial scan of catalog <code>hbase:meta</code> table looking for 162 * garbage to collect. 163 * @return How many items gc'd whether for merge or split. 164 */ 165 int scan() throws IOException { 166 int gcs = 0; 167 try { 168 if (!alreadyRunning.compareAndSet(false, true)) { 169 LOG.debug("CatalogJanitor already running"); 170 return gcs; 171 } 172 this.lastReport = scanForReport(); 173 if (!this.lastReport.isEmpty()) { 174 LOG.warn(this.lastReport.toString()); 175 } 176 177 if (isRIT(this.services.getAssignmentManager())) { 178 LOG.warn("Playing-it-safe skipping merge/split gc'ing of regions from hbase:meta while " + 179 "regions-in-transition (RIT)"); 180 } 181 Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions; 182 for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) { 183 if (this.services.isInMaintenanceMode()) { 184 // Stop cleaning if the master is in maintenance mode 185 break; 186 } 187 188 List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells()); 189 if (parents != null && cleanMergeRegion(e.getKey(), parents)) { 190 gcs++; 191 } 192 } 193 // Clean split parents 194 Map<RegionInfo, Result> splitParents = this.lastReport.splitParents; 195 196 // Now work on our list of found parents. See if any we can clean up. 197 HashSet<String> parentNotCleaned = new HashSet<>(); 198 for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) { 199 if (this.services.isInMaintenanceMode()) { 200 // Stop cleaning if the master is in maintenance mode 201 break; 202 } 203 204 if (!parentNotCleaned.contains(e.getKey().getEncodedName()) && 205 cleanParent(e.getKey(), e.getValue())) { 206 gcs++; 207 } else { 208 // We could not clean the parent, so it's daughters should not be 209 // cleaned either (HBASE-6160) 210 PairOfSameType<RegionInfo> daughters = 211 MetaTableAccessor.getDaughterRegions(e.getValue()); 212 parentNotCleaned.add(daughters.getFirst().getEncodedName()); 213 parentNotCleaned.add(daughters.getSecond().getEncodedName()); 214 } 215 } 216 return gcs; 217 } finally { 218 alreadyRunning.set(false); 219 } 220 } 221 222 /** 223 * Scan hbase:meta. 224 * @return Return generated {@link Report} 225 */ 226 Report scanForReport() throws IOException { 227 ReportMakingVisitor visitor = new ReportMakingVisitor(this.services); 228 // Null tablename means scan all of meta. 229 MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null); 230 return visitor.getReport(); 231 } 232 233 /** 234 * @return Returns last published Report that comes of last successful scan 235 * of hbase:meta. 236 */ 237 public Report getLastReport() { 238 return this.lastReport; 239 } 240 241 /** 242 * If merged region no longer holds reference to the merge regions, archive 243 * merge region on hdfs and perform deleting references in hbase:meta 244 * @return true if we delete references in merged region on hbase:meta and archive 245 * the files on the file system 246 */ 247 private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents) 248 throws IOException { 249 FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); 250 Path rootdir = this.services.getMasterFileSystem().getRootDir(); 251 Path tabledir = FSUtils.getTableDir(rootdir, mergedRegion.getTable()); 252 TableDescriptor htd = getDescriptor(mergedRegion.getTable()); 253 HRegionFileSystem regionFs = null; 254 try { 255 regionFs = HRegionFileSystem.openRegionFromFileSystem( 256 this.services.getConfiguration(), fs, tabledir, mergedRegion, true); 257 } catch (IOException e) { 258 LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName()); 259 } 260 if (regionFs == null || !regionFs.hasReferences(htd)) { 261 LOG.debug("Deleting parents ({}) from fs; merged child {} no longer holds references", 262 parents.stream().map(r -> RegionInfo.getShortNameToLog(r)). 263 collect(Collectors.joining(", ")), 264 mergedRegion); 265 ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); 266 pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), 267 mergedRegion, parents)); 268 for (RegionInfo ri: parents) { 269 // The above scheduled GCMultipleMergedRegionsProcedure does the below. 270 // Do we need this? 271 this.services.getAssignmentManager().getRegionStates().deleteRegion(ri); 272 this.services.getServerManager().removeRegion(ri); 273 } 274 return true; 275 } 276 return false; 277 } 278 279 /** 280 * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters. 281 */ 282 static class SplitParentFirstComparator implements Comparator<RegionInfo> { 283 Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator(); 284 @Override 285 public int compare(RegionInfo left, RegionInfo right) { 286 // This comparator differs from the one RegionInfo in that it sorts 287 // parent before daughters. 288 if (left == null) { 289 return -1; 290 } 291 if (right == null) { 292 return 1; 293 } 294 // Same table name. 295 int result = left.getTable().compareTo(right.getTable()); 296 if (result != 0) { 297 return result; 298 } 299 // Compare start keys. 300 result = Bytes.compareTo(left.getStartKey(), right.getStartKey()); 301 if (result != 0) { 302 return result; 303 } 304 // Compare end keys, but flip the operands so parent comes first 305 result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey()); 306 307 return result; 308 } 309 } 310 311 /** 312 * If daughters no longer hold reference to the parents, delete the parent. 313 * @param parent RegionInfo of split offlined parent 314 * @param rowContent Content of <code>parent</code> row in 315 * <code>metaRegionName</code> 316 * @return True if we removed <code>parent</code> from meta table and from 317 * the filesystem. 318 */ 319 boolean cleanParent(final RegionInfo parent, Result rowContent) 320 throws IOException { 321 // Check whether it is a merged region and if it is clean of references. 322 if (MetaTableAccessor.hasMergeRegions(rowContent.rawCells())) { 323 // Wait until clean of merge parent regions first 324 return false; 325 } 326 // Run checks on each daughter split. 327 PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent); 328 Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst()); 329 Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond()); 330 if (hasNoReferences(a) && hasNoReferences(b)) { 331 String daughterA = daughters.getFirst() != null? 332 daughters.getFirst().getShortNameToLog(): "null"; 333 String daughterB = daughters.getSecond() != null? 334 daughters.getSecond().getShortNameToLog(): "null"; 335 LOG.debug("Deleting region " + parent.getShortNameToLog() + 336 " because daughters -- " + daughterA + ", " + daughterB + 337 " -- no longer hold references"); 338 ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); 339 pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent)); 340 // Remove from in-memory states 341 this.services.getAssignmentManager().getRegionStates().deleteRegion(parent); 342 this.services.getServerManager().removeRegion(parent); 343 return true; 344 } 345 return false; 346 } 347 348 /** 349 * @param p A pair where the first boolean says whether or not the daughter 350 * region directory exists in the filesystem and then the second boolean says 351 * whether the daughter has references to the parent. 352 * @return True the passed <code>p</code> signifies no references. 353 */ 354 private boolean hasNoReferences(final Pair<Boolean, Boolean> p) { 355 return !p.getFirst() || !p.getSecond(); 356 } 357 358 /** 359 * Checks if a daughter region -- either splitA or splitB -- still holds 360 * references to parent. 361 * @param parent Parent region 362 * @param daughter Daughter region 363 * @return A pair where the first boolean says whether or not the daughter 364 * region directory exists in the filesystem and then the second boolean says 365 * whether the daughter has references to the parent. 366 */ 367 private Pair<Boolean, Boolean> checkDaughterInFs(final RegionInfo parent, 368 final RegionInfo daughter) 369 throws IOException { 370 if (daughter == null) { 371 return new Pair<>(Boolean.FALSE, Boolean.FALSE); 372 } 373 374 FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); 375 Path rootdir = this.services.getMasterFileSystem().getRootDir(); 376 Path tabledir = FSUtils.getTableDir(rootdir, daughter.getTable()); 377 378 Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName()); 379 380 HRegionFileSystem regionFs; 381 382 try { 383 if (!FSUtils.isExists(fs, daughterRegionDir)) { 384 return new Pair<>(Boolean.FALSE, Boolean.FALSE); 385 } 386 } catch (IOException ioe) { 387 LOG.error("Error trying to determine if daughter region exists, " + 388 "assuming exists and has references", ioe); 389 return new Pair<>(Boolean.TRUE, Boolean.TRUE); 390 } 391 392 boolean references = false; 393 TableDescriptor parentDescriptor = getDescriptor(parent.getTable()); 394 try { 395 regionFs = HRegionFileSystem.openRegionFromFileSystem( 396 this.services.getConfiguration(), fs, tabledir, daughter, true); 397 398 for (ColumnFamilyDescriptor family: parentDescriptor.getColumnFamilies()) { 399 if ((references = regionFs.hasReferences(family.getNameAsString()))) { 400 break; 401 } 402 } 403 } catch (IOException e) { 404 LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName() 405 + ", to: " + parent.getEncodedName() + " assuming has references", e); 406 return new Pair<>(Boolean.TRUE, Boolean.TRUE); 407 } 408 return new Pair<>(Boolean.TRUE, references); 409 } 410 411 private TableDescriptor getDescriptor(final TableName tableName) throws IOException { 412 return this.services.getTableDescriptors().get(tableName); 413 } 414 415 /** 416 * Checks if the specified region has merge qualifiers, if so, try to clean them. 417 * @return true if no info:merge* columns; i.e. the specified region doesn't have 418 * any merge qualifiers. 419 */ 420 public boolean cleanMergeQualifier(final RegionInfo region) throws IOException { 421 // Get merge regions if it is a merged region and already has merge qualifier 422 List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(this.services.getConnection(), 423 region.getRegionName()); 424 if (parents == null || parents.isEmpty()) { 425 // It doesn't have merge qualifier, no need to clean 426 return true; 427 } 428 return cleanMergeRegion(region, parents); 429 } 430 431 /** 432 * Report made by ReportMakingVisitor 433 */ 434 public static class Report { 435 private final long now = EnvironmentEdgeManager.currentTime(); 436 437 // Keep Map of found split parents. These are candidates for cleanup. 438 // Use a comparator that has split parents come before its daughters. 439 final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator()); 440 final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR); 441 int count = 0; 442 443 private final List<Pair<RegionInfo, RegionInfo>> holes = new ArrayList<>(); 444 private final List<Pair<RegionInfo, RegionInfo>> overlaps = new ArrayList<>(); 445 446 /** 447 * TODO: If CatalogJanitor finds an 'Unknown Server', it should 'fix' it by queuing 448 * a {@link org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure} for 449 * found server for it to clean up meta. 450 */ 451 private final List<Pair<RegionInfo, ServerName>> unknownServers = new ArrayList<>(); 452 453 private final List<byte []> emptyRegionInfo = new ArrayList<>(); 454 455 @VisibleForTesting 456 Report() {} 457 458 public long getCreateTime() { 459 return this.now; 460 } 461 462 public List<Pair<RegionInfo, RegionInfo>> getHoles() { 463 return this.holes; 464 } 465 466 /** 467 * @return Overlap pairs found as we scanned hbase:meta; ordered by hbase:meta 468 * table sort. Pairs of overlaps may have overlap with subsequent pairs. 469 * @see MetaFixer#calculateMerges(int, List) where we aggregate overlaps 470 * for a single 'merge' call. 471 */ 472 public List<Pair<RegionInfo, RegionInfo>> getOverlaps() { 473 return this.overlaps; 474 } 475 476 public List<Pair<RegionInfo, ServerName>> getUnknownServers() { 477 return unknownServers; 478 } 479 480 public List<byte[]> getEmptyRegionInfo() { 481 return emptyRegionInfo; 482 } 483 484 /** 485 * @return True if an 'empty' lastReport -- no problems found. 486 */ 487 public boolean isEmpty() { 488 return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() && 489 this.emptyRegionInfo.isEmpty(); 490 } 491 492 @Override 493 public String toString() { 494 StringBuilder sb = new StringBuilder(); 495 for (Pair<RegionInfo, RegionInfo> p: this.holes) { 496 if (sb.length() > 0) { 497 sb.append(", "); 498 } 499 sb.append("hole=").append(p.getFirst().getRegionNameAsString()).append("/"). 500 append(p.getSecond().getRegionNameAsString()); 501 } 502 for (Pair<RegionInfo, RegionInfo> p: this.overlaps) { 503 if (sb.length() > 0) { 504 sb.append(", "); 505 } 506 sb.append("overlap=").append(p.getFirst().getRegionNameAsString()).append("/"). 507 append(p.getSecond().getRegionNameAsString()); 508 } 509 for (byte [] r: this.emptyRegionInfo) { 510 if (sb.length() > 0) { 511 sb.append(", "); 512 } 513 sb.append("empty=").append(Bytes.toStringBinary(r)); 514 } 515 for (Pair<RegionInfo, ServerName> p: this.unknownServers) { 516 if (sb.length() > 0) { 517 sb.append(", "); 518 } 519 sb.append("unknown_server=").append(p.getSecond()).append("/"). 520 append(p.getFirst().getRegionNameAsString()); 521 } 522 return sb.toString(); 523 } 524 } 525 526 /** 527 * Visitor we use in here in CatalogJanitor to go against hbase:meta table. 528 * Generates a Report made of a collection of split parents and counts of rows 529 * in the hbase:meta table. Also runs hbase:meta consistency checks to 530 * generate more report. Report is NOT ready until after this visitor has been 531 * {@link #close()}'d. 532 */ 533 static class ReportMakingVisitor implements MetaTableAccessor.CloseableVisitor { 534 private final MasterServices services; 535 private volatile boolean closed; 536 537 /** 538 * Report is not done until after the close has been called. 539 * @see #close() 540 * @see #getReport() 541 */ 542 private Report report = new Report(); 543 544 /** 545 * RegionInfo from previous row. 546 */ 547 private RegionInfo previous = null; 548 549 /** 550 * Keep account of the highest end key seen as we move through hbase:meta. 551 * Usually, the current RegionInfo has the highest end key but if an overlap, 552 * this may no longer hold. An overlap may be a region with startkey 'd' and 553 * endkey 'g'. The next region in meta may be 'e' to 'f' and then 'f' to 'g'. 554 * Looking at previous and current meta row, we won't know about the 'd' to 'g' 555 * overlap unless we keep a running 'highest-endpoint-seen'. 556 */ 557 private RegionInfo highestEndKeyRegionInfo = null; 558 559 ReportMakingVisitor(MasterServices services) { 560 this.services = services; 561 } 562 563 /** 564 * Do not call until after {@link #close()}. 565 * Will throw a {@link RuntimeException} if you do. 566 */ 567 Report getReport() { 568 if (!this.closed) { 569 throw new RuntimeException("Report not ready until after close()"); 570 } 571 return this.report; 572 } 573 574 @Override 575 public boolean visit(Result r) { 576 if (r == null || r.isEmpty()) { 577 return true; 578 } 579 this.report.count++; 580 RegionInfo regionInfo = null; 581 try { 582 regionInfo = metaTableConsistencyCheck(r); 583 } catch(Throwable t) { 584 LOG.warn("Failed consistency check on {}", Bytes.toStringBinary(r.getRow()), t); 585 } 586 if (regionInfo != null) { 587 LOG.trace(regionInfo.toString()); 588 if (regionInfo.isSplitParent()) { // splitParent means split and offline. 589 this.report.splitParents.put(regionInfo, r); 590 } 591 if (MetaTableAccessor.hasMergeRegions(r.rawCells())) { 592 this.report.mergedRegions.put(regionInfo, r); 593 } 594 } 595 // Returning true means "keep scanning" 596 return true; 597 } 598 599 /** 600 * Check row. 601 * @param metaTableRow Row from hbase:meta table. 602 * @return Returns default regioninfo found in row parse as a convenience to save 603 * on having to do a double-parse of Result. 604 */ 605 private RegionInfo metaTableConsistencyCheck(Result metaTableRow) { 606 RegionInfo ri; 607 // Locations comes back null if the RegionInfo field is empty. 608 // If locations is null, ensure the regioninfo is for sure empty before progressing. 609 // If really empty, report as missing regioninfo! Otherwise, can run server check 610 // and get RegionInfo from locations. 611 RegionLocations locations = MetaTableAccessor.getRegionLocations(metaTableRow); 612 if (locations == null) { 613 ri = MetaTableAccessor.getRegionInfo(metaTableRow, 614 MetaTableAccessor.getRegionInfoColumn()); 615 } else { 616 ri = locations.getDefaultRegionLocation().getRegion(); 617 checkServer(locations); 618 } 619 620 if (ri == null) { 621 this.report.emptyRegionInfo.add(metaTableRow.getRow()); 622 return ri; 623 } 624 625 if (!Bytes.equals(metaTableRow.getRow(), ri.getRegionName())) { 626 LOG.warn("INCONSISTENCY: Row name is not equal to serialized info:regioninfo content; " + 627 "row={} {}; See if RegionInfo is referenced in another hbase:meta row? Delete?", 628 Bytes.toStringBinary(metaTableRow.getRow()), ri.getRegionNameAsString()); 629 return null; 630 } 631 // Skip split parent region 632 if (ri.isSplitParent()) { 633 return ri; 634 } 635 // If table is disabled, skip integrity check. 636 if (!isTableDisabled(ri)) { 637 if (isTableTransition(ri)) { 638 // On table transition, look to see if last region was last in table 639 // and if this is the first. Report 'hole' if neither is true. 640 // HBCK1 used to have a special category for missing start or end keys. 641 // We'll just lump them in as 'holes'. 642 if ((this.previous != null && !this.previous.isLast()) || !ri.isFirst()) { 643 addHole(this.previous == null? RegionInfo.UNDEFINED: this.previous, ri); 644 } 645 } else { 646 if (!this.previous.isNext(ri)) { 647 if (this.previous.isOverlap(ri)) { 648 addOverlap(this.previous, ri); 649 } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) { 650 // We may have seen a region a few rows back that overlaps this one. 651 addOverlap(this.highestEndKeyRegionInfo, ri); 652 } else { 653 addHole(this.previous, ri); 654 } 655 } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) { 656 // We may have seen a region a few rows back that overlaps this one 657 // even though it properly 'follows' the region just before. 658 addOverlap(this.highestEndKeyRegionInfo, ri); 659 } 660 } 661 } 662 this.previous = ri; 663 this.highestEndKeyRegionInfo = 664 MetaFixer.getRegionInfoWithLargestEndKey(this.highestEndKeyRegionInfo, ri); 665 return ri; 666 } 667 668 private void addOverlap(RegionInfo a, RegionInfo b) { 669 this.report.overlaps.add(new Pair<>(a, b)); 670 } 671 672 private void addHole(RegionInfo a, RegionInfo b) { 673 this.report.holes.add(new Pair<>(a, b)); 674 } 675 676 /** 677 * @return True if table is disabled or disabling; defaults false! 678 */ 679 boolean isTableDisabled(RegionInfo ri) { 680 if (ri == null) { 681 return false; 682 } 683 if (this.services == null) { 684 return false; 685 } 686 if (this.services.getTableStateManager() == null) { 687 return false; 688 } 689 TableState state = null; 690 try { 691 state = this.services.getTableStateManager().getTableState(ri.getTable()); 692 } catch (IOException e) { 693 LOG.warn("Failed getting table state", e); 694 } 695 return state != null && state.isDisabledOrDisabling(); 696 } 697 698 /** 699 * Run through referenced servers and save off unknown and the dead. 700 */ 701 private void checkServer(RegionLocations locations) { 702 if (this.services == null) { 703 // Can't do this test if no services. 704 return; 705 } 706 if (locations == null) { 707 return; 708 } 709 if (locations.getRegionLocations() == null) { 710 return; 711 } 712 // Check referenced servers are known/online. Here we are looking 713 // at both the default replica -- the main replica -- and then replica 714 // locations too. 715 for (HRegionLocation location: locations.getRegionLocations()) { 716 if (location == null) { 717 continue; 718 } 719 ServerName sn = location.getServerName(); 720 if (sn == null) { 721 continue; 722 } 723 if (location.getRegion() == null) { 724 LOG.warn("Empty RegionInfo in {}", location); 725 // This should never happen but if it does, will mess up below. 726 continue; 727 } 728 RegionInfo ri = location.getRegion(); 729 // Skip split parent region 730 if (ri.isSplitParent()) { 731 continue; 732 } 733 // skip the offline regions which belong to disabled table. 734 if (isTableDisabled(ri)) { 735 continue; 736 } 737 RegionState rs = this.services.getAssignmentManager().getRegionStates().getRegionState(ri); 738 if (rs.isClosedOrAbnormallyClosed()) { 739 // If closed against an 'Unknown Server', that is should be fine. 740 continue; 741 } 742 ServerManager.ServerLiveState state = this.services.getServerManager(). 743 isServerKnownAndOnline(sn); 744 switch (state) { 745 case UNKNOWN: 746 this.report.unknownServers.add(new Pair<>(ri, sn)); 747 break; 748 749 default: 750 break; 751 } 752 } 753 } 754 755 /** 756 * @return True iff first row in hbase:meta or if we've broached a new table in hbase:meta 757 */ 758 private boolean isTableTransition(RegionInfo ri) { 759 return this.previous == null || 760 !this.previous.getTable().equals(ri.getTable()); 761 } 762 763 @Override 764 public void close() throws IOException { 765 // This is a table transition... after the last region. Check previous. 766 // Should be last region. If not, its a hole on end of laster table. 767 if (this.previous != null && !this.previous.isLast()) { 768 addHole(this.previous, RegionInfo.UNDEFINED); 769 } 770 this.closed = true; 771 } 772 } 773 774 private static void checkLog4jProperties() { 775 String filename = "log4j.properties"; 776 try { 777 final InputStream inStream = 778 CatalogJanitor.class.getClassLoader().getResourceAsStream(filename); 779 if (inStream != null) { 780 new Properties().load(inStream); 781 } else { 782 System.out.println("No " + filename + " on classpath; Add one else no logging output!"); 783 } 784 } catch (IOException e) { 785 LOG.error("Log4j check failed", e); 786 } 787 } 788 789 /** 790 * For testing against a cluster. 791 * Doesn't have a MasterServices context so does not report on good vs bad servers. 792 */ 793 public static void main(String [] args) throws IOException { 794 checkLog4jProperties(); 795 ReportMakingVisitor visitor = new ReportMakingVisitor(null); 796 Configuration configuration = HBaseConfiguration.create(); 797 configuration.setBoolean("hbase.defaults.for.version.skip", true); 798 try (Connection connection = ConnectionFactory.createConnection(configuration)) { 799 /* Used to generate an overlap. 800 */ 801 Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0.")); 802 g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); 803 try (Table t = connection.getTable(TableName.META_TABLE_NAME)) { 804 Result r = t.get(g); 805 byte [] row = g.getRow(); 806 row[row.length - 2] <<= row[row.length - 2]; 807 Put p = new Put(g.getRow()); 808 p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, 809 r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); 810 t.put(p); 811 } 812 MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null); 813 Report report = visitor.getReport(); 814 LOG.info(report != null? report.toString(): "empty"); 815 } 816 } 817}