001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import java.io.IOException; 021import java.io.InputStream; 022import java.util.ArrayList; 023import java.util.Comparator; 024import java.util.HashSet; 025import java.util.List; 026import java.util.Map; 027import java.util.Properties; 028import java.util.TreeMap; 029import java.util.concurrent.atomic.AtomicBoolean; 030import java.util.stream.Collectors; 031import org.apache.hadoop.conf.Configuration; 032import org.apache.hadoop.fs.FileSystem; 033import org.apache.hadoop.fs.Path; 034import org.apache.hadoop.hbase.HBaseConfiguration; 035import org.apache.hadoop.hbase.HConstants; 036import org.apache.hadoop.hbase.HRegionLocation; 037import org.apache.hadoop.hbase.MetaTableAccessor; 038import org.apache.hadoop.hbase.RegionLocations; 039import org.apache.hadoop.hbase.ScheduledChore; 040import org.apache.hadoop.hbase.ServerName; 041import org.apache.hadoop.hbase.TableName; 042import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 043import org.apache.hadoop.hbase.client.Connection; 044import org.apache.hadoop.hbase.client.ConnectionFactory; 045import org.apache.hadoop.hbase.client.Get; 046import org.apache.hadoop.hbase.client.Put; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.Result; 049import org.apache.hadoop.hbase.client.Table; 050import org.apache.hadoop.hbase.client.TableDescriptor; 051import org.apache.hadoop.hbase.client.TableState; 052import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 053import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; 054import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; 055import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 056import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 057import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 058import org.apache.hadoop.hbase.util.Bytes; 059import org.apache.hadoop.hbase.util.CommonFSUtils; 060import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 061import org.apache.hadoop.hbase.util.Pair; 062import org.apache.hadoop.hbase.util.PairOfSameType; 063import org.apache.hadoop.hbase.util.Threads; 064import org.apache.yetus.audience.InterfaceAudience; 065import org.slf4j.Logger; 066import org.slf4j.LoggerFactory; 067 068import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 069 070/** 071 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog 072 * table on a period. Makes a lastReport on state of hbase:meta. Looks for unused 073 * regions to garbage collect. Scan of hbase:meta runs if we are NOT in maintenance 074 * mode, if we are NOT shutting down, AND if the assignmentmanager is loaded. 075 * Playing it safe, we will garbage collect no-longer needed region references 076 * only if there are no regions-in-transition (RIT). 077 */ 078// TODO: Only works with single hbase:meta region currently. Fix. 079// TODO: Should it start over every time? Could it continue if runs into problem? Only if 080// problem does not mess up 'results'. 081// TODO: Do more by way of 'repair'; see note on unknownServers below. 082@InterfaceAudience.Private 083public class CatalogJanitor extends ScheduledChore { 084 private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName()); 085 private final AtomicBoolean alreadyRunning = new AtomicBoolean(false); 086 private final AtomicBoolean enabled = new AtomicBoolean(true); 087 private final MasterServices services; 088 089 /** 090 * Saved report from last hbase:meta scan to completion. May be stale if having trouble 091 * completing scan. Check its date. 092 */ 093 private volatile Report lastReport; 094 095 CatalogJanitor(final MasterServices services) { 096 super("CatalogJanitor-" + services.getServerName().toShortString(), services, 097 services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000)); 098 this.services = services; 099 } 100 101 @Override 102 protected boolean initialChore() { 103 try { 104 if (getEnabled()) { 105 scan(); 106 } 107 } catch (IOException e) { 108 LOG.warn("Failed initial janitorial scan of hbase:meta table", e); 109 return false; 110 } 111 return true; 112 } 113 114 boolean setEnabled(final boolean enabled) { 115 boolean alreadyEnabled = this.enabled.getAndSet(enabled); 116 // If disabling is requested on an already enabled chore, we could have an active 117 // scan still going on, callers might not be aware of that and do further action thinkng 118 // that no action would be from this chore. In this case, the right action is to wait for 119 // the active scan to complete before exiting this function. 120 if (!enabled && alreadyEnabled) { 121 while (alreadyRunning.get()) { 122 Threads.sleepWithoutInterrupt(100); 123 } 124 } 125 return alreadyEnabled; 126 } 127 128 boolean getEnabled() { 129 return this.enabled.get(); 130 } 131 132 @Override 133 protected void chore() { 134 try { 135 AssignmentManager am = this.services.getAssignmentManager(); 136 if (getEnabled() && !this.services.isInMaintenanceMode() && 137 !this.services.getServerManager().isClusterShutdown() && 138 isMetaLoaded(am)) { 139 scan(); 140 } else { 141 LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + 142 ", maintenanceMode=" + this.services.isInMaintenanceMode() + ", am=" + am + 143 ", metaLoaded=" + isMetaLoaded(am) + ", hasRIT=" + isRIT(am) + 144 " clusterShutDown=" + this.services.getServerManager().isClusterShutdown()); 145 } 146 } catch (IOException e) { 147 LOG.warn("Failed janitorial scan of hbase:meta table", e); 148 } 149 } 150 151 private static boolean isMetaLoaded(AssignmentManager am) { 152 return am != null && am.isMetaLoaded(); 153 } 154 155 private static boolean isRIT(AssignmentManager am) { 156 return isMetaLoaded(am) && am.hasRegionsInTransition(); 157 } 158 159 /** 160 * Run janitorial scan of catalog <code>hbase:meta</code> table looking for 161 * garbage to collect. 162 * @return How many items gc'd whether for merge or split. 163 */ 164 int scan() throws IOException { 165 int gcs = 0; 166 try { 167 if (!alreadyRunning.compareAndSet(false, true)) { 168 LOG.debug("CatalogJanitor already running"); 169 return gcs; 170 } 171 this.lastReport = scanForReport(); 172 if (!this.lastReport.isEmpty()) { 173 LOG.warn(this.lastReport.toString()); 174 } 175 176 if (isRIT(this.services.getAssignmentManager())) { 177 LOG.warn("Playing-it-safe skipping merge/split gc'ing of regions from hbase:meta while " + 178 "regions-in-transition (RIT)"); 179 } 180 Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions; 181 for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) { 182 if (this.services.isInMaintenanceMode()) { 183 // Stop cleaning if the master is in maintenance mode 184 break; 185 } 186 187 List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells()); 188 if (parents != null && cleanMergeRegion(e.getKey(), parents)) { 189 gcs++; 190 } 191 } 192 // Clean split parents 193 Map<RegionInfo, Result> splitParents = this.lastReport.splitParents; 194 195 // Now work on our list of found parents. See if any we can clean up. 196 HashSet<String> parentNotCleaned = new HashSet<>(); 197 for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) { 198 if (this.services.isInMaintenanceMode()) { 199 // Stop cleaning if the master is in maintenance mode 200 break; 201 } 202 203 if (!parentNotCleaned.contains(e.getKey().getEncodedName()) && 204 cleanParent(e.getKey(), e.getValue())) { 205 gcs++; 206 } else { 207 // We could not clean the parent, so it's daughters should not be 208 // cleaned either (HBASE-6160) 209 PairOfSameType<RegionInfo> daughters = 210 MetaTableAccessor.getDaughterRegions(e.getValue()); 211 parentNotCleaned.add(daughters.getFirst().getEncodedName()); 212 parentNotCleaned.add(daughters.getSecond().getEncodedName()); 213 } 214 } 215 return gcs; 216 } finally { 217 alreadyRunning.set(false); 218 } 219 } 220 221 /** 222 * Scan hbase:meta. 223 * @return Return generated {@link Report} 224 */ 225 Report scanForReport() throws IOException { 226 ReportMakingVisitor visitor = new ReportMakingVisitor(this.services); 227 // Null tablename means scan all of meta. 228 MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null); 229 return visitor.getReport(); 230 } 231 232 /** 233 * @return Returns last published Report that comes of last successful scan 234 * of hbase:meta. 235 */ 236 public Report getLastReport() { 237 return this.lastReport; 238 } 239 240 /** 241 * If merged region no longer holds reference to the merge regions, archive 242 * merge region on hdfs and perform deleting references in hbase:meta 243 * @return true if we delete references in merged region on hbase:meta and archive 244 * the files on the file system 245 */ 246 private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents) 247 throws IOException { 248 FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); 249 Path rootdir = this.services.getMasterFileSystem().getRootDir(); 250 Path tabledir = CommonFSUtils.getTableDir(rootdir, mergedRegion.getTable()); 251 TableDescriptor htd = getDescriptor(mergedRegion.getTable()); 252 HRegionFileSystem regionFs = null; 253 try { 254 regionFs = HRegionFileSystem.openRegionFromFileSystem( 255 this.services.getConfiguration(), fs, tabledir, mergedRegion, true); 256 } catch (IOException e) { 257 LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName()); 258 } 259 if (regionFs == null || !regionFs.hasReferences(htd)) { 260 LOG.debug("Deleting parents ({}) from fs; merged child {} no longer holds references", 261 parents.stream().map(r -> RegionInfo.getShortNameToLog(r)). 262 collect(Collectors.joining(", ")), 263 mergedRegion); 264 ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); 265 pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), 266 mergedRegion, parents)); 267 for (RegionInfo ri: parents) { 268 // The above scheduled GCMultipleMergedRegionsProcedure does the below. 269 // Do we need this? 270 this.services.getAssignmentManager().getRegionStates().deleteRegion(ri); 271 this.services.getServerManager().removeRegion(ri); 272 } 273 return true; 274 } 275 return false; 276 } 277 278 /** 279 * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters. 280 */ 281 static class SplitParentFirstComparator implements Comparator<RegionInfo> { 282 Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator(); 283 @Override 284 public int compare(RegionInfo left, RegionInfo right) { 285 // This comparator differs from the one RegionInfo in that it sorts 286 // parent before daughters. 287 if (left == null) { 288 return -1; 289 } 290 if (right == null) { 291 return 1; 292 } 293 // Same table name. 294 int result = left.getTable().compareTo(right.getTable()); 295 if (result != 0) { 296 return result; 297 } 298 // Compare start keys. 299 result = Bytes.compareTo(left.getStartKey(), right.getStartKey()); 300 if (result != 0) { 301 return result; 302 } 303 // Compare end keys, but flip the operands so parent comes first 304 result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey()); 305 306 return result; 307 } 308 } 309 310 /** 311 * If daughters no longer hold reference to the parents, delete the parent. 312 * @param parent RegionInfo of split offlined parent 313 * @param rowContent Content of <code>parent</code> row in 314 * <code>metaRegionName</code> 315 * @return True if we removed <code>parent</code> from meta table and from 316 * the filesystem. 317 */ 318 boolean cleanParent(final RegionInfo parent, Result rowContent) 319 throws IOException { 320 // Check whether it is a merged region and if it is clean of references. 321 if (MetaTableAccessor.hasMergeRegions(rowContent.rawCells())) { 322 // Wait until clean of merge parent regions first 323 return false; 324 } 325 // Run checks on each daughter split. 326 PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent); 327 Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst()); 328 Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond()); 329 if (hasNoReferences(a) && hasNoReferences(b)) { 330 String daughterA = daughters.getFirst() != null? 331 daughters.getFirst().getShortNameToLog(): "null"; 332 String daughterB = daughters.getSecond() != null? 333 daughters.getSecond().getShortNameToLog(): "null"; 334 LOG.debug("Deleting region " + parent.getShortNameToLog() + 335 " because daughters -- " + daughterA + ", " + daughterB + 336 " -- no longer hold references"); 337 ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); 338 pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent)); 339 // Remove from in-memory states 340 this.services.getAssignmentManager().getRegionStates().deleteRegion(parent); 341 this.services.getServerManager().removeRegion(parent); 342 return true; 343 } 344 return false; 345 } 346 347 /** 348 * @param p A pair where the first boolean says whether or not the daughter 349 * region directory exists in the filesystem and then the second boolean says 350 * whether the daughter has references to the parent. 351 * @return True the passed <code>p</code> signifies no references. 352 */ 353 private boolean hasNoReferences(final Pair<Boolean, Boolean> p) { 354 return !p.getFirst() || !p.getSecond(); 355 } 356 357 /** 358 * Checks if a daughter region -- either splitA or splitB -- still holds 359 * references to parent. 360 * @param parent Parent region 361 * @param daughter Daughter region 362 * @return A pair where the first boolean says whether or not the daughter 363 * region directory exists in the filesystem and then the second boolean says 364 * whether the daughter has references to the parent. 365 */ 366 private Pair<Boolean, Boolean> checkDaughterInFs(final RegionInfo parent, 367 final RegionInfo daughter) 368 throws IOException { 369 if (daughter == null) { 370 return new Pair<>(Boolean.FALSE, Boolean.FALSE); 371 } 372 373 FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); 374 Path rootdir = this.services.getMasterFileSystem().getRootDir(); 375 Path tabledir = CommonFSUtils.getTableDir(rootdir, daughter.getTable()); 376 377 Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName()); 378 379 HRegionFileSystem regionFs; 380 381 try { 382 if (!CommonFSUtils.isExists(fs, daughterRegionDir)) { 383 return new Pair<>(Boolean.FALSE, Boolean.FALSE); 384 } 385 } catch (IOException ioe) { 386 LOG.error("Error trying to determine if daughter region exists, " + 387 "assuming exists and has references", ioe); 388 return new Pair<>(Boolean.TRUE, Boolean.TRUE); 389 } 390 391 boolean references = false; 392 TableDescriptor parentDescriptor = getDescriptor(parent.getTable()); 393 try { 394 regionFs = HRegionFileSystem.openRegionFromFileSystem( 395 this.services.getConfiguration(), fs, tabledir, daughter, true); 396 397 for (ColumnFamilyDescriptor family: parentDescriptor.getColumnFamilies()) { 398 if ((references = regionFs.hasReferences(family.getNameAsString()))) { 399 break; 400 } 401 } 402 } catch (IOException e) { 403 LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName() 404 + ", to: " + parent.getEncodedName() + " assuming has references", e); 405 return new Pair<>(Boolean.TRUE, Boolean.TRUE); 406 } 407 return new Pair<>(Boolean.TRUE, references); 408 } 409 410 private TableDescriptor getDescriptor(final TableName tableName) throws IOException { 411 return this.services.getTableDescriptors().get(tableName); 412 } 413 414 /** 415 * Checks if the specified region has merge qualifiers, if so, try to clean them. 416 * @return true if no info:merge* columns; i.e. the specified region doesn't have 417 * any merge qualifiers. 418 */ 419 public boolean cleanMergeQualifier(final RegionInfo region) throws IOException { 420 // Get merge regions if it is a merged region and already has merge qualifier 421 List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(this.services.getConnection(), 422 region.getRegionName()); 423 if (parents == null || parents.isEmpty()) { 424 // It doesn't have merge qualifier, no need to clean 425 return true; 426 } 427 428 // If a parent region is a merged child region and GC has not kicked in/finish its work yet, 429 // return false in this case to avoid kicking in a merge, trying later. 430 cleanMergeRegion(region, parents); 431 return false; 432 } 433 434 /** 435 * Report made by ReportMakingVisitor 436 */ 437 public static class Report { 438 private final long now = EnvironmentEdgeManager.currentTime(); 439 440 // Keep Map of found split parents. These are candidates for cleanup. 441 // Use a comparator that has split parents come before its daughters. 442 final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator()); 443 final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR); 444 int count = 0; 445 446 private final List<Pair<RegionInfo, RegionInfo>> holes = new ArrayList<>(); 447 private final List<Pair<RegionInfo, RegionInfo>> overlaps = new ArrayList<>(); 448 449 /** 450 * TODO: If CatalogJanitor finds an 'Unknown Server', it should 'fix' it by queuing 451 * a {@link org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure} for 452 * found server for it to clean up meta. 453 */ 454 private final List<Pair<RegionInfo, ServerName>> unknownServers = new ArrayList<>(); 455 456 private final List<byte []> emptyRegionInfo = new ArrayList<>(); 457 458 @VisibleForTesting 459 Report() {} 460 461 public long getCreateTime() { 462 return this.now; 463 } 464 465 public List<Pair<RegionInfo, RegionInfo>> getHoles() { 466 return this.holes; 467 } 468 469 /** 470 * @return Overlap pairs found as we scanned hbase:meta; ordered by hbase:meta 471 * table sort. Pairs of overlaps may have overlap with subsequent pairs. 472 * @see MetaFixer#calculateMerges(int, List) where we aggregate overlaps 473 * for a single 'merge' call. 474 */ 475 public List<Pair<RegionInfo, RegionInfo>> getOverlaps() { 476 return this.overlaps; 477 } 478 479 public Map<RegionInfo, Result> getMergedRegions() { 480 return this.mergedRegions; 481 } 482 483 public List<Pair<RegionInfo, ServerName>> getUnknownServers() { 484 return unknownServers; 485 } 486 487 public List<byte[]> getEmptyRegionInfo() { 488 return emptyRegionInfo; 489 } 490 491 /** 492 * @return True if an 'empty' lastReport -- no problems found. 493 */ 494 public boolean isEmpty() { 495 return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() && 496 this.emptyRegionInfo.isEmpty(); 497 } 498 499 @Override 500 public String toString() { 501 StringBuilder sb = new StringBuilder(); 502 for (Pair<RegionInfo, RegionInfo> p: this.holes) { 503 if (sb.length() > 0) { 504 sb.append(", "); 505 } 506 sb.append("hole=").append(p.getFirst().getRegionNameAsString()).append("/"). 507 append(p.getSecond().getRegionNameAsString()); 508 } 509 for (Pair<RegionInfo, RegionInfo> p: this.overlaps) { 510 if (sb.length() > 0) { 511 sb.append(", "); 512 } 513 sb.append("overlap=").append(p.getFirst().getRegionNameAsString()).append("/"). 514 append(p.getSecond().getRegionNameAsString()); 515 } 516 for (byte [] r: this.emptyRegionInfo) { 517 if (sb.length() > 0) { 518 sb.append(", "); 519 } 520 sb.append("empty=").append(Bytes.toStringBinary(r)); 521 } 522 for (Pair<RegionInfo, ServerName> p: this.unknownServers) { 523 if (sb.length() > 0) { 524 sb.append(", "); 525 } 526 sb.append("unknown_server=").append(p.getSecond()).append("/"). 527 append(p.getFirst().getRegionNameAsString()); 528 } 529 return sb.toString(); 530 } 531 } 532 533 /** 534 * Visitor we use in here in CatalogJanitor to go against hbase:meta table. 535 * Generates a Report made of a collection of split parents and counts of rows 536 * in the hbase:meta table. Also runs hbase:meta consistency checks to 537 * generate more report. Report is NOT ready until after this visitor has been 538 * {@link #close()}'d. 539 */ 540 static class ReportMakingVisitor implements MetaTableAccessor.CloseableVisitor { 541 private final MasterServices services; 542 private volatile boolean closed; 543 544 /** 545 * Report is not done until after the close has been called. 546 * @see #close() 547 * @see #getReport() 548 */ 549 private Report report = new Report(); 550 551 /** 552 * RegionInfo from previous row. 553 */ 554 private RegionInfo previous = null; 555 556 /** 557 * Keep account of the highest end key seen as we move through hbase:meta. 558 * Usually, the current RegionInfo has the highest end key but if an overlap, 559 * this may no longer hold. An overlap may be a region with startkey 'd' and 560 * endkey 'g'. The next region in meta may be 'e' to 'f' and then 'f' to 'g'. 561 * Looking at previous and current meta row, we won't know about the 'd' to 'g' 562 * overlap unless we keep a running 'highest-endpoint-seen'. 563 */ 564 private RegionInfo highestEndKeyRegionInfo = null; 565 566 ReportMakingVisitor(MasterServices services) { 567 this.services = services; 568 } 569 570 /** 571 * Do not call until after {@link #close()}. 572 * Will throw a {@link RuntimeException} if you do. 573 */ 574 Report getReport() { 575 if (!this.closed) { 576 throw new RuntimeException("Report not ready until after close()"); 577 } 578 return this.report; 579 } 580 581 @Override 582 public boolean visit(Result r) { 583 if (r == null || r.isEmpty()) { 584 return true; 585 } 586 this.report.count++; 587 RegionInfo regionInfo = null; 588 try { 589 regionInfo = metaTableConsistencyCheck(r); 590 } catch(Throwable t) { 591 LOG.warn("Failed consistency check on {}", Bytes.toStringBinary(r.getRow()), t); 592 } 593 if (regionInfo != null) { 594 LOG.trace(regionInfo.toString()); 595 if (regionInfo.isSplitParent()) { // splitParent means split and offline. 596 this.report.splitParents.put(regionInfo, r); 597 } 598 if (MetaTableAccessor.hasMergeRegions(r.rawCells())) { 599 this.report.mergedRegions.put(regionInfo, r); 600 } 601 } 602 // Returning true means "keep scanning" 603 return true; 604 } 605 606 /** 607 * Check row. 608 * @param metaTableRow Row from hbase:meta table. 609 * @return Returns default regioninfo found in row parse as a convenience to save 610 * on having to do a double-parse of Result. 611 */ 612 private RegionInfo metaTableConsistencyCheck(Result metaTableRow) { 613 RegionInfo ri; 614 // Locations comes back null if the RegionInfo field is empty. 615 // If locations is null, ensure the regioninfo is for sure empty before progressing. 616 // If really empty, report as missing regioninfo! Otherwise, can run server check 617 // and get RegionInfo from locations. 618 RegionLocations locations = MetaTableAccessor.getRegionLocations(metaTableRow); 619 if (locations == null) { 620 ri = MetaTableAccessor.getRegionInfo(metaTableRow, 621 MetaTableAccessor.getRegionInfoColumn()); 622 } else { 623 ri = locations.getDefaultRegionLocation().getRegion(); 624 checkServer(locations); 625 } 626 627 if (ri == null) { 628 this.report.emptyRegionInfo.add(metaTableRow.getRow()); 629 return ri; 630 } 631 632 if (!Bytes.equals(metaTableRow.getRow(), ri.getRegionName())) { 633 LOG.warn("INCONSISTENCY: Row name is not equal to serialized info:regioninfo content; " + 634 "row={} {}; See if RegionInfo is referenced in another hbase:meta row? Delete?", 635 Bytes.toStringBinary(metaTableRow.getRow()), ri.getRegionNameAsString()); 636 return null; 637 } 638 // Skip split parent region 639 if (ri.isSplitParent()) { 640 return ri; 641 } 642 // If table is disabled, skip integrity check. 643 if (!isTableDisabled(ri)) { 644 if (isTableTransition(ri)) { 645 // On table transition, look to see if last region was last in table 646 // and if this is the first. Report 'hole' if neither is true. 647 // HBCK1 used to have a special category for missing start or end keys. 648 // We'll just lump them in as 'holes'. 649 if ((this.previous != null && !this.previous.isLast()) || !ri.isFirst()) { 650 addHole(this.previous == null? RegionInfo.UNDEFINED: this.previous, ri); 651 } 652 } else { 653 if (!this.previous.isNext(ri)) { 654 if (this.previous.isOverlap(ri)) { 655 addOverlap(this.previous, ri); 656 } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) { 657 // We may have seen a region a few rows back that overlaps this one. 658 addOverlap(this.highestEndKeyRegionInfo, ri); 659 } else if (!this.highestEndKeyRegionInfo.isNext(ri)) { 660 // Need to check the case if this.highestEndKeyRegionInfo.isNext(ri). If no, 661 // report a hole, otherwise, it is ok. For an example, 662 // previous: [aa, bb), ri: [cc, dd), highestEndKeyRegionInfo: [a, cc) 663 // In this case, it should not report a hole, as highestEndKeyRegionInfo covers 664 // the hole between previous and ri. 665 addHole(this.previous, ri); 666 } 667 } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) { 668 // We may have seen a region a few rows back that overlaps this one 669 // even though it properly 'follows' the region just before. 670 addOverlap(this.highestEndKeyRegionInfo, ri); 671 } 672 } 673 } 674 this.previous = ri; 675 this.highestEndKeyRegionInfo = 676 MetaFixer.getRegionInfoWithLargestEndKey(this.highestEndKeyRegionInfo, ri); 677 return ri; 678 } 679 680 private void addOverlap(RegionInfo a, RegionInfo b) { 681 this.report.overlaps.add(new Pair<>(a, b)); 682 } 683 684 private void addHole(RegionInfo a, RegionInfo b) { 685 this.report.holes.add(new Pair<>(a, b)); 686 } 687 688 /** 689 * @return True if table is disabled or disabling; defaults false! 690 */ 691 boolean isTableDisabled(RegionInfo ri) { 692 if (ri == null) { 693 return false; 694 } 695 if (this.services == null) { 696 return false; 697 } 698 if (this.services.getTableStateManager() == null) { 699 return false; 700 } 701 TableState state = null; 702 try { 703 state = this.services.getTableStateManager().getTableState(ri.getTable()); 704 } catch (IOException e) { 705 LOG.warn("Failed getting table state", e); 706 } 707 return state != null && state.isDisabledOrDisabling(); 708 } 709 710 /** 711 * Run through referenced servers and save off unknown and the dead. 712 */ 713 private void checkServer(RegionLocations locations) { 714 if (this.services == null) { 715 // Can't do this test if no services. 716 return; 717 } 718 if (locations == null) { 719 return; 720 } 721 if (locations.getRegionLocations() == null) { 722 return; 723 } 724 // Check referenced servers are known/online. Here we are looking 725 // at both the default replica -- the main replica -- and then replica 726 // locations too. 727 for (HRegionLocation location: locations.getRegionLocations()) { 728 if (location == null) { 729 continue; 730 } 731 ServerName sn = location.getServerName(); 732 if (sn == null) { 733 continue; 734 } 735 if (location.getRegion() == null) { 736 LOG.warn("Empty RegionInfo in {}", location); 737 // This should never happen but if it does, will mess up below. 738 continue; 739 } 740 RegionInfo ri = location.getRegion(); 741 // Skip split parent region 742 if (ri.isSplitParent()) { 743 continue; 744 } 745 // skip the offline regions which belong to disabled table. 746 if (isTableDisabled(ri)) { 747 continue; 748 } 749 RegionState rs = this.services.getAssignmentManager().getRegionStates().getRegionState(ri); 750 if (rs == null || rs.isClosedOrAbnormallyClosed()) { 751 // If closed against an 'Unknown Server', that is should be fine. 752 continue; 753 } 754 ServerManager.ServerLiveState state = this.services.getServerManager(). 755 isServerKnownAndOnline(sn); 756 switch (state) { 757 case UNKNOWN: 758 this.report.unknownServers.add(new Pair<>(ri, sn)); 759 break; 760 761 default: 762 break; 763 } 764 } 765 } 766 767 /** 768 * @return True iff first row in hbase:meta or if we've broached a new table in hbase:meta 769 */ 770 private boolean isTableTransition(RegionInfo ri) { 771 return this.previous == null || 772 !this.previous.getTable().equals(ri.getTable()); 773 } 774 775 @Override 776 public void close() throws IOException { 777 // This is a table transition... after the last region. Check previous. 778 // Should be last region. If not, its a hole on end of laster table. 779 if (this.previous != null && !this.previous.isLast()) { 780 addHole(this.previous, RegionInfo.UNDEFINED); 781 } 782 this.closed = true; 783 } 784 } 785 786 private static void checkLog4jProperties() { 787 String filename = "log4j.properties"; 788 try { 789 final InputStream inStream = 790 CatalogJanitor.class.getClassLoader().getResourceAsStream(filename); 791 if (inStream != null) { 792 new Properties().load(inStream); 793 } else { 794 System.out.println("No " + filename + " on classpath; Add one else no logging output!"); 795 } 796 } catch (IOException e) { 797 LOG.error("Log4j check failed", e); 798 } 799 } 800 801 /** 802 * For testing against a cluster. 803 * Doesn't have a MasterServices context so does not report on good vs bad servers. 804 */ 805 public static void main(String [] args) throws IOException { 806 checkLog4jProperties(); 807 ReportMakingVisitor visitor = new ReportMakingVisitor(null); 808 Configuration configuration = HBaseConfiguration.create(); 809 configuration.setBoolean("hbase.defaults.for.version.skip", true); 810 try (Connection connection = ConnectionFactory.createConnection(configuration)) { 811 /* Used to generate an overlap. 812 */ 813 Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0.")); 814 g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); 815 try (Table t = connection.getTable(TableName.META_TABLE_NAME)) { 816 Result r = t.get(g); 817 byte [] row = g.getRow(); 818 row[row.length - 2] <<= row[row.length - 2]; 819 Put p = new Put(g.getRow()); 820 p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, 821 r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); 822 t.put(p); 823 } 824 MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null); 825 Report report = visitor.getReport(); 826 LOG.info(report != null? report.toString(): "empty"); 827 } 828 } 829}