001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.ArrayList;
023import java.util.Comparator;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Map;
027import java.util.Properties;
028import java.util.TreeMap;
029import java.util.concurrent.atomic.AtomicBoolean;
030import java.util.stream.Collectors;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.hbase.CatalogFamilyFormat;
035import org.apache.hadoop.hbase.ClientMetaTableAccessor;
036import org.apache.hadoop.hbase.HBaseConfiguration;
037import org.apache.hadoop.hbase.HConstants;
038import org.apache.hadoop.hbase.HRegionLocation;
039import org.apache.hadoop.hbase.MetaTableAccessor;
040import org.apache.hadoop.hbase.RegionLocations;
041import org.apache.hadoop.hbase.ScheduledChore;
042import org.apache.hadoop.hbase.ServerName;
043import org.apache.hadoop.hbase.TableName;
044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
045import org.apache.hadoop.hbase.client.Connection;
046import org.apache.hadoop.hbase.client.ConnectionFactory;
047import org.apache.hadoop.hbase.client.Get;
048import org.apache.hadoop.hbase.client.Put;
049import org.apache.hadoop.hbase.client.RegionInfo;
050import org.apache.hadoop.hbase.client.Result;
051import org.apache.hadoop.hbase.client.Table;
052import org.apache.hadoop.hbase.client.TableDescriptor;
053import org.apache.hadoop.hbase.client.TableState;
054import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
055import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
056import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
057import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
058import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
059import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
060import org.apache.hadoop.hbase.util.Bytes;
061import org.apache.hadoop.hbase.util.CommonFSUtils;
062import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
063import org.apache.hadoop.hbase.util.Pair;
064import org.apache.hadoop.hbase.util.PairOfSameType;
065import org.apache.hadoop.hbase.util.Threads;
066import org.apache.yetus.audience.InterfaceAudience;
067import org.slf4j.Logger;
068import org.slf4j.LoggerFactory;
069
070import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
071
/**
 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog
 * table on a period. Makes a report on the state of hbase:meta. Looks for unused
 * regions to garbage collect. Scan of hbase:meta runs if we are NOT in maintenance
 * mode, if we are NOT shutting down, AND if the AssignmentManager has loaded hbase:meta.
 * Playing it safe, the intent is to garbage collect no-longer needed region references
 * only if there are no regions-in-transition (RIT); note that the scan currently only
 * logs a warning when there are RIT and then goes on to garbage collect anyway.
 */
080// TODO: Only works with single hbase:meta region currently.  Fix.
081// TODO: Should it start over every time? Could it continue if runs into problem? Only if
082// problem does not mess up 'results'.
083// TODO: Do more by way of 'repair'; see note on unknownServers below.
084@InterfaceAudience.Private
085public class CatalogJanitor extends ScheduledChore {
086  private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName());
087  private final AtomicBoolean alreadyRunning = new AtomicBoolean(false);
088  private final AtomicBoolean enabled = new AtomicBoolean(true);
089  private final MasterServices services;
090
091  /**
092   * Saved report from last hbase:meta scan to completion. May be stale if having trouble
093   * completing scan. Check its date.
094   */
095  private volatile Report lastReport;
096
097  CatalogJanitor(final MasterServices services) {
098    super("CatalogJanitor-" + services.getServerName().toShortString(), services,
099      services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000));
100    this.services = services;
101  }
102
103  @Override
104  protected boolean initialChore() {
105    try {
106      if (getEnabled()) {
107        scan();
108      }
109    } catch (IOException e) {
110      LOG.warn("Failed initial janitorial scan of hbase:meta table", e);
111      return false;
112    }
113    return true;
114  }
115
116  boolean setEnabled(final boolean enabled) {
117    boolean alreadyEnabled = this.enabled.getAndSet(enabled);
118    // If disabling is requested on an already enabled chore, we could have an active
119    // scan still going on, callers might not be aware of that and do further action thinkng
120    // that no action would be from this chore.  In this case, the right action is to wait for
121    // the active scan to complete before exiting this function.
122    if (!enabled && alreadyEnabled) {
123      while (alreadyRunning.get()) {
124        Threads.sleepWithoutInterrupt(100);
125      }
126    }
127    return alreadyEnabled;
128  }
129
130  boolean getEnabled() {
131    return this.enabled.get();
132  }
133
134  @Override
135  protected void chore() {
136    try {
137      AssignmentManager am = this.services.getAssignmentManager();
138      if (getEnabled() && !this.services.isInMaintenanceMode() &&
139          !this.services.getServerManager().isClusterShutdown() &&
140          isMetaLoaded(am)) {
141        scan();
142      } else {
143        LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + 
144          ", maintenanceMode=" + this.services.isInMaintenanceMode() + ", am=" + am +
145          ", metaLoaded=" + isMetaLoaded(am) + ", hasRIT=" + isRIT(am) +
146          " clusterShutDown=" + this.services.getServerManager().isClusterShutdown());
147      }
148    } catch (IOException e) {
149      LOG.warn("Failed janitorial scan of hbase:meta table", e);
150    }
151  }
152
153  private static boolean isMetaLoaded(AssignmentManager am) {
154    return am != null && am.isMetaLoaded();
155  }
156
157  private static boolean isRIT(AssignmentManager am) {
158    return isMetaLoaded(am) && am.hasRegionsInTransition();
159  }
160
161  /**
162   * Run janitorial scan of catalog <code>hbase:meta</code> table looking for
163   * garbage to collect.
164   * @return How many items gc'd whether for merge or split.
165   */
166  int scan() throws IOException {
167    int gcs = 0;
168    try {
169      if (!alreadyRunning.compareAndSet(false, true)) {
170        LOG.debug("CatalogJanitor already running");
171        return gcs;
172      }
173      this.lastReport = scanForReport();
174      if (!this.lastReport.isEmpty()) {
175        LOG.warn(this.lastReport.toString());
176      }
177
178      if (isRIT(this.services.getAssignmentManager())) {
179        LOG.warn("Playing-it-safe skipping merge/split gc'ing of regions from hbase:meta while " +
180            "regions-in-transition (RIT)");
181      }
182      Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions;
183      for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
184        if (this.services.isInMaintenanceMode()) {
185          // Stop cleaning if the master is in maintenance mode
186          break;
187        }
188
189        List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
190        if (parents != null && cleanMergeRegion(e.getKey(), parents)) {
191          gcs++;
192        }
193      }
194      // Clean split parents
195      Map<RegionInfo, Result> splitParents = this.lastReport.splitParents;
196
197      // Now work on our list of found parents. See if any we can clean up.
198      HashSet<String> parentNotCleaned = new HashSet<>();
199      for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
200        if (this.services.isInMaintenanceMode()) {
201          // Stop cleaning if the master is in maintenance mode
202          break;
203        }
204
205        if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
206            cleanParent(e.getKey(), e.getValue())) {
207          gcs++;
208        } else {
209          // We could not clean the parent, so it's daughters should not be
210          // cleaned either (HBASE-6160)
211          PairOfSameType<RegionInfo> daughters =
212              MetaTableAccessor.getDaughterRegions(e.getValue());
213          parentNotCleaned.add(daughters.getFirst().getEncodedName());
214          parentNotCleaned.add(daughters.getSecond().getEncodedName());
215        }
216      }
217      return gcs;
218    } finally {
219      alreadyRunning.set(false);
220    }
221  }
222
223  /**
224   * Scan hbase:meta.
225   * @return Return generated {@link Report}
226   */
227  Report scanForReport() throws IOException {
228    ReportMakingVisitor visitor = new ReportMakingVisitor(this.services);
229    // Null tablename means scan all of meta.
230    MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null);
231    return visitor.getReport();
232  }
233
234  /**
235   * @return Returns last published Report that comes of last successful scan
236   *   of hbase:meta.
237   */
238  public Report getLastReport() {
239    return this.lastReport;
240  }
241
242  /**
243   * If merged region no longer holds reference to the merge regions, archive
244   * merge region on hdfs and perform deleting references in hbase:meta
245   * @return true if we delete references in merged region on hbase:meta and archive
246   *   the files on the file system
247   */
248  private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents)
249      throws IOException {
250    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
251    Path rootdir = this.services.getMasterFileSystem().getRootDir();
252    Path tabledir = CommonFSUtils.getTableDir(rootdir, mergedRegion.getTable());
253    TableDescriptor htd = getDescriptor(mergedRegion.getTable());
254    HRegionFileSystem regionFs = null;
255    try {
256      regionFs = HRegionFileSystem.openRegionFromFileSystem(
257          this.services.getConfiguration(), fs, tabledir, mergedRegion, true);
258    } catch (IOException e) {
259      LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
260    }
261    if (regionFs == null || !regionFs.hasReferences(htd)) {
262      LOG.debug("Deleting parents ({}) from fs; merged child {} no longer holds references",
263           parents.stream().map(r -> RegionInfo.getShortNameToLog(r)).
264              collect(Collectors.joining(", ")),
265          mergedRegion);
266      ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();
267      pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
268          mergedRegion,  parents));
269      for (RegionInfo ri:  parents) {
270        // The above scheduled GCMultipleMergedRegionsProcedure does the below.
271        // Do we need this?
272        this.services.getAssignmentManager().getRegionStates().deleteRegion(ri);
273        this.services.getServerManager().removeRegion(ri);
274      }
275      return true;
276    }
277    return false;
278  }
279
280  /**
281   * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters.
282   */
283  static class SplitParentFirstComparator implements Comparator<RegionInfo> {
284    Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
285    @Override
286    public int compare(RegionInfo left, RegionInfo right) {
287      // This comparator differs from the one RegionInfo in that it sorts
288      // parent before daughters.
289      if (left == null) {
290        return -1;
291      }
292      if (right == null) {
293        return 1;
294      }
295      // Same table name.
296      int result = left.getTable().compareTo(right.getTable());
297      if (result != 0) {
298        return result;
299      }
300      // Compare start keys.
301      result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
302      if (result != 0) {
303        return result;
304      }
305      // Compare end keys, but flip the operands so parent comes first
306      result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
307
308      return result;
309    }
310  }
311
312  /**
313   * If daughters no longer hold reference to the parents, delete the parent.
314   * @param parent RegionInfo of split offlined parent
315   * @param rowContent Content of <code>parent</code> row in
316   * <code>metaRegionName</code>
317   * @return True if we removed <code>parent</code> from meta table and from
318   * the filesystem.
319   */
320  boolean cleanParent(final RegionInfo parent, Result rowContent)
321  throws IOException {
322    // Check whether it is a merged region and if it is clean of references.
323    if (MetaTableAccessor.hasMergeRegions(rowContent.rawCells())) {
324      // Wait until clean of merge parent regions first
325      return false;
326    }
327    // Run checks on each daughter split.
328    PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
329    Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst());
330    Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond());
331    if (hasNoReferences(a) && hasNoReferences(b)) {
332      String daughterA = daughters.getFirst() != null?
333          daughters.getFirst().getShortNameToLog(): "null";
334      String daughterB = daughters.getSecond() != null?
335          daughters.getSecond().getShortNameToLog(): "null";
336      LOG.debug("Deleting region " + parent.getShortNameToLog() +
337        " because daughters -- " + daughterA + ", " + daughterB +
338        " -- no longer hold references");
339      ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();
340      pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent));
341      // Remove from in-memory states
342      this.services.getAssignmentManager().getRegionStates().deleteRegion(parent);
343      this.services.getServerManager().removeRegion(parent);
344      return true;
345    }
346    return false;
347  }
348
349  /**
350   * @param p A pair where the first boolean says whether or not the daughter
351   * region directory exists in the filesystem and then the second boolean says
352   * whether the daughter has references to the parent.
353   * @return True the passed <code>p</code> signifies no references.
354   */
355  private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
356    return !p.getFirst() || !p.getSecond();
357  }
358
359  /**
360   * Checks if a daughter region -- either splitA or splitB -- still holds
361   * references to parent.
362   * @param parent Parent region
363   * @param daughter Daughter region
364   * @return A pair where the first boolean says whether or not the daughter
365   *   region directory exists in the filesystem and then the second boolean says
366   *   whether the daughter has references to the parent.
367   */
368  private Pair<Boolean, Boolean> checkDaughterInFs(final RegionInfo parent,
369    final RegionInfo daughter)
370  throws IOException {
371    if (daughter == null)  {
372      return new Pair<>(Boolean.FALSE, Boolean.FALSE);
373    }
374
375    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
376    Path rootdir = this.services.getMasterFileSystem().getRootDir();
377    Path tabledir = CommonFSUtils.getTableDir(rootdir, daughter.getTable());
378
379    Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());
380
381    HRegionFileSystem regionFs;
382
383    try {
384      if (!CommonFSUtils.isExists(fs, daughterRegionDir)) {
385        return new Pair<>(Boolean.FALSE, Boolean.FALSE);
386      }
387    } catch (IOException ioe) {
388      LOG.error("Error trying to determine if daughter region exists, " +
389               "assuming exists and has references", ioe);
390      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
391    }
392
393    boolean references = false;
394    TableDescriptor parentDescriptor = getDescriptor(parent.getTable());
395    try {
396      regionFs = HRegionFileSystem.openRegionFromFileSystem(
397          this.services.getConfiguration(), fs, tabledir, daughter, true);
398
399      for (ColumnFamilyDescriptor family: parentDescriptor.getColumnFamilies()) {
400        if ((references = regionFs.hasReferences(family.getNameAsString()))) {
401          break;
402        }
403      }
404    } catch (IOException e) {
405      LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName()
406          + ", to: " + parent.getEncodedName() + " assuming has references", e);
407      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
408    }
409    return new Pair<>(Boolean.TRUE, references);
410  }
411
412  private TableDescriptor getDescriptor(final TableName tableName) throws IOException {
413    return this.services.getTableDescriptors().get(tableName);
414  }
415
416  /**
417   * Checks if the specified region has merge qualifiers, if so, try to clean them.
418   * @return true if no info:merge* columns; i.e. the specified region doesn't have
419   *   any merge qualifiers.
420   */
421  public boolean cleanMergeQualifier(final RegionInfo region) throws IOException {
422    // Get merge regions if it is a merged region and already has merge qualifier
423    List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(this.services.getConnection(),
424        region.getRegionName());
425    if (parents == null || parents.isEmpty()) {
426      // It doesn't have merge qualifier, no need to clean
427      return true;
428    }
429
430    // If a parent region is a merged child region and GC has not kicked in/finish its work yet,
431    // return false in this case to avoid kicking in a merge, trying later.
432    cleanMergeRegion(region, parents);
433    return false;
434  }
435
436  /**
437   * Report made by ReportMakingVisitor
438   */
439  public static class Report {
440    private final long now = EnvironmentEdgeManager.currentTime();
441
442    // Keep Map of found split parents. These are candidates for cleanup.
443    // Use a comparator that has split parents come before its daughters.
444    final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator());
445    final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR);
446    int count = 0;
447
448    private final List<Pair<RegionInfo, RegionInfo>> holes = new ArrayList<>();
449    private final List<Pair<RegionInfo, RegionInfo>> overlaps = new ArrayList<>();
450
451    /**
452     * TODO: If CatalogJanitor finds an 'Unknown Server', it should 'fix' it by queuing
453     * a {@link org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure} for
454     * found server for it to clean up meta.
455     */
456    private final List<Pair<RegionInfo, ServerName>> unknownServers = new ArrayList<>();
457
458    private final List<byte []> emptyRegionInfo = new ArrayList<>();
459
460    @VisibleForTesting
461    Report() {}
462
463    public long getCreateTime() {
464      return this.now;
465    }
466
467    public List<Pair<RegionInfo, RegionInfo>> getHoles() {
468      return this.holes;
469    }
470
471    /**
472     * @return Overlap pairs found as we scanned hbase:meta; ordered by hbase:meta
473     *   table sort. Pairs of overlaps may have overlap with subsequent pairs.
474     * @see MetaFixer#calculateMerges(int, List) where we aggregate overlaps
475     *   for a single 'merge' call.
476     */
477    public List<Pair<RegionInfo, RegionInfo>> getOverlaps() {
478      return this.overlaps;
479    }
480
481    public Map<RegionInfo, Result> getMergedRegions() {
482      return this.mergedRegions;
483    }
484
485    public List<Pair<RegionInfo, ServerName>> getUnknownServers() {
486      return unknownServers;
487    }
488
489    public List<byte[]> getEmptyRegionInfo() {
490      return emptyRegionInfo;
491    }
492
493    /**
494     * @return True if an 'empty' lastReport -- no problems found.
495     */
496    public boolean isEmpty() {
497      return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() &&
498          this.emptyRegionInfo.isEmpty();
499    }
500
501    @Override
502    public String toString() {
503      StringBuilder sb = new StringBuilder();
504      for (Pair<RegionInfo, RegionInfo> p: this.holes) {
505        if (sb.length() > 0) {
506          sb.append(", ");
507        }
508        sb.append("hole=").append(p.getFirst().getRegionNameAsString()).append("/").
509            append(p.getSecond().getRegionNameAsString());
510      }
511      for (Pair<RegionInfo, RegionInfo> p: this.overlaps) {
512        if (sb.length() > 0) {
513          sb.append(", ");
514        }
515        sb.append("overlap=").append(p.getFirst().getRegionNameAsString()).append("/").
516            append(p.getSecond().getRegionNameAsString());
517      }
518      for (byte [] r: this.emptyRegionInfo) {
519        if (sb.length() > 0) {
520          sb.append(", ");
521        }
522        sb.append("empty=").append(Bytes.toStringBinary(r));
523      }
524      for (Pair<RegionInfo, ServerName> p: this.unknownServers) {
525        if (sb.length() > 0) {
526          sb.append(", ");
527        }
528        sb.append("unknown_server=").append(p.getSecond()).append("/").
529            append(p.getFirst().getRegionNameAsString());
530      }
531      return sb.toString();
532    }
533  }
534
535  /**
536   * Visitor we use in here in CatalogJanitor to go against hbase:meta table.
537   * Generates a Report made of a collection of split parents and counts of rows
538   * in the hbase:meta table. Also runs hbase:meta consistency checks to
539   * generate more report. Report is NOT ready until after this visitor has been
540   * {@link #close()}'d.
541   */
542  static class ReportMakingVisitor implements ClientMetaTableAccessor.CloseableVisitor {
543    private final MasterServices services;
544    private volatile boolean closed;
545
546    /**
547     * Report is not done until after the close has been called.
548     * @see #close()
549     * @see #getReport()
550     */
551    private Report report = new Report();
552
553    /**
554     * RegionInfo from previous row.
555     */
556    private RegionInfo previous = null;
557
558    /**
559     * Keep account of the highest end key seen as we move through hbase:meta.
560     * Usually, the current RegionInfo has the highest end key but if an overlap,
561     * this may no longer hold. An overlap may be a region with startkey 'd' and
562     * endkey 'g'. The next region in meta may be 'e' to 'f' and then 'f' to 'g'.
563     * Looking at previous and current meta row, we won't know about the 'd' to 'g'
564     * overlap unless we keep a running 'highest-endpoint-seen'.
565     */
566    private RegionInfo highestEndKeyRegionInfo = null;
567
568    ReportMakingVisitor(MasterServices services) {
569      this.services = services;
570    }
571
572    /**
573     * Do not call until after {@link #close()}.
574     * Will throw a {@link RuntimeException} if you do.
575     */
576    Report getReport() {
577      if (!this.closed) {
578        throw new RuntimeException("Report not ready until after close()");
579      }
580      return this.report;
581    }
582
583    @Override
584    public boolean visit(Result r) {
585      if (r == null || r.isEmpty()) {
586        return true;
587      }
588      this.report.count++;
589      RegionInfo regionInfo = null;
590      try {
591        regionInfo = metaTableConsistencyCheck(r);
592      } catch(Throwable t) {
593        LOG.warn("Failed consistency check on {}", Bytes.toStringBinary(r.getRow()), t);
594      }
595      if (regionInfo != null) {
596        LOG.trace(regionInfo.toString());
597        if (regionInfo.isSplitParent()) { // splitParent means split and offline.
598          this.report.splitParents.put(regionInfo, r);
599        }
600        if (MetaTableAccessor.hasMergeRegions(r.rawCells())) {
601          this.report.mergedRegions.put(regionInfo, r);
602        }
603      }
604      // Returning true means "keep scanning"
605      return true;
606    }
607
608    /**
609     * Check row.
610     * @param metaTableRow Row from hbase:meta table.
611     * @return Returns default regioninfo found in row parse as a convenience to save
612     *   on having to do a double-parse of Result.
613     */
614    private RegionInfo metaTableConsistencyCheck(Result metaTableRow) {
615      RegionInfo ri;
616      // Locations comes back null if the RegionInfo field is empty.
617      // If locations is null, ensure the regioninfo is for sure empty before progressing.
618      // If really empty, report as missing regioninfo!  Otherwise, can run server check
619      // and get RegionInfo from locations.
620      RegionLocations locations = CatalogFamilyFormat.getRegionLocations(metaTableRow);
621      if (locations == null) {
622        ri = CatalogFamilyFormat.getRegionInfo(metaTableRow,
623            HConstants.REGIONINFO_QUALIFIER);
624      } else {
625        ri = locations.getDefaultRegionLocation().getRegion();
626        checkServer(locations);
627      }
628
629      if (ri == null) {
630        this.report.emptyRegionInfo.add(metaTableRow.getRow());
631        return ri;
632      }
633
634      if (!Bytes.equals(metaTableRow.getRow(), ri.getRegionName())) {
635        LOG.warn("INCONSISTENCY: Row name is not equal to serialized info:regioninfo content; " +
636                "row={} {}; See if RegionInfo is referenced in another hbase:meta row? Delete?",
637            Bytes.toStringBinary(metaTableRow.getRow()), ri.getRegionNameAsString());
638        return null;
639      }
640      // Skip split parent region
641      if (ri.isSplitParent()) {
642        return ri;
643      }
644      // If table is disabled, skip integrity check.
645      if (!isTableDisabled(ri)) {
646        if (isTableTransition(ri)) {
647          // On table transition, look to see if last region was last in table
648          // and if this is the first. Report 'hole' if neither is true.
649          // HBCK1 used to have a special category for missing start or end keys.
650          // We'll just lump them in as 'holes'.
651          if ((this.previous != null && !this.previous.isLast()) || !ri.isFirst()) {
652            addHole(this.previous == null? RegionInfo.UNDEFINED: this.previous, ri);
653          }
654        } else {
655          if (!this.previous.isNext(ri)) {
656            if (this.previous.isOverlap(ri)) {
657              addOverlap(this.previous, ri);
658            } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) {
659              // We may have seen a region a few rows back that overlaps this one.
660              addOverlap(this.highestEndKeyRegionInfo, ri);
661            } else if (!this.highestEndKeyRegionInfo.isNext(ri)) {
662              // Need to check the case if this.highestEndKeyRegionInfo.isNext(ri). If no,
663              // report a hole, otherwise, it is ok. For an example,
664              // previous: [aa, bb), ri: [cc, dd), highestEndKeyRegionInfo: [a, cc)
665              // In this case, it should not report a hole, as highestEndKeyRegionInfo covers
666              // the hole between previous and ri.
667              addHole(this.previous, ri);
668            }
669          } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) {
670            // We may have seen a region a few rows back that overlaps this one
671            // even though it properly 'follows' the region just before.
672            addOverlap(this.highestEndKeyRegionInfo, ri);
673          }
674        }
675      }
676      this.previous = ri;
677      this.highestEndKeyRegionInfo =
678          MetaFixer.getRegionInfoWithLargestEndKey(this.highestEndKeyRegionInfo, ri);
679      return ri;
680    }
681
682    private void addOverlap(RegionInfo a, RegionInfo b) {
683      this.report.overlaps.add(new Pair<>(a, b));
684    }
685
686    private void addHole(RegionInfo a, RegionInfo b) {
687      this.report.holes.add(new Pair<>(a, b));
688    }
689
690    /**
691     * @return True if table is disabled or disabling; defaults false!
692     */
693    boolean isTableDisabled(RegionInfo ri) {
694      if (ri == null) {
695        return false;
696      }
697      if (this.services == null) {
698        return false;
699      }
700      if (this.services.getTableStateManager() == null) {
701        return false;
702      }
703      TableState state = null;
704      try {
705        state = this.services.getTableStateManager().getTableState(ri.getTable());
706      } catch (IOException e) {
707        LOG.warn("Failed getting table state", e);
708      }
709      return state != null && state.isDisabledOrDisabling();
710    }
711
712    /**
713     * Run through referenced servers and save off unknown and the dead.
714     */
715    private void checkServer(RegionLocations locations) {
716      if (this.services == null) {
717        // Can't do this test if no services.
718        return;
719      }
720      if (locations == null) {
721        return;
722      }
723      if (locations.getRegionLocations() == null) {
724        return;
725      }
726      // Check referenced servers are known/online. Here we are looking
727      // at both the default replica -- the main replica -- and then replica
728      // locations too.
729      for (HRegionLocation location: locations.getRegionLocations()) {
730        if (location == null) {
731          continue;
732        }
733        ServerName sn = location.getServerName();
734        if (sn == null) {
735          continue;
736        }
737        if (location.getRegion() == null) {
738          LOG.warn("Empty RegionInfo in {}", location);
739          // This should never happen but if it does, will mess up below.
740          continue;
741        }
742        RegionInfo ri = location.getRegion();
743        // Skip split parent region
744        if (ri.isSplitParent()) {
745          continue;
746        }
747        // skip the offline regions which belong to disabled table.
748        if (isTableDisabled(ri)) {
749          continue;
750        }
751        RegionState rs = this.services.getAssignmentManager().getRegionStates().getRegionState(ri);
752        if (rs == null || rs.isClosedOrAbnormallyClosed()) {
753          // If closed against an 'Unknown Server', that is should be fine.
754          continue;
755        }
756        ServerManager.ServerLiveState state = this.services.getServerManager().
757            isServerKnownAndOnline(sn);
758        switch (state) {
759          case UNKNOWN:
760            this.report.unknownServers.add(new Pair<>(ri, sn));
761            break;
762
763          default:
764            break;
765        }
766      }
767    }
768
769    /**
770     * @return True iff first row in hbase:meta or if we've broached a new table in hbase:meta
771     */
772    private boolean isTableTransition(RegionInfo ri) {
773      return this.previous == null ||
774          !this.previous.getTable().equals(ri.getTable());
775    }
776
777    @Override
778    public void close() throws IOException {
779      // This is a table transition... after the last region. Check previous.
780      // Should be last region. If not, its a hole on end of laster table.
781      if (this.previous != null && !this.previous.isLast()) {
782        addHole(this.previous, RegionInfo.UNDEFINED);
783      }
784      this.closed = true;
785    }
786  }
787
788  private static void checkLog4jProperties() {
789    String filename = "log4j.properties";
790    try {
791      final InputStream inStream =
792          CatalogJanitor.class.getClassLoader().getResourceAsStream(filename);
793      if (inStream != null) {
794        new Properties().load(inStream);
795      } else {
796        System.out.println("No " + filename + " on classpath; Add one else no logging output!");
797      }
798    } catch (IOException e) {
799      LOG.error("Log4j check failed", e);
800    }
801  }
802
803  /**
804   * For testing against a cluster.
805   * Doesn't have a MasterServices context so does not report on good vs bad servers.
806   */
807  public static void main(String [] args) throws IOException {
808    checkLog4jProperties();
809    ReportMakingVisitor visitor = new ReportMakingVisitor(null);
810    Configuration configuration = HBaseConfiguration.create();
811    configuration.setBoolean("hbase.defaults.for.version.skip", true);
812    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
813      /* Used to generate an overlap.
814      */
815      Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
816      g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
817      try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
818        Result r = t.get(g);
819        byte [] row = g.getRow();
820        row[row.length - 2] <<= row[row.length - 2];
821        Put p = new Put(g.getRow());
822        p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
823            r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
824        t.put(p);
825      }
826      MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
827      Report report = visitor.getReport();
828      LOG.info(report != null? report.toString(): "empty");
829    }
830  }
831}