001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.ArrayList;
023import java.util.Comparator;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Map;
027import java.util.Properties;
028import java.util.TreeMap;
029import java.util.concurrent.atomic.AtomicBoolean;
030import java.util.stream.Collectors;
031
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.fs.FileSystem;
034import org.apache.hadoop.fs.Path;
035import org.apache.hadoop.hbase.HBaseConfiguration;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.HRegionLocation;
038import org.apache.hadoop.hbase.MetaTableAccessor;
039import org.apache.hadoop.hbase.RegionLocations;
040import org.apache.hadoop.hbase.ScheduledChore;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
044import org.apache.hadoop.hbase.client.Connection;
045import org.apache.hadoop.hbase.client.ConnectionFactory;
046import org.apache.hadoop.hbase.client.Get;
047import org.apache.hadoop.hbase.client.Put;
048import org.apache.hadoop.hbase.client.RegionInfo;
049import org.apache.hadoop.hbase.client.Result;
050import org.apache.hadoop.hbase.client.Table;
051import org.apache.hadoop.hbase.client.TableDescriptor;
052import org.apache.hadoop.hbase.client.TableState;
053import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
054import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
055import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
057import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
058import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
059import org.apache.hadoop.hbase.util.Bytes;
060import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
061import org.apache.hadoop.hbase.util.FSUtils;
062import org.apache.hadoop.hbase.util.Pair;
063import org.apache.hadoop.hbase.util.PairOfSameType;
064import org.apache.hadoop.hbase.util.Threads;
065import org.apache.yetus.audience.InterfaceAudience;
066import org.slf4j.Logger;
067import org.slf4j.LoggerFactory;
068
069import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
070
/**
 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog
 * table on a period. Makes a report on the state of hbase:meta. Looks for unused
 * regions to garbage collect. Scan of hbase:meta runs if we are NOT in maintenance
 * mode, if we are NOT shutting down, AND if the assignmentmanager is loaded.
 * Playing it safe, we will garbage collect no-longer needed region references
 * only if there are no regions-in-transition (RIT).
 */
079// TODO: Only works with single hbase:meta region currently.  Fix.
080// TODO: Should it start over every time? Could it continue if runs into problem? Only if
081// problem does not mess up 'results'.
082// TODO: Do more by way of 'repair'; see note on unknownServers below.
083@InterfaceAudience.Private
084public class CatalogJanitor extends ScheduledChore {
085  private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName());
086  private final AtomicBoolean alreadyRunning = new AtomicBoolean(false);
087  private final AtomicBoolean enabled = new AtomicBoolean(true);
088  private final MasterServices services;
089
090  /**
091   * Saved report from last hbase:meta scan to completion. May be stale if having trouble
092   * completing scan. Check its date.
093   */
094  private volatile Report lastReport;
095
096  CatalogJanitor(final MasterServices services) {
097    super("CatalogJanitor-" + services.getServerName().toShortString(), services,
098      services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000));
099    this.services = services;
100  }
101
102  @Override
103  protected boolean initialChore() {
104    try {
105      if (getEnabled()) {
106        scan();
107      }
108    } catch (IOException e) {
109      LOG.warn("Failed initial janitorial scan of hbase:meta table", e);
110      return false;
111    }
112    return true;
113  }
114
115  boolean setEnabled(final boolean enabled) {
116    boolean alreadyEnabled = this.enabled.getAndSet(enabled);
117    // If disabling is requested on an already enabled chore, we could have an active
118    // scan still going on, callers might not be aware of that and do further action thinkng
119    // that no action would be from this chore.  In this case, the right action is to wait for
120    // the active scan to complete before exiting this function.
121    if (!enabled && alreadyEnabled) {
122      while (alreadyRunning.get()) {
123        Threads.sleepWithoutInterrupt(100);
124      }
125    }
126    return alreadyEnabled;
127  }
128
129  boolean getEnabled() {
130    return this.enabled.get();
131  }
132
133  @Override
134  protected void chore() {
135    try {
136      AssignmentManager am = this.services.getAssignmentManager();
137      if (getEnabled() && !this.services.isInMaintenanceMode() &&
138          !this.services.getServerManager().isClusterShutdown() &&
139          isMetaLoaded(am)) {
140        scan();
141      } else {
142        LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + 
143          ", maintenanceMode=" + this.services.isInMaintenanceMode() + ", am=" + am +
144          ", metaLoaded=" + isMetaLoaded(am) + ", hasRIT=" + isRIT(am) +
145          " clusterShutDown=" + this.services.getServerManager().isClusterShutdown());
146      }
147    } catch (IOException e) {
148      LOG.warn("Failed janitorial scan of hbase:meta table", e);
149    }
150  }
151
152  private static boolean isMetaLoaded(AssignmentManager am) {
153    return am != null && am.isMetaLoaded();
154  }
155
156  private static boolean isRIT(AssignmentManager am) {
157    return isMetaLoaded(am) && am.hasRegionsInTransition();
158  }
159
160  /**
161   * Run janitorial scan of catalog <code>hbase:meta</code> table looking for
162   * garbage to collect.
163   * @return How many items gc'd whether for merge or split.
164   */
165  int scan() throws IOException {
166    int gcs = 0;
167    try {
168      if (!alreadyRunning.compareAndSet(false, true)) {
169        LOG.debug("CatalogJanitor already running");
170        return gcs;
171      }
172      this.lastReport = scanForReport();
173      if (!this.lastReport.isEmpty()) {
174        LOG.warn(this.lastReport.toString());
175      }
176
177      if (isRIT(this.services.getAssignmentManager())) {
178        LOG.warn("Playing-it-safe skipping merge/split gc'ing of regions from hbase:meta while " +
179            "regions-in-transition (RIT)");
180      }
181      Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions;
182      for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
183        if (this.services.isInMaintenanceMode()) {
184          // Stop cleaning if the master is in maintenance mode
185          break;
186        }
187
188        List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
189        if (parents != null && cleanMergeRegion(e.getKey(), parents)) {
190          gcs++;
191        }
192      }
193      // Clean split parents
194      Map<RegionInfo, Result> splitParents = this.lastReport.splitParents;
195
196      // Now work on our list of found parents. See if any we can clean up.
197      HashSet<String> parentNotCleaned = new HashSet<>();
198      for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
199        if (this.services.isInMaintenanceMode()) {
200          // Stop cleaning if the master is in maintenance mode
201          break;
202        }
203
204        if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
205            cleanParent(e.getKey(), e.getValue())) {
206          gcs++;
207        } else {
208          // We could not clean the parent, so it's daughters should not be
209          // cleaned either (HBASE-6160)
210          PairOfSameType<RegionInfo> daughters =
211              MetaTableAccessor.getDaughterRegions(e.getValue());
212          parentNotCleaned.add(daughters.getFirst().getEncodedName());
213          parentNotCleaned.add(daughters.getSecond().getEncodedName());
214        }
215      }
216      return gcs;
217    } finally {
218      alreadyRunning.set(false);
219    }
220  }
221
222  /**
223   * Scan hbase:meta.
224   * @return Return generated {@link Report}
225   */
226  Report scanForReport() throws IOException {
227    ReportMakingVisitor visitor = new ReportMakingVisitor(this.services);
228    // Null tablename means scan all of meta.
229    MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null);
230    return visitor.getReport();
231  }
232
233  /**
234   * @return Returns last published Report that comes of last successful scan
235   *   of hbase:meta.
236   */
237  public Report getLastReport() {
238    return this.lastReport;
239  }
240
241  /**
242   * If merged region no longer holds reference to the merge regions, archive
243   * merge region on hdfs and perform deleting references in hbase:meta
244   * @return true if we delete references in merged region on hbase:meta and archive
245   *   the files on the file system
246   */
247  private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents)
248      throws IOException {
249    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
250    Path rootdir = this.services.getMasterFileSystem().getRootDir();
251    Path tabledir = FSUtils.getTableDir(rootdir, mergedRegion.getTable());
252    TableDescriptor htd = getDescriptor(mergedRegion.getTable());
253    HRegionFileSystem regionFs = null;
254    try {
255      regionFs = HRegionFileSystem.openRegionFromFileSystem(
256          this.services.getConfiguration(), fs, tabledir, mergedRegion, true);
257    } catch (IOException e) {
258      LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
259    }
260    if (regionFs == null || !regionFs.hasReferences(htd)) {
261      LOG.debug("Deleting parents ({}) from fs; merged child {} no longer holds references",
262           parents.stream().map(r -> RegionInfo.getShortNameToLog(r)).
263              collect(Collectors.joining(", ")),
264          mergedRegion);
265      ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();
266      pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
267          mergedRegion,  parents));
268      for (RegionInfo ri:  parents) {
269        // The above scheduled GCMultipleMergedRegionsProcedure does the below.
270        // Do we need this?
271        this.services.getAssignmentManager().getRegionStates().deleteRegion(ri);
272        this.services.getServerManager().removeRegion(ri);
273      }
274      return true;
275    }
276    return false;
277  }
278
279  /**
280   * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters.
281   */
282  static class SplitParentFirstComparator implements Comparator<RegionInfo> {
283    Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
284    @Override
285    public int compare(RegionInfo left, RegionInfo right) {
286      // This comparator differs from the one RegionInfo in that it sorts
287      // parent before daughters.
288      if (left == null) {
289        return -1;
290      }
291      if (right == null) {
292        return 1;
293      }
294      // Same table name.
295      int result = left.getTable().compareTo(right.getTable());
296      if (result != 0) {
297        return result;
298      }
299      // Compare start keys.
300      result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
301      if (result != 0) {
302        return result;
303      }
304      // Compare end keys, but flip the operands so parent comes first
305      result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
306
307      return result;
308    }
309  }
310
311  /**
312   * If daughters no longer hold reference to the parents, delete the parent.
313   * @param parent RegionInfo of split offlined parent
314   * @param rowContent Content of <code>parent</code> row in
315   * <code>metaRegionName</code>
316   * @return True if we removed <code>parent</code> from meta table and from
317   * the filesystem.
318   */
319  boolean cleanParent(final RegionInfo parent, Result rowContent)
320  throws IOException {
321    // Check whether it is a merged region and if it is clean of references.
322    if (MetaTableAccessor.hasMergeRegions(rowContent.rawCells())) {
323      // Wait until clean of merge parent regions first
324      return false;
325    }
326    // Run checks on each daughter split.
327    PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
328    Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst());
329    Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond());
330    if (hasNoReferences(a) && hasNoReferences(b)) {
331      String daughterA = daughters.getFirst() != null?
332          daughters.getFirst().getShortNameToLog(): "null";
333      String daughterB = daughters.getSecond() != null?
334          daughters.getSecond().getShortNameToLog(): "null";
335      LOG.debug("Deleting region " + parent.getShortNameToLog() +
336        " because daughters -- " + daughterA + ", " + daughterB +
337        " -- no longer hold references");
338      ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();
339      pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent));
340      // Remove from in-memory states
341      this.services.getAssignmentManager().getRegionStates().deleteRegion(parent);
342      this.services.getServerManager().removeRegion(parent);
343      return true;
344    }
345    return false;
346  }
347
348  /**
349   * @param p A pair where the first boolean says whether or not the daughter
350   * region directory exists in the filesystem and then the second boolean says
351   * whether the daughter has references to the parent.
352   * @return True the passed <code>p</code> signifies no references.
353   */
354  private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
355    return !p.getFirst() || !p.getSecond();
356  }
357
358  /**
359   * Checks if a daughter region -- either splitA or splitB -- still holds
360   * references to parent.
361   * @param parent Parent region
362   * @param daughter Daughter region
363   * @return A pair where the first boolean says whether or not the daughter
364   *   region directory exists in the filesystem and then the second boolean says
365   *   whether the daughter has references to the parent.
366   */
367  private Pair<Boolean, Boolean> checkDaughterInFs(final RegionInfo parent,
368    final RegionInfo daughter)
369  throws IOException {
370    if (daughter == null)  {
371      return new Pair<>(Boolean.FALSE, Boolean.FALSE);
372    }
373
374    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
375    Path rootdir = this.services.getMasterFileSystem().getRootDir();
376    Path tabledir = FSUtils.getTableDir(rootdir, daughter.getTable());
377
378    Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());
379
380    HRegionFileSystem regionFs;
381
382    try {
383      if (!FSUtils.isExists(fs, daughterRegionDir)) {
384        return new Pair<>(Boolean.FALSE, Boolean.FALSE);
385      }
386    } catch (IOException ioe) {
387      LOG.error("Error trying to determine if daughter region exists, " +
388               "assuming exists and has references", ioe);
389      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
390    }
391
392    boolean references = false;
393    TableDescriptor parentDescriptor = getDescriptor(parent.getTable());
394    try {
395      regionFs = HRegionFileSystem.openRegionFromFileSystem(
396          this.services.getConfiguration(), fs, tabledir, daughter, true);
397
398      for (ColumnFamilyDescriptor family: parentDescriptor.getColumnFamilies()) {
399        if ((references = regionFs.hasReferences(family.getNameAsString()))) {
400          break;
401        }
402      }
403    } catch (IOException e) {
404      LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName()
405          + ", to: " + parent.getEncodedName() + " assuming has references", e);
406      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
407    }
408    return new Pair<>(Boolean.TRUE, references);
409  }
410
411  private TableDescriptor getDescriptor(final TableName tableName) throws IOException {
412    return this.services.getTableDescriptors().get(tableName);
413  }
414
415  /**
416   * Checks if the specified region has merge qualifiers, if so, try to clean them.
417   * @return true if no info:merge* columns; i.e. the specified region doesn't have
418   *   any merge qualifiers.
419   */
420  public boolean cleanMergeQualifier(final RegionInfo region) throws IOException {
421    // Get merge regions if it is a merged region and already has merge qualifier
422    List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(this.services.getConnection(),
423        region.getRegionName());
424    if (parents == null || parents.isEmpty()) {
425      // It doesn't have merge qualifier, no need to clean
426      return true;
427    }
428    return cleanMergeRegion(region, parents);
429  }
430
431  /**
432   * Report made by ReportMakingVisitor
433   */
434  public static class Report {
435    private final long now = EnvironmentEdgeManager.currentTime();
436
437    // Keep Map of found split parents. These are candidates for cleanup.
438    // Use a comparator that has split parents come before its daughters.
439    final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator());
440    final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR);
441    int count = 0;
442
443    private final List<Pair<RegionInfo, RegionInfo>> holes = new ArrayList<>();
444    private final List<Pair<RegionInfo, RegionInfo>> overlaps = new ArrayList<>();
445
446    /**
447     * TODO: If CatalogJanitor finds an 'Unknown Server', it should 'fix' it by queuing
448     * a {@link org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure} for
449     * found server for it to clean up meta.
450     */
451    private final List<Pair<RegionInfo, ServerName>> unknownServers = new ArrayList<>();
452
453    private final List<byte []> emptyRegionInfo = new ArrayList<>();
454
455    @VisibleForTesting
456    Report() {}
457
458    public long getCreateTime() {
459      return this.now;
460    }
461
462    public List<Pair<RegionInfo, RegionInfo>> getHoles() {
463      return this.holes;
464    }
465
466    /**
467     * @return Overlap pairs found as we scanned hbase:meta; ordered by hbase:meta
468     *   table sort. Pairs of overlaps may have overlap with subsequent pairs.
469     * @see MetaFixer#calculateMerges(int, List) where we aggregate overlaps
470     *   for a single 'merge' call.
471     */
472    public List<Pair<RegionInfo, RegionInfo>> getOverlaps() {
473      return this.overlaps;
474    }
475
476    public List<Pair<RegionInfo, ServerName>> getUnknownServers() {
477      return unknownServers;
478    }
479
480    public List<byte[]> getEmptyRegionInfo() {
481      return emptyRegionInfo;
482    }
483
484    /**
485     * @return True if an 'empty' lastReport -- no problems found.
486     */
487    public boolean isEmpty() {
488      return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() &&
489          this.emptyRegionInfo.isEmpty();
490    }
491
492    @Override
493    public String toString() {
494      StringBuilder sb = new StringBuilder();
495      for (Pair<RegionInfo, RegionInfo> p: this.holes) {
496        if (sb.length() > 0) {
497          sb.append(", ");
498        }
499        sb.append("hole=").append(p.getFirst().getRegionNameAsString()).append("/").
500            append(p.getSecond().getRegionNameAsString());
501      }
502      for (Pair<RegionInfo, RegionInfo> p: this.overlaps) {
503        if (sb.length() > 0) {
504          sb.append(", ");
505        }
506        sb.append("overlap=").append(p.getFirst().getRegionNameAsString()).append("/").
507            append(p.getSecond().getRegionNameAsString());
508      }
509      for (byte [] r: this.emptyRegionInfo) {
510        if (sb.length() > 0) {
511          sb.append(", ");
512        }
513        sb.append("empty=").append(Bytes.toStringBinary(r));
514      }
515      for (Pair<RegionInfo, ServerName> p: this.unknownServers) {
516        if (sb.length() > 0) {
517          sb.append(", ");
518        }
519        sb.append("unknown_server=").append(p.getSecond()).append("/").
520            append(p.getFirst().getRegionNameAsString());
521      }
522      return sb.toString();
523    }
524  }
525
526  /**
527   * Visitor we use in here in CatalogJanitor to go against hbase:meta table.
528   * Generates a Report made of a collection of split parents and counts of rows
529   * in the hbase:meta table. Also runs hbase:meta consistency checks to
530   * generate more report. Report is NOT ready until after this visitor has been
531   * {@link #close()}'d.
532   */
533  static class ReportMakingVisitor implements MetaTableAccessor.CloseableVisitor {
534    private final MasterServices services;
535    private volatile boolean closed;
536
537    /**
538     * Report is not done until after the close has been called.
539     * @see #close()
540     * @see #getReport()
541     */
542    private Report report = new Report();
543
544    /**
545     * RegionInfo from previous row.
546     */
547    private RegionInfo previous = null;
548
549    /**
550     * Keep account of the highest end key seen as we move through hbase:meta.
551     * Usually, the current RegionInfo has the highest end key but if an overlap,
552     * this may no longer hold. An overlap may be a region with startkey 'd' and
553     * endkey 'g'. The next region in meta may be 'e' to 'f' and then 'f' to 'g'.
554     * Looking at previous and current meta row, we won't know about the 'd' to 'g'
555     * overlap unless we keep a running 'highest-endpoint-seen'.
556     */
557    private RegionInfo highestEndKeyRegionInfo = null;
558
559    ReportMakingVisitor(MasterServices services) {
560      this.services = services;
561    }
562
563    /**
564     * Do not call until after {@link #close()}.
565     * Will throw a {@link RuntimeException} if you do.
566     */
567    Report getReport() {
568      if (!this.closed) {
569        throw new RuntimeException("Report not ready until after close()");
570      }
571      return this.report;
572    }
573
574    @Override
575    public boolean visit(Result r) {
576      if (r == null || r.isEmpty()) {
577        return true;
578      }
579      this.report.count++;
580      RegionInfo regionInfo = null;
581      try {
582        regionInfo = metaTableConsistencyCheck(r);
583      } catch(Throwable t) {
584        LOG.warn("Failed consistency check on {}", Bytes.toStringBinary(r.getRow()), t);
585      }
586      if (regionInfo != null) {
587        LOG.trace(regionInfo.toString());
588        if (regionInfo.isSplitParent()) { // splitParent means split and offline.
589          this.report.splitParents.put(regionInfo, r);
590        }
591        if (MetaTableAccessor.hasMergeRegions(r.rawCells())) {
592          this.report.mergedRegions.put(regionInfo, r);
593        }
594      }
595      // Returning true means "keep scanning"
596      return true;
597    }
598
599    /**
600     * Check row.
601     * @param metaTableRow Row from hbase:meta table.
602     * @return Returns default regioninfo found in row parse as a convenience to save
603     *   on having to do a double-parse of Result.
604     */
605    private RegionInfo metaTableConsistencyCheck(Result metaTableRow) {
606      RegionInfo ri;
607      // Locations comes back null if the RegionInfo field is empty.
608      // If locations is null, ensure the regioninfo is for sure empty before progressing.
609      // If really empty, report as missing regioninfo!  Otherwise, can run server check
610      // and get RegionInfo from locations.
611      RegionLocations locations = MetaTableAccessor.getRegionLocations(metaTableRow);
612      if (locations == null) {
613        ri = MetaTableAccessor.getRegionInfo(metaTableRow,
614            MetaTableAccessor.getRegionInfoColumn());
615      } else {
616        ri = locations.getDefaultRegionLocation().getRegion();
617        checkServer(locations);
618      }
619
620      if (ri == null) {
621        this.report.emptyRegionInfo.add(metaTableRow.getRow());
622        return ri;
623      }
624
625      if (!Bytes.equals(metaTableRow.getRow(), ri.getRegionName())) {
626        LOG.warn("INCONSISTENCY: Row name is not equal to serialized info:regioninfo content; " +
627                "row={} {}; See if RegionInfo is referenced in another hbase:meta row? Delete?",
628            Bytes.toStringBinary(metaTableRow.getRow()), ri.getRegionNameAsString());
629        return null;
630      }
631      // Skip split parent region
632      if (ri.isSplitParent()) {
633        return ri;
634      }
635      // If table is disabled, skip integrity check.
636      if (!isTableDisabled(ri)) {
637        if (isTableTransition(ri)) {
638          // On table transition, look to see if last region was last in table
639          // and if this is the first. Report 'hole' if neither is true.
640          // HBCK1 used to have a special category for missing start or end keys.
641          // We'll just lump them in as 'holes'.
642          if ((this.previous != null && !this.previous.isLast()) || !ri.isFirst()) {
643            addHole(this.previous == null? RegionInfo.UNDEFINED: this.previous, ri);
644          }
645        } else {
646          if (!this.previous.isNext(ri)) {
647            if (this.previous.isOverlap(ri)) {
648              addOverlap(this.previous, ri);
649            } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) {
650              // We may have seen a region a few rows back that overlaps this one.
651              addOverlap(this.highestEndKeyRegionInfo, ri);
652            } else {
653              addHole(this.previous, ri);
654            }
655          } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) {
656            // We may have seen a region a few rows back that overlaps this one
657            // even though it properly 'follows' the region just before.
658            addOverlap(this.highestEndKeyRegionInfo, ri);
659          }
660        }
661      }
662      this.previous = ri;
663      this.highestEndKeyRegionInfo =
664          MetaFixer.getRegionInfoWithLargestEndKey(this.highestEndKeyRegionInfo, ri);
665      return ri;
666    }
667
668    private void addOverlap(RegionInfo a, RegionInfo b) {
669      this.report.overlaps.add(new Pair<>(a, b));
670    }
671
672    private void addHole(RegionInfo a, RegionInfo b) {
673      this.report.holes.add(new Pair<>(a, b));
674    }
675
676    /**
677     * @return True if table is disabled or disabling; defaults false!
678     */
679    boolean isTableDisabled(RegionInfo ri) {
680      if (ri == null) {
681        return false;
682      }
683      if (this.services == null) {
684        return false;
685      }
686      if (this.services.getTableStateManager() == null) {
687        return false;
688      }
689      TableState state = null;
690      try {
691        state = this.services.getTableStateManager().getTableState(ri.getTable());
692      } catch (IOException e) {
693        LOG.warn("Failed getting table state", e);
694      }
695      return state != null && state.isDisabledOrDisabling();
696    }
697
698    /**
699     * Run through referenced servers and save off unknown and the dead.
700     */
701    private void checkServer(RegionLocations locations) {
702      if (this.services == null) {
703        // Can't do this test if no services.
704        return;
705      }
706      if (locations == null) {
707        return;
708      }
709      if (locations.getRegionLocations() == null) {
710        return;
711      }
712      // Check referenced servers are known/online. Here we are looking
713      // at both the default replica -- the main replica -- and then replica
714      // locations too.
715      for (HRegionLocation location: locations.getRegionLocations()) {
716        if (location == null) {
717          continue;
718        }
719        ServerName sn = location.getServerName();
720        if (sn == null) {
721          continue;
722        }
723        if (location.getRegion() == null) {
724          LOG.warn("Empty RegionInfo in {}", location);
725          // This should never happen but if it does, will mess up below.
726          continue;
727        }
728        RegionInfo ri = location.getRegion();
729        // Skip split parent region
730        if (ri.isSplitParent()) {
731          continue;
732        }
733        // skip the offline regions which belong to disabled table.
734        if (isTableDisabled(ri)) {
735          continue;
736        }
737        RegionState rs = this.services.getAssignmentManager().getRegionStates().getRegionState(ri);
738        if (rs.isClosedOrAbnormallyClosed()) {
739          // If closed against an 'Unknown Server', that is should be fine.
740          continue;
741        }
742        ServerManager.ServerLiveState state = this.services.getServerManager().
743            isServerKnownAndOnline(sn);
744        switch (state) {
745          case UNKNOWN:
746            this.report.unknownServers.add(new Pair<>(ri, sn));
747            break;
748
749          default:
750            break;
751        }
752      }
753    }
754
755    /**
756     * @return True iff first row in hbase:meta or if we've broached a new table in hbase:meta
757     */
758    private boolean isTableTransition(RegionInfo ri) {
759      return this.previous == null ||
760          !this.previous.getTable().equals(ri.getTable());
761    }
762
763    @Override
764    public void close() throws IOException {
765      // This is a table transition... after the last region. Check previous.
766      // Should be last region. If not, its a hole on end of laster table.
767      if (this.previous != null && !this.previous.isLast()) {
768        addHole(this.previous, RegionInfo.UNDEFINED);
769      }
770      this.closed = true;
771    }
772  }
773
774  private static void checkLog4jProperties() {
775    String filename = "log4j.properties";
776    try {
777      final InputStream inStream =
778          CatalogJanitor.class.getClassLoader().getResourceAsStream(filename);
779      if (inStream != null) {
780        new Properties().load(inStream);
781      } else {
782        System.out.println("No " + filename + " on classpath; Add one else no logging output!");
783      }
784    } catch (IOException e) {
785      LOG.error("Log4j check failed", e);
786    }
787  }
788
789  /**
790   * For testing against a cluster.
791   * Doesn't have a MasterServices context so does not report on good vs bad servers.
792   */
793  public static void main(String [] args) throws IOException {
794    checkLog4jProperties();
795    ReportMakingVisitor visitor = new ReportMakingVisitor(null);
796    Configuration configuration = HBaseConfiguration.create();
797    configuration.setBoolean("hbase.defaults.for.version.skip", true);
798    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
799      /* Used to generate an overlap.
800      */
801      Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
802      g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
803      try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
804        Result r = t.get(g);
805        byte [] row = g.getRow();
806        row[row.length - 2] <<= row[row.length - 2];
807        Put p = new Put(g.getRow());
808        p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
809            r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
810        t.put(p);
811      }
812      MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
813      Report report = visitor.getReport();
814      LOG.info(report != null? report.toString(): "empty");
815    }
816  }
817}