001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.ArrayList;
023import java.util.Comparator;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Map;
027import java.util.Properties;
028import java.util.TreeMap;
029import java.util.concurrent.atomic.AtomicBoolean;
030import java.util.stream.Collectors;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.hbase.HBaseConfiguration;
035import org.apache.hadoop.hbase.HConstants;
036import org.apache.hadoop.hbase.HRegionLocation;
037import org.apache.hadoop.hbase.MetaTableAccessor;
038import org.apache.hadoop.hbase.RegionLocations;
039import org.apache.hadoop.hbase.ScheduledChore;
040import org.apache.hadoop.hbase.ServerName;
041import org.apache.hadoop.hbase.TableName;
042import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
043import org.apache.hadoop.hbase.client.Connection;
044import org.apache.hadoop.hbase.client.ConnectionFactory;
045import org.apache.hadoop.hbase.client.Get;
046import org.apache.hadoop.hbase.client.Put;
047import org.apache.hadoop.hbase.client.RegionInfo;
048import org.apache.hadoop.hbase.client.Result;
049import org.apache.hadoop.hbase.client.Table;
050import org.apache.hadoop.hbase.client.TableDescriptor;
051import org.apache.hadoop.hbase.client.TableState;
052import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
053import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
054import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
055import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
056import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
057import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
058import org.apache.hadoop.hbase.util.Bytes;
059import org.apache.hadoop.hbase.util.CommonFSUtils;
060import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
061import org.apache.hadoop.hbase.util.Pair;
062import org.apache.hadoop.hbase.util.PairOfSameType;
063import org.apache.hadoop.hbase.util.Threads;
064import org.apache.yetus.audience.InterfaceAudience;
065import org.slf4j.Logger;
066import org.slf4j.LoggerFactory;
067
068import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
069
/**
 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog
 * table on a period. Makes a report on the state of hbase:meta. Looks for unused
 * regions to garbage collect. Scan of hbase:meta runs if we are NOT in maintenance
 * mode, if we are NOT shutting down, AND if the AssignmentManager is loaded.
 * Playing it safe, we will garbage collect no-longer needed region references
 * only if there are no regions-in-transition (RIT).
 */
078// TODO: Only works with single hbase:meta region currently.  Fix.
079// TODO: Should it start over every time? Could it continue if runs into problem? Only if
080// problem does not mess up 'results'.
081// TODO: Do more by way of 'repair'; see note on unknownServers below.
082@InterfaceAudience.Private
083public class CatalogJanitor extends ScheduledChore {
084  private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName());
085  private final AtomicBoolean alreadyRunning = new AtomicBoolean(false);
086  private final AtomicBoolean enabled = new AtomicBoolean(true);
087  private final MasterServices services;
088
089  /**
090   * Saved report from last hbase:meta scan to completion. May be stale if having trouble
091   * completing scan. Check its date.
092   */
093  private volatile Report lastReport;
094
095  CatalogJanitor(final MasterServices services) {
096    super("CatalogJanitor-" + services.getServerName().toShortString(), services,
097      services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000));
098    this.services = services;
099  }
100
101  @Override
102  protected boolean initialChore() {
103    try {
104      if (getEnabled()) {
105        scan();
106      }
107    } catch (IOException e) {
108      LOG.warn("Failed initial janitorial scan of hbase:meta table", e);
109      return false;
110    }
111    return true;
112  }
113
114  boolean setEnabled(final boolean enabled) {
115    boolean alreadyEnabled = this.enabled.getAndSet(enabled);
116    // If disabling is requested on an already enabled chore, we could have an active
117    // scan still going on, callers might not be aware of that and do further action thinkng
118    // that no action would be from this chore.  In this case, the right action is to wait for
119    // the active scan to complete before exiting this function.
120    if (!enabled && alreadyEnabled) {
121      while (alreadyRunning.get()) {
122        Threads.sleepWithoutInterrupt(100);
123      }
124    }
125    return alreadyEnabled;
126  }
127
128  boolean getEnabled() {
129    return this.enabled.get();
130  }
131
132  @Override
133  protected void chore() {
134    try {
135      AssignmentManager am = this.services.getAssignmentManager();
136      if (getEnabled() && !this.services.isInMaintenanceMode() &&
137          !this.services.getServerManager().isClusterShutdown() &&
138          isMetaLoaded(am)) {
139        scan();
140      } else {
141        LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + 
142          ", maintenanceMode=" + this.services.isInMaintenanceMode() + ", am=" + am +
143          ", metaLoaded=" + isMetaLoaded(am) + ", hasRIT=" + isRIT(am) +
144          " clusterShutDown=" + this.services.getServerManager().isClusterShutdown());
145      }
146    } catch (IOException e) {
147      LOG.warn("Failed janitorial scan of hbase:meta table", e);
148    }
149  }
150
151  private static boolean isMetaLoaded(AssignmentManager am) {
152    return am != null && am.isMetaLoaded();
153  }
154
155  private static boolean isRIT(AssignmentManager am) {
156    return isMetaLoaded(am) && am.hasRegionsInTransition();
157  }
158
159  /**
160   * Run janitorial scan of catalog <code>hbase:meta</code> table looking for
161   * garbage to collect.
162   * @return How many items gc'd whether for merge or split.
163   */
164  int scan() throws IOException {
165    int gcs = 0;
166    try {
167      if (!alreadyRunning.compareAndSet(false, true)) {
168        LOG.debug("CatalogJanitor already running");
169        return gcs;
170      }
171      this.lastReport = scanForReport();
172      if (!this.lastReport.isEmpty()) {
173        LOG.warn(this.lastReport.toString());
174      }
175
176      if (isRIT(this.services.getAssignmentManager())) {
177        LOG.warn("Playing-it-safe skipping merge/split gc'ing of regions from hbase:meta while " +
178            "regions-in-transition (RIT)");
179      }
180      Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions;
181      for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
182        if (this.services.isInMaintenanceMode()) {
183          // Stop cleaning if the master is in maintenance mode
184          break;
185        }
186
187        List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
188        if (parents != null && cleanMergeRegion(e.getKey(), parents)) {
189          gcs++;
190        }
191      }
192      // Clean split parents
193      Map<RegionInfo, Result> splitParents = this.lastReport.splitParents;
194
195      // Now work on our list of found parents. See if any we can clean up.
196      HashSet<String> parentNotCleaned = new HashSet<>();
197      for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
198        if (this.services.isInMaintenanceMode()) {
199          // Stop cleaning if the master is in maintenance mode
200          break;
201        }
202
203        if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
204            cleanParent(e.getKey(), e.getValue())) {
205          gcs++;
206        } else {
207          // We could not clean the parent, so it's daughters should not be
208          // cleaned either (HBASE-6160)
209          PairOfSameType<RegionInfo> daughters =
210              MetaTableAccessor.getDaughterRegions(e.getValue());
211          parentNotCleaned.add(daughters.getFirst().getEncodedName());
212          parentNotCleaned.add(daughters.getSecond().getEncodedName());
213        }
214      }
215      return gcs;
216    } finally {
217      alreadyRunning.set(false);
218    }
219  }
220
221  /**
222   * Scan hbase:meta.
223   * @return Return generated {@link Report}
224   */
225  Report scanForReport() throws IOException {
226    ReportMakingVisitor visitor = new ReportMakingVisitor(this.services);
227    // Null tablename means scan all of meta.
228    MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null);
229    return visitor.getReport();
230  }
231
232  /**
233   * @return Returns last published Report that comes of last successful scan
234   *   of hbase:meta.
235   */
236  public Report getLastReport() {
237    return this.lastReport;
238  }
239
240  /**
241   * If merged region no longer holds reference to the merge regions, archive
242   * merge region on hdfs and perform deleting references in hbase:meta
243   * @return true if we delete references in merged region on hbase:meta and archive
244   *   the files on the file system
245   */
246  private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents)
247      throws IOException {
248    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
249    Path rootdir = this.services.getMasterFileSystem().getRootDir();
250    Path tabledir = CommonFSUtils.getTableDir(rootdir, mergedRegion.getTable());
251    TableDescriptor htd = getDescriptor(mergedRegion.getTable());
252    HRegionFileSystem regionFs = null;
253    try {
254      regionFs = HRegionFileSystem.openRegionFromFileSystem(
255          this.services.getConfiguration(), fs, tabledir, mergedRegion, true);
256    } catch (IOException e) {
257      LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
258    }
259    if (regionFs == null || !regionFs.hasReferences(htd)) {
260      LOG.debug("Deleting parents ({}) from fs; merged child {} no longer holds references",
261           parents.stream().map(r -> RegionInfo.getShortNameToLog(r)).
262              collect(Collectors.joining(", ")),
263          mergedRegion);
264      ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();
265      pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
266          mergedRegion,  parents));
267      for (RegionInfo ri:  parents) {
268        // The above scheduled GCMultipleMergedRegionsProcedure does the below.
269        // Do we need this?
270        this.services.getAssignmentManager().getRegionStates().deleteRegion(ri);
271        this.services.getServerManager().removeRegion(ri);
272      }
273      return true;
274    }
275    return false;
276  }
277
278  /**
279   * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters.
280   */
281  static class SplitParentFirstComparator implements Comparator<RegionInfo> {
282    Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
283    @Override
284    public int compare(RegionInfo left, RegionInfo right) {
285      // This comparator differs from the one RegionInfo in that it sorts
286      // parent before daughters.
287      if (left == null) {
288        return -1;
289      }
290      if (right == null) {
291        return 1;
292      }
293      // Same table name.
294      int result = left.getTable().compareTo(right.getTable());
295      if (result != 0) {
296        return result;
297      }
298      // Compare start keys.
299      result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
300      if (result != 0) {
301        return result;
302      }
303      // Compare end keys, but flip the operands so parent comes first
304      result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
305
306      return result;
307    }
308  }
309
310  /**
311   * If daughters no longer hold reference to the parents, delete the parent.
312   * @param parent RegionInfo of split offlined parent
313   * @param rowContent Content of <code>parent</code> row in
314   * <code>metaRegionName</code>
315   * @return True if we removed <code>parent</code> from meta table and from
316   * the filesystem.
317   */
318  boolean cleanParent(final RegionInfo parent, Result rowContent)
319  throws IOException {
320    // Check whether it is a merged region and if it is clean of references.
321    if (MetaTableAccessor.hasMergeRegions(rowContent.rawCells())) {
322      // Wait until clean of merge parent regions first
323      return false;
324    }
325    // Run checks on each daughter split.
326    PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
327    Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst());
328    Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond());
329    if (hasNoReferences(a) && hasNoReferences(b)) {
330      String daughterA = daughters.getFirst() != null?
331          daughters.getFirst().getShortNameToLog(): "null";
332      String daughterB = daughters.getSecond() != null?
333          daughters.getSecond().getShortNameToLog(): "null";
334      LOG.debug("Deleting region " + parent.getShortNameToLog() +
335        " because daughters -- " + daughterA + ", " + daughterB +
336        " -- no longer hold references");
337      ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();
338      pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent));
339      // Remove from in-memory states
340      this.services.getAssignmentManager().getRegionStates().deleteRegion(parent);
341      this.services.getServerManager().removeRegion(parent);
342      return true;
343    }
344    return false;
345  }
346
347  /**
348   * @param p A pair where the first boolean says whether or not the daughter
349   * region directory exists in the filesystem and then the second boolean says
350   * whether the daughter has references to the parent.
351   * @return True the passed <code>p</code> signifies no references.
352   */
353  private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
354    return !p.getFirst() || !p.getSecond();
355  }
356
357  /**
358   * Checks if a daughter region -- either splitA or splitB -- still holds
359   * references to parent.
360   * @param parent Parent region
361   * @param daughter Daughter region
362   * @return A pair where the first boolean says whether or not the daughter
363   *   region directory exists in the filesystem and then the second boolean says
364   *   whether the daughter has references to the parent.
365   */
366  private Pair<Boolean, Boolean> checkDaughterInFs(final RegionInfo parent,
367    final RegionInfo daughter)
368  throws IOException {
369    if (daughter == null)  {
370      return new Pair<>(Boolean.FALSE, Boolean.FALSE);
371    }
372
373    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
374    Path rootdir = this.services.getMasterFileSystem().getRootDir();
375    Path tabledir = CommonFSUtils.getTableDir(rootdir, daughter.getTable());
376
377    Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());
378
379    HRegionFileSystem regionFs;
380
381    try {
382      if (!CommonFSUtils.isExists(fs, daughterRegionDir)) {
383        return new Pair<>(Boolean.FALSE, Boolean.FALSE);
384      }
385    } catch (IOException ioe) {
386      LOG.error("Error trying to determine if daughter region exists, " +
387               "assuming exists and has references", ioe);
388      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
389    }
390
391    boolean references = false;
392    TableDescriptor parentDescriptor = getDescriptor(parent.getTable());
393    try {
394      regionFs = HRegionFileSystem.openRegionFromFileSystem(
395          this.services.getConfiguration(), fs, tabledir, daughter, true);
396
397      for (ColumnFamilyDescriptor family: parentDescriptor.getColumnFamilies()) {
398        if ((references = regionFs.hasReferences(family.getNameAsString()))) {
399          break;
400        }
401      }
402    } catch (IOException e) {
403      LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName()
404          + ", to: " + parent.getEncodedName() + " assuming has references", e);
405      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
406    }
407    return new Pair<>(Boolean.TRUE, references);
408  }
409
410  private TableDescriptor getDescriptor(final TableName tableName) throws IOException {
411    return this.services.getTableDescriptors().get(tableName);
412  }
413
414  /**
415   * Checks if the specified region has merge qualifiers, if so, try to clean them.
416   * @return true if no info:merge* columns; i.e. the specified region doesn't have
417   *   any merge qualifiers.
418   */
419  public boolean cleanMergeQualifier(final RegionInfo region) throws IOException {
420    // Get merge regions if it is a merged region and already has merge qualifier
421    List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(this.services.getConnection(),
422        region.getRegionName());
423    if (parents == null || parents.isEmpty()) {
424      // It doesn't have merge qualifier, no need to clean
425      return true;
426    }
427
428    // If a parent region is a merged child region and GC has not kicked in/finish its work yet,
429    // return false in this case to avoid kicking in a merge, trying later.
430    cleanMergeRegion(region, parents);
431    return false;
432  }
433
434  /**
435   * Report made by ReportMakingVisitor
436   */
437  public static class Report {
438    private final long now = EnvironmentEdgeManager.currentTime();
439
440    // Keep Map of found split parents. These are candidates for cleanup.
441    // Use a comparator that has split parents come before its daughters.
442    final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator());
443    final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR);
444    int count = 0;
445
446    private final List<Pair<RegionInfo, RegionInfo>> holes = new ArrayList<>();
447    private final List<Pair<RegionInfo, RegionInfo>> overlaps = new ArrayList<>();
448
449    /**
450     * TODO: If CatalogJanitor finds an 'Unknown Server', it should 'fix' it by queuing
451     * a {@link org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure} for
452     * found server for it to clean up meta.
453     */
454    private final List<Pair<RegionInfo, ServerName>> unknownServers = new ArrayList<>();
455
456    private final List<byte []> emptyRegionInfo = new ArrayList<>();
457
458    @VisibleForTesting
459    Report() {}
460
461    public long getCreateTime() {
462      return this.now;
463    }
464
465    public List<Pair<RegionInfo, RegionInfo>> getHoles() {
466      return this.holes;
467    }
468
469    /**
470     * @return Overlap pairs found as we scanned hbase:meta; ordered by hbase:meta
471     *   table sort. Pairs of overlaps may have overlap with subsequent pairs.
472     * @see MetaFixer#calculateMerges(int, List) where we aggregate overlaps
473     *   for a single 'merge' call.
474     */
475    public List<Pair<RegionInfo, RegionInfo>> getOverlaps() {
476      return this.overlaps;
477    }
478
479    public Map<RegionInfo, Result> getMergedRegions() {
480      return this.mergedRegions;
481    }
482
483    public List<Pair<RegionInfo, ServerName>> getUnknownServers() {
484      return unknownServers;
485    }
486
487    public List<byte[]> getEmptyRegionInfo() {
488      return emptyRegionInfo;
489    }
490
491    /**
492     * @return True if an 'empty' lastReport -- no problems found.
493     */
494    public boolean isEmpty() {
495      return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() &&
496          this.emptyRegionInfo.isEmpty();
497    }
498
499    @Override
500    public String toString() {
501      StringBuilder sb = new StringBuilder();
502      for (Pair<RegionInfo, RegionInfo> p: this.holes) {
503        if (sb.length() > 0) {
504          sb.append(", ");
505        }
506        sb.append("hole=").append(p.getFirst().getRegionNameAsString()).append("/").
507            append(p.getSecond().getRegionNameAsString());
508      }
509      for (Pair<RegionInfo, RegionInfo> p: this.overlaps) {
510        if (sb.length() > 0) {
511          sb.append(", ");
512        }
513        sb.append("overlap=").append(p.getFirst().getRegionNameAsString()).append("/").
514            append(p.getSecond().getRegionNameAsString());
515      }
516      for (byte [] r: this.emptyRegionInfo) {
517        if (sb.length() > 0) {
518          sb.append(", ");
519        }
520        sb.append("empty=").append(Bytes.toStringBinary(r));
521      }
522      for (Pair<RegionInfo, ServerName> p: this.unknownServers) {
523        if (sb.length() > 0) {
524          sb.append(", ");
525        }
526        sb.append("unknown_server=").append(p.getSecond()).append("/").
527            append(p.getFirst().getRegionNameAsString());
528      }
529      return sb.toString();
530    }
531  }
532
533  /**
534   * Visitor we use in here in CatalogJanitor to go against hbase:meta table.
535   * Generates a Report made of a collection of split parents and counts of rows
536   * in the hbase:meta table. Also runs hbase:meta consistency checks to
537   * generate more report. Report is NOT ready until after this visitor has been
538   * {@link #close()}'d.
539   */
540  static class ReportMakingVisitor implements MetaTableAccessor.CloseableVisitor {
541    private final MasterServices services;
542    private volatile boolean closed;
543
544    /**
545     * Report is not done until after the close has been called.
546     * @see #close()
547     * @see #getReport()
548     */
549    private Report report = new Report();
550
551    /**
552     * RegionInfo from previous row.
553     */
554    private RegionInfo previous = null;
555
556    /**
557     * Keep account of the highest end key seen as we move through hbase:meta.
558     * Usually, the current RegionInfo has the highest end key but if an overlap,
559     * this may no longer hold. An overlap may be a region with startkey 'd' and
560     * endkey 'g'. The next region in meta may be 'e' to 'f' and then 'f' to 'g'.
561     * Looking at previous and current meta row, we won't know about the 'd' to 'g'
562     * overlap unless we keep a running 'highest-endpoint-seen'.
563     */
564    private RegionInfo highestEndKeyRegionInfo = null;
565
566    ReportMakingVisitor(MasterServices services) {
567      this.services = services;
568    }
569
570    /**
571     * Do not call until after {@link #close()}.
572     * Will throw a {@link RuntimeException} if you do.
573     */
574    Report getReport() {
575      if (!this.closed) {
576        throw new RuntimeException("Report not ready until after close()");
577      }
578      return this.report;
579    }
580
581    @Override
582    public boolean visit(Result r) {
583      if (r == null || r.isEmpty()) {
584        return true;
585      }
586      this.report.count++;
587      RegionInfo regionInfo = null;
588      try {
589        regionInfo = metaTableConsistencyCheck(r);
590      } catch(Throwable t) {
591        LOG.warn("Failed consistency check on {}", Bytes.toStringBinary(r.getRow()), t);
592      }
593      if (regionInfo != null) {
594        LOG.trace(regionInfo.toString());
595        if (regionInfo.isSplitParent()) { // splitParent means split and offline.
596          this.report.splitParents.put(regionInfo, r);
597        }
598        if (MetaTableAccessor.hasMergeRegions(r.rawCells())) {
599          this.report.mergedRegions.put(regionInfo, r);
600        }
601      }
602      // Returning true means "keep scanning"
603      return true;
604    }
605
606    /**
607     * Check row.
608     * @param metaTableRow Row from hbase:meta table.
609     * @return Returns default regioninfo found in row parse as a convenience to save
610     *   on having to do a double-parse of Result.
611     */
612    private RegionInfo metaTableConsistencyCheck(Result metaTableRow) {
613      RegionInfo ri;
614      // Locations comes back null if the RegionInfo field is empty.
615      // If locations is null, ensure the regioninfo is for sure empty before progressing.
616      // If really empty, report as missing regioninfo!  Otherwise, can run server check
617      // and get RegionInfo from locations.
618      RegionLocations locations = MetaTableAccessor.getRegionLocations(metaTableRow);
619      if (locations == null) {
620        ri = MetaTableAccessor.getRegionInfo(metaTableRow,
621            MetaTableAccessor.getRegionInfoColumn());
622      } else {
623        ri = locations.getDefaultRegionLocation().getRegion();
624        checkServer(locations);
625      }
626
627      if (ri == null) {
628        this.report.emptyRegionInfo.add(metaTableRow.getRow());
629        return ri;
630      }
631
632      if (!Bytes.equals(metaTableRow.getRow(), ri.getRegionName())) {
633        LOG.warn("INCONSISTENCY: Row name is not equal to serialized info:regioninfo content; " +
634                "row={} {}; See if RegionInfo is referenced in another hbase:meta row? Delete?",
635            Bytes.toStringBinary(metaTableRow.getRow()), ri.getRegionNameAsString());
636        return null;
637      }
638      // Skip split parent region
639      if (ri.isSplitParent()) {
640        return ri;
641      }
642      // If table is disabled, skip integrity check.
643      if (!isTableDisabled(ri)) {
644        if (isTableTransition(ri)) {
645          // On table transition, look to see if last region was last in table
646          // and if this is the first. Report 'hole' if neither is true.
647          // HBCK1 used to have a special category for missing start or end keys.
648          // We'll just lump them in as 'holes'.
649          if ((this.previous != null && !this.previous.isLast()) || !ri.isFirst()) {
650            addHole(this.previous == null? RegionInfo.UNDEFINED: this.previous, ri);
651          }
652        } else {
653          if (!this.previous.isNext(ri)) {
654            if (this.previous.isOverlap(ri)) {
655              addOverlap(this.previous, ri);
656            } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) {
657              // We may have seen a region a few rows back that overlaps this one.
658              addOverlap(this.highestEndKeyRegionInfo, ri);
659            } else if (!this.highestEndKeyRegionInfo.isNext(ri)) {
660              // Need to check the case if this.highestEndKeyRegionInfo.isNext(ri). If no,
661              // report a hole, otherwise, it is ok. For an example,
662              // previous: [aa, bb), ri: [cc, dd), highestEndKeyRegionInfo: [a, cc)
663              // In this case, it should not report a hole, as highestEndKeyRegionInfo covers
664              // the hole between previous and ri.
665              addHole(this.previous, ri);
666            }
667          } else if (ri.isOverlap(this.highestEndKeyRegionInfo)) {
668            // We may have seen a region a few rows back that overlaps this one
669            // even though it properly 'follows' the region just before.
670            addOverlap(this.highestEndKeyRegionInfo, ri);
671          }
672        }
673      }
674      this.previous = ri;
675      this.highestEndKeyRegionInfo =
676          MetaFixer.getRegionInfoWithLargestEndKey(this.highestEndKeyRegionInfo, ri);
677      return ri;
678    }
679
680    private void addOverlap(RegionInfo a, RegionInfo b) {
681      this.report.overlaps.add(new Pair<>(a, b));
682    }
683
684    private void addHole(RegionInfo a, RegionInfo b) {
685      this.report.holes.add(new Pair<>(a, b));
686    }
687
688    /**
689     * @return True if table is disabled or disabling; defaults false!
690     */
691    boolean isTableDisabled(RegionInfo ri) {
692      if (ri == null) {
693        return false;
694      }
695      if (this.services == null) {
696        return false;
697      }
698      if (this.services.getTableStateManager() == null) {
699        return false;
700      }
701      TableState state = null;
702      try {
703        state = this.services.getTableStateManager().getTableState(ri.getTable());
704      } catch (IOException e) {
705        LOG.warn("Failed getting table state", e);
706      }
707      return state != null && state.isDisabledOrDisabling();
708    }
709
710    /**
711     * Run through referenced servers and save off unknown and the dead.
712     */
713    private void checkServer(RegionLocations locations) {
714      if (this.services == null) {
715        // Can't do this test if no services.
716        return;
717      }
718      if (locations == null) {
719        return;
720      }
721      if (locations.getRegionLocations() == null) {
722        return;
723      }
724      // Check referenced servers are known/online. Here we are looking
725      // at both the default replica -- the main replica -- and then replica
726      // locations too.
727      for (HRegionLocation location: locations.getRegionLocations()) {
728        if (location == null) {
729          continue;
730        }
731        ServerName sn = location.getServerName();
732        if (sn == null) {
733          continue;
734        }
735        if (location.getRegion() == null) {
736          LOG.warn("Empty RegionInfo in {}", location);
737          // This should never happen but if it does, will mess up below.
738          continue;
739        }
740        RegionInfo ri = location.getRegion();
741        // Skip split parent region
742        if (ri.isSplitParent()) {
743          continue;
744        }
745        // skip the offline regions which belong to disabled table.
746        if (isTableDisabled(ri)) {
747          continue;
748        }
749        RegionState rs = this.services.getAssignmentManager().getRegionStates().getRegionState(ri);
750        if (rs == null || rs.isClosedOrAbnormallyClosed()) {
751          // If closed against an 'Unknown Server', that is should be fine.
752          continue;
753        }
754        ServerManager.ServerLiveState state = this.services.getServerManager().
755            isServerKnownAndOnline(sn);
756        switch (state) {
757          case UNKNOWN:
758            this.report.unknownServers.add(new Pair<>(ri, sn));
759            break;
760
761          default:
762            break;
763        }
764      }
765    }
766
767    /**
768     * @return True iff first row in hbase:meta or if we've broached a new table in hbase:meta
769     */
770    private boolean isTableTransition(RegionInfo ri) {
771      return this.previous == null ||
772          !this.previous.getTable().equals(ri.getTable());
773    }
774
775    @Override
776    public void close() throws IOException {
777      // This is a table transition... after the last region. Check previous.
778      // Should be last region. If not, its a hole on end of laster table.
779      if (this.previous != null && !this.previous.isLast()) {
780        addHole(this.previous, RegionInfo.UNDEFINED);
781      }
782      this.closed = true;
783    }
784  }
785
786  private static void checkLog4jProperties() {
787    String filename = "log4j.properties";
788    try {
789      final InputStream inStream =
790          CatalogJanitor.class.getClassLoader().getResourceAsStream(filename);
791      if (inStream != null) {
792        new Properties().load(inStream);
793      } else {
794        System.out.println("No " + filename + " on classpath; Add one else no logging output!");
795      }
796    } catch (IOException e) {
797      LOG.error("Log4j check failed", e);
798    }
799  }
800
801  /**
802   * For testing against a cluster.
803   * Doesn't have a MasterServices context so does not report on good vs bad servers.
804   */
805  public static void main(String [] args) throws IOException {
806    checkLog4jProperties();
807    ReportMakingVisitor visitor = new ReportMakingVisitor(null);
808    Configuration configuration = HBaseConfiguration.create();
809    configuration.setBoolean("hbase.defaults.for.version.skip", true);
810    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
811      /* Used to generate an overlap.
812      */
813      Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
814      g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
815      try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
816        Result r = t.get(g);
817        byte [] row = g.getRow();
818        row[row.length - 2] <<= row[row.length - 2];
819        Put p = new Put(g.getRow());
820        p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
821            r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
822        t.put(p);
823      }
824      MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
825      Report report = visitor.getReport();
826      LOG.info(report != null? report.toString(): "empty");
827    }
828  }
829}