001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Comparator;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Properties;
027import java.util.concurrent.atomic.AtomicBoolean;
028import java.util.stream.Collectors;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.CatalogFamilyFormat;
033import org.apache.hadoop.hbase.HBaseConfiguration;
034import org.apache.hadoop.hbase.HConstants;
035import org.apache.hadoop.hbase.MetaTableAccessor;
036import org.apache.hadoop.hbase.ScheduledChore;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
039import org.apache.hadoop.hbase.client.Connection;
040import org.apache.hadoop.hbase.client.ConnectionFactory;
041import org.apache.hadoop.hbase.client.Get;
042import org.apache.hadoop.hbase.client.Put;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.client.Result;
045import org.apache.hadoop.hbase.client.Table;
046import org.apache.hadoop.hbase.client.TableDescriptor;
047import org.apache.hadoop.hbase.master.MasterServices;
048import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
049import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
050import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
051import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
052import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
053import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
054import org.apache.hadoop.hbase.util.Bytes;
055import org.apache.hadoop.hbase.util.CommonFSUtils;
056import org.apache.hadoop.hbase.util.Pair;
057import org.apache.hadoop.hbase.util.PairOfSameType;
058import org.apache.hadoop.hbase.util.Threads;
059import org.apache.yetus.audience.InterfaceAudience;
060import org.slf4j.Logger;
061import org.slf4j.LoggerFactory;
062
/**
 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog table on a period.
 * Makes a report on the state of hbase:meta. Looks for unused regions to garbage collect. Scan of
 * hbase:meta runs if we are NOT in maintenance mode, if we are NOT shutting down, AND if the
 * AssignmentManager has loaded hbase:meta. Playing it safe, we will garbage collect no-longer
 * needed region references only if there are no regions-in-transition (RIT).
 */
070// TODO: Only works with single hbase:meta region currently. Fix.
071// TODO: Should it start over every time? Could it continue if runs into problem? Only if
072// problem does not mess up 'results'.
073// TODO: Do more by way of 'repair'; see note on unknownServers below.
074@InterfaceAudience.Private
075public class CatalogJanitor extends ScheduledChore {
076
077  public static final int DEFAULT_HBASE_CATALOGJANITOR_INTERVAL = 300 * 1000;
078
079  private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName());
080
081  private final AtomicBoolean alreadyRunning = new AtomicBoolean(false);
082  private final AtomicBoolean enabled = new AtomicBoolean(true);
083  private final MasterServices services;
084
085  /**
086   * Saved report from last hbase:meta scan to completion. May be stale if having trouble completing
087   * scan. Check its date.
088   */
089  private volatile CatalogJanitorReport lastReport;
090
091  public CatalogJanitor(final MasterServices services) {
092    super("CatalogJanitor-" + services.getServerName().toShortString(), services,
093      services.getConfiguration().getInt("hbase.catalogjanitor.interval",
094        DEFAULT_HBASE_CATALOGJANITOR_INTERVAL));
095    this.services = services;
096  }
097
098  @Override
099  protected boolean initialChore() {
100    try {
101      if (getEnabled()) {
102        scan();
103      }
104    } catch (IOException e) {
105      LOG.warn("Failed initial janitorial scan of hbase:meta table", e);
106      return false;
107    }
108    return true;
109  }
110
111  public boolean setEnabled(final boolean enabled) {
112    boolean alreadyEnabled = this.enabled.getAndSet(enabled);
113    // If disabling is requested on an already enabled chore, we could have an active
114    // scan still going on, callers might not be aware of that and do further action thinkng
115    // that no action would be from this chore. In this case, the right action is to wait for
116    // the active scan to complete before exiting this function.
117    if (!enabled && alreadyEnabled) {
118      while (alreadyRunning.get()) {
119        Threads.sleepWithoutInterrupt(100);
120      }
121    }
122    return alreadyEnabled;
123  }
124
125  public boolean getEnabled() {
126    return this.enabled.get();
127  }
128
129  @Override
130  protected void chore() {
131    try {
132      AssignmentManager am = this.services.getAssignmentManager();
133      if (
134        getEnabled() && !this.services.isInMaintenanceMode()
135          && !this.services.getServerManager().isClusterShutdown() && isMetaLoaded(am)
136      ) {
137        scan();
138      } else {
139        LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + ", maintenanceMode="
140          + this.services.isInMaintenanceMode() + ", am=" + am + ", metaLoaded=" + isMetaLoaded(am)
141          + ", hasRIT=" + isRIT(am) + " clusterShutDown="
142          + this.services.getServerManager().isClusterShutdown());
143      }
144    } catch (IOException e) {
145      LOG.warn("Failed janitorial scan of hbase:meta table", e);
146    }
147  }
148
149  private static boolean isMetaLoaded(AssignmentManager am) {
150    return am != null && am.isMetaLoaded();
151  }
152
153  private static boolean isRIT(AssignmentManager am) {
154    return isMetaLoaded(am) && am.hasRegionsInTransition();
155  }
156
157  /**
158   * Run janitorial scan of catalog <code>hbase:meta</code> table looking for garbage to collect.
159   * @return How many items gc'd whether for merge or split. Returns -1 if previous scan is in
160   *         progress.
161   */
162  public int scan() throws IOException {
163    int gcs = 0;
164    try {
165      if (!alreadyRunning.compareAndSet(false, true)) {
166        if (LOG.isDebugEnabled()) {
167          LOG.debug("CatalogJanitor already running");
168        }
169        // -1 indicates previous scan is in progress
170        return -1;
171      }
172      this.lastReport = scanForReport();
173      if (!this.lastReport.isEmpty()) {
174        LOG.warn(this.lastReport.toString());
175      } else {
176        if (LOG.isDebugEnabled()) {
177          LOG.debug(this.lastReport.toString());
178        }
179      }
180
181      updateAssignmentManagerMetrics();
182
183      Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions;
184      for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
185        if (this.services.isInMaintenanceMode()) {
186          // Stop cleaning if the master is in maintenance mode
187          LOG.debug("In maintenence mode, not cleaning");
188          break;
189        }
190
191        List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells());
192        if (parents != null && cleanMergeRegion(e.getKey(), parents)) {
193          gcs++;
194        }
195      }
196      // Clean split parents
197      Map<RegionInfo, Result> splitParents = this.lastReport.splitParents;
198
199      // Now work on our list of found parents. See if any we can clean up.
200      HashSet<String> parentNotCleaned = new HashSet<>();
201      for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
202        if (this.services.isInMaintenanceMode()) {
203          // Stop cleaning if the master is in maintenance mode
204          if (LOG.isDebugEnabled()) {
205            LOG.debug("In maintenence mode, not cleaning");
206          }
207          break;
208        }
209
210        if (
211          !parentNotCleaned.contains(e.getKey().getEncodedName())
212            && cleanParent(e.getKey(), e.getValue())
213        ) {
214          gcs++;
215        } else {
216          // We could not clean the parent, so it's daughters should not be
217          // cleaned either (HBASE-6160)
218          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(e.getValue());
219          parentNotCleaned.add(daughters.getFirst().getEncodedName());
220          parentNotCleaned.add(daughters.getSecond().getEncodedName());
221        }
222      }
223      return gcs;
224    } finally {
225      alreadyRunning.set(false);
226    }
227  }
228
229  /**
230   * Scan hbase:meta.
231   * @return Return generated {@link CatalogJanitorReport}
232   */
233  // will be override in tests.
234  protected CatalogJanitorReport scanForReport() throws IOException {
235    ReportMakingVisitor visitor = new ReportMakingVisitor(this.services);
236    // Null tablename means scan all of meta.
237    MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null);
238    return visitor.getReport();
239  }
240
241  /**
242   * @return Returns last published Report that comes of last successful scan of hbase:meta.
243   */
244  public CatalogJanitorReport getLastReport() {
245    return this.lastReport;
246  }
247
248  /**
249   * If merged region no longer holds reference to the merge regions, archive merge region on hdfs
250   * and perform deleting references in hbase:meta
251   * @return true if we delete references in merged region on hbase:meta and archive the files on
252   *         the file system
253   */
254  private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents)
255    throws IOException {
256    if (LOG.isDebugEnabled()) {
257      LOG.debug("Cleaning merged region {}", mergedRegion);
258    }
259    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
260    Path rootdir = this.services.getMasterFileSystem().getRootDir();
261    Path tabledir = CommonFSUtils.getTableDir(rootdir, mergedRegion.getTable());
262    TableDescriptor htd = getDescriptor(mergedRegion.getTable());
263    HRegionFileSystem regionFs = null;
264    try {
265      regionFs = HRegionFileSystem.openRegionFromFileSystem(this.services.getConfiguration(), fs,
266        tabledir, mergedRegion, true);
267    } catch (IOException e) {
268      LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
269    }
270    if (regionFs == null || !regionFs.hasReferences(htd)) {
271      if (LOG.isDebugEnabled()) {
272        LOG.debug(
273          "Deleting parents ({}) from fs; merged child {} no longer holds references", parents
274            .stream().map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", ")),
275          mergedRegion);
276      }
277      ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();
278      GCMultipleMergedRegionsProcedure mergeRegionProcedure =
279        new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), mergedRegion, parents);
280      pe.submitProcedure(mergeRegionProcedure);
281      if (LOG.isDebugEnabled()) {
282        LOG.debug("Submitted procedure {} for merged region {}", mergeRegionProcedure,
283          mergedRegion);
284      }
285      return true;
286    }
287    return false;
288  }
289
290  /**
291   * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters.
292   */
293  static class SplitParentFirstComparator implements Comparator<RegionInfo> {
294    Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
295
296    @Override
297    public int compare(RegionInfo left, RegionInfo right) {
298      // This comparator differs from the one RegionInfo in that it sorts
299      // parent before daughters.
300      if (left == null) {
301        return -1;
302      }
303      if (right == null) {
304        return 1;
305      }
306      // Same table name.
307      int result = left.getTable().compareTo(right.getTable());
308      if (result != 0) {
309        return result;
310      }
311      // Compare start keys.
312      result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
313      if (result != 0) {
314        return result;
315      }
316      // Compare end keys, but flip the operands so parent comes first
317      result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
318
319      return result;
320    }
321  }
322
323  static boolean cleanParent(MasterServices services, RegionInfo parent, Result rowContent)
324    throws IOException {
325    if (LOG.isDebugEnabled()) {
326      LOG.debug("Cleaning parent region {}", parent);
327    }
328    // Check whether it is a merged region and if it is clean of references.
329    if (CatalogFamilyFormat.hasMergeRegions(rowContent.rawCells())) {
330      // Wait until clean of merge parent regions first
331      if (LOG.isDebugEnabled()) {
332        LOG.debug("Region {} has merge parents, cleaning them first", parent);
333      }
334      return false;
335    }
336    // Run checks on each daughter split.
337    PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
338    Pair<Boolean, Boolean> a = checkDaughterInFs(services, parent, daughters.getFirst());
339    Pair<Boolean, Boolean> b = checkDaughterInFs(services, parent, daughters.getSecond());
340    if (hasNoReferences(a) && hasNoReferences(b)) {
341      String daughterA =
342        daughters.getFirst() != null ? daughters.getFirst().getShortNameToLog() : "null";
343      String daughterB =
344        daughters.getSecond() != null ? daughters.getSecond().getShortNameToLog() : "null";
345      if (LOG.isDebugEnabled()) {
346        LOG.debug("Deleting region " + parent.getShortNameToLog() + " because daughters -- "
347          + daughterA + ", " + daughterB + " -- no longer hold references");
348      }
349      ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
350      GCRegionProcedure gcRegionProcedure = new GCRegionProcedure(pe.getEnvironment(), parent);
351      pe.submitProcedure(gcRegionProcedure);
352      if (LOG.isDebugEnabled()) {
353        LOG.debug("Submitted procedure {} for split parent {}", gcRegionProcedure, parent);
354      }
355      return true;
356    } else {
357      if (LOG.isDebugEnabled()) {
358        if (!hasNoReferences(a)) {
359          LOG.debug("Deferring removal of region {} because daughter {} still has references",
360            parent, daughters.getFirst());
361        }
362        if (!hasNoReferences(b)) {
363          LOG.debug("Deferring removal of region {} because daughter {} still has references",
364            parent, daughters.getSecond());
365        }
366      }
367    }
368    return false;
369  }
370
371  /**
372   * If daughters no longer hold reference to the parents, delete the parent.
373   * @param parent     RegionInfo of split offlined parent
374   * @param rowContent Content of <code>parent</code> row in <code>metaRegionName</code>
375   * @return True if we removed <code>parent</code> from meta table and from the filesystem.
376   */
377  private boolean cleanParent(final RegionInfo parent, Result rowContent) throws IOException {
378    return cleanParent(services, parent, rowContent);
379  }
380
381  /**
382   * @param p A pair where the first boolean says whether or not the daughter region directory
383   *          exists in the filesystem and then the second boolean says whether the daughter has
384   *          references to the parent.
385   * @return True the passed <code>p</code> signifies no references.
386   */
387  private static boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
388    return !p.getFirst() || !p.getSecond();
389  }
390
391  /**
392   * Checks if a daughter region -- either splitA or splitB -- still holds references to parent.
393   * @param parent   Parent region
394   * @param daughter Daughter region
395   * @return A pair where the first boolean says whether or not the daughter region directory exists
396   *         in the filesystem and then the second boolean says whether the daughter has references
397   *         to the parent.
398   */
399  private static Pair<Boolean, Boolean> checkDaughterInFs(MasterServices services,
400    final RegionInfo parent, final RegionInfo daughter) throws IOException {
401    if (daughter == null) {
402      return new Pair<>(Boolean.FALSE, Boolean.FALSE);
403    }
404
405    FileSystem fs = services.getMasterFileSystem().getFileSystem();
406    Path rootdir = services.getMasterFileSystem().getRootDir();
407    Path tabledir = CommonFSUtils.getTableDir(rootdir, daughter.getTable());
408
409    Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());
410
411    HRegionFileSystem regionFs;
412
413    try {
414      if (!CommonFSUtils.isExists(fs, daughterRegionDir)) {
415        return new Pair<>(Boolean.FALSE, Boolean.FALSE);
416      }
417    } catch (IOException ioe) {
418      LOG.error("Error trying to determine if daughter region exists, "
419        + "assuming exists and has references", ioe);
420      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
421    }
422
423    boolean references = false;
424    TableDescriptor parentDescriptor = services.getTableDescriptors().get(parent.getTable());
425    try {
426      regionFs = HRegionFileSystem.openRegionFromFileSystem(services.getConfiguration(), fs,
427        tabledir, daughter, true);
428
429      for (ColumnFamilyDescriptor family : parentDescriptor.getColumnFamilies()) {
430        references = regionFs.hasReferences(family.getNameAsString());
431        if (references) {
432          break;
433        }
434      }
435    } catch (IOException e) {
436      LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName()
437        + ", to: " + parent.getEncodedName() + " assuming has references", e);
438      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
439    }
440    return new Pair<>(Boolean.TRUE, references);
441  }
442
443  private TableDescriptor getDescriptor(final TableName tableName) throws IOException {
444    return this.services.getTableDescriptors().get(tableName);
445  }
446
447  private void updateAssignmentManagerMetrics() {
448    services.getAssignmentManager().getAssignmentManagerMetrics()
449      .updateHoles(lastReport.getHoles().size());
450    services.getAssignmentManager().getAssignmentManagerMetrics()
451      .updateOverlaps(lastReport.getOverlaps().size());
452    services.getAssignmentManager().getAssignmentManagerMetrics()
453      .updateUnknownServerRegions(lastReport.getUnknownServers().size());
454    services.getAssignmentManager().getAssignmentManagerMetrics()
455      .updateEmptyRegionInfoRegions(lastReport.getEmptyRegionInfo().size());
456  }
457
458  private static void checkLog4jProperties() {
459    String filename = "log4j.properties";
460    try (final InputStream inStream =
461      CatalogJanitor.class.getClassLoader().getResourceAsStream(filename)) {
462      if (inStream != null) {
463        new Properties().load(inStream);
464      } else {
465        System.out.println("No " + filename + " on classpath; Add one else no logging output!");
466      }
467    } catch (IOException e) {
468      LOG.error("Log4j check failed", e);
469    }
470  }
471
472  /**
473   * For testing against a cluster. Doesn't have a MasterServices context so does not report on good
474   * vs bad servers.
475   */
476  public static void main(String[] args) throws IOException {
477    checkLog4jProperties();
478    ReportMakingVisitor visitor = new ReportMakingVisitor(null);
479    Configuration configuration = HBaseConfiguration.create();
480    configuration.setBoolean("hbase.defaults.for.version.skip", true);
481    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
482      /*
483       * Used to generate an overlap.
484       */
485      Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
486      g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
487      try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
488        Result r = t.get(g);
489        byte[] row = g.getRow();
490        row[row.length - 2] <<= row[row.length - 2];
491        Put p = new Put(g.getRow());
492        p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
493          r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
494        t.put(p);
495      }
496      MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
497      CatalogJanitorReport report = visitor.getReport();
498      LOG.info(report != null ? report.toString() : "empty");
499    }
500  }
501}