001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Comparator;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Properties;
027import java.util.concurrent.atomic.AtomicBoolean;
028import java.util.stream.Collectors;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.CatalogFamilyFormat;
033import org.apache.hadoop.hbase.HBaseConfiguration;
034import org.apache.hadoop.hbase.HConstants;
035import org.apache.hadoop.hbase.MetaTableAccessor;
036import org.apache.hadoop.hbase.ScheduledChore;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.Connection;
039import org.apache.hadoop.hbase.client.ConnectionFactory;
040import org.apache.hadoop.hbase.client.Get;
041import org.apache.hadoop.hbase.client.Put;
042import org.apache.hadoop.hbase.client.RegionInfo;
043import org.apache.hadoop.hbase.client.Result;
044import org.apache.hadoop.hbase.client.Table;
045import org.apache.hadoop.hbase.client.TableDescriptor;
046import org.apache.hadoop.hbase.master.MasterServices;
047import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
048import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
049import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
050import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
051import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
052import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
053import org.apache.hadoop.hbase.util.Bytes;
054import org.apache.hadoop.hbase.util.CommonFSUtils;
055import org.apache.hadoop.hbase.util.Pair;
056import org.apache.hadoop.hbase.util.PairOfSameType;
057import org.apache.hadoop.hbase.util.Threads;
058import org.apache.yetus.audience.InterfaceAudience;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062/**
063 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog table on a period.
064 * Makes a lastReport on state of hbase:meta. Looks for unused regions to garbage collect. Scan of
065 * hbase:meta runs if we are NOT in maintenance mode, if we are NOT shutting down, AND if the
066 * assignmentmanager is loaded. Playing it safe, we will garbage collect no-longer needed region
067 * references only if there are no regions-in-transition (RIT).
068 */
069// TODO: Only works with single hbase:meta region currently. Fix.
070// TODO: Should it start over every time? Could it continue if runs into problem? Only if
071// problem does not mess up 'results'.
072// TODO: Do more by way of 'repair'; see note on unknownServers below.
073@InterfaceAudience.Private
074public class CatalogJanitor extends ScheduledChore {
075
076  public static final int DEFAULT_HBASE_CATALOGJANITOR_INTERVAL = 300 * 1000;
077
078  private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName());
079
080  private final AtomicBoolean alreadyRunning = new AtomicBoolean(false);
081  private final AtomicBoolean enabled = new AtomicBoolean(true);
082  private final MasterServices services;
083
084  /**
085   * Saved report from last hbase:meta scan to completion. May be stale if having trouble completing
086   * scan. Check its date.
087   */
088  private volatile CatalogJanitorReport lastReport;
089
090  public CatalogJanitor(final MasterServices services) {
091    super("CatalogJanitor-" + services.getServerName().toShortString(), services,
092      services.getConfiguration().getInt("hbase.catalogjanitor.interval",
093        DEFAULT_HBASE_CATALOGJANITOR_INTERVAL));
094    this.services = services;
095  }
096
097  @Override
098  protected boolean initialChore() {
099    try {
100      if (getEnabled()) {
101        scan();
102      }
103    } catch (IOException e) {
104      LOG.warn("Failed initial janitorial scan of hbase:meta table", e);
105      return false;
106    }
107    return true;
108  }
109
110  public boolean setEnabled(final boolean enabled) {
111    boolean alreadyEnabled = this.enabled.getAndSet(enabled);
112    // If disabling is requested on an already enabled chore, we could have an active
113    // scan still going on, callers might not be aware of that and do further action thinkng
114    // that no action would be from this chore. In this case, the right action is to wait for
115    // the active scan to complete before exiting this function.
116    if (!enabled && alreadyEnabled) {
117      while (alreadyRunning.get()) {
118        Threads.sleepWithoutInterrupt(100);
119      }
120    }
121    return alreadyEnabled;
122  }
123
124  public boolean getEnabled() {
125    return this.enabled.get();
126  }
127
128  @Override
129  protected void chore() {
130    try {
131      AssignmentManager am = this.services.getAssignmentManager();
132      if (
133        getEnabled() && !this.services.isInMaintenanceMode()
134          && !this.services.getServerManager().isClusterShutdown() && isMetaLoaded(am)
135      ) {
136        scan();
137      } else {
138        LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + ", maintenanceMode="
139          + this.services.isInMaintenanceMode() + ", am=" + am + ", metaLoaded=" + isMetaLoaded(am)
140          + ", hasRIT=" + isRIT(am) + " clusterShutDown="
141          + this.services.getServerManager().isClusterShutdown());
142      }
143    } catch (IOException e) {
144      LOG.warn("Failed janitorial scan of hbase:meta table", e);
145    }
146  }
147
148  private static boolean isMetaLoaded(AssignmentManager am) {
149    return am != null && am.isMetaLoaded();
150  }
151
152  private static boolean isRIT(AssignmentManager am) {
153    return isMetaLoaded(am) && am.hasRegionsInTransition();
154  }
155
156  /**
157   * Run janitorial scan of catalog <code>hbase:meta</code> table looking for garbage to collect.
158   * @return How many items gc'd whether for merge or split. Returns -1 if previous scan is in
159   *         progress.
160   */
161  public int scan() throws IOException {
162    int gcs = 0;
163    try {
164      if (!alreadyRunning.compareAndSet(false, true)) {
165        if (LOG.isDebugEnabled()) {
166          LOG.debug("CatalogJanitor already running");
167        }
168        // -1 indicates previous scan is in progress
169        return -1;
170      }
171      this.lastReport = scanForReport();
172      if (!this.lastReport.isEmpty()) {
173        LOG.warn(this.lastReport.toString());
174      } else {
175        if (LOG.isDebugEnabled()) {
176          LOG.debug(this.lastReport.toString());
177        }
178      }
179
180      updateAssignmentManagerMetrics();
181
182      Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions;
183      for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
184        if (this.services.isInMaintenanceMode()) {
185          // Stop cleaning if the master is in maintenance mode
186          LOG.debug("In maintenance mode, not cleaning");
187          break;
188        }
189
190        List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells());
191        if (parents != null && cleanMergeRegion(this.services, e.getKey(), parents)) {
192          gcs++;
193        }
194      }
195      // Clean split parents
196      Map<RegionInfo, Result> splitParents = this.lastReport.splitParents;
197
198      // Now work on our list of found parents. See if any we can clean up.
199      HashSet<String> parentNotCleaned = new HashSet<>();
200      for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
201        if (this.services.isInMaintenanceMode()) {
202          // Stop cleaning if the master is in maintenance mode
203          if (LOG.isDebugEnabled()) {
204            LOG.debug("In maintenance mode, not cleaning");
205          }
206          break;
207        }
208
209        if (
210          !parentNotCleaned.contains(e.getKey().getEncodedName())
211            && cleanParent(e.getKey(), e.getValue())
212        ) {
213          gcs++;
214        } else {
215          // We could not clean the parent, so it's daughters should not be
216          // cleaned either (HBASE-6160)
217          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(e.getValue());
218          parentNotCleaned.add(daughters.getFirst().getEncodedName());
219          parentNotCleaned.add(daughters.getSecond().getEncodedName());
220        }
221      }
222      return gcs;
223    } finally {
224      alreadyRunning.set(false);
225    }
226  }
227
228  /**
229   * Scan hbase:meta.
230   * @return Return generated {@link CatalogJanitorReport}
231   */
232  // will be override in tests.
233  protected CatalogJanitorReport scanForReport() throws IOException {
234    ReportMakingVisitor visitor = new ReportMakingVisitor(this.services);
235    // Null tablename means scan all of meta.
236    MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null);
237    return visitor.getReport();
238  }
239
240  /** Returns Returns last published Report that comes of last successful scan of hbase:meta. */
241  public CatalogJanitorReport getLastReport() {
242    return this.lastReport;
243  }
244
245  /**
246   * If merged region no longer holds reference to the merge regions, archive merge region on hdfs
247   * and perform deleting references in hbase:meta
248   * @return true if we delete references in merged region on hbase:meta and archive the files on
249   *         the file system
250   */
251  static boolean cleanMergeRegion(MasterServices services, final RegionInfo mergedRegion,
252    List<RegionInfo> parents) throws IOException {
253    if (LOG.isDebugEnabled()) {
254      LOG.debug("Cleaning merged region {}", mergedRegion);
255    }
256
257    Pair<Boolean, Boolean> result =
258      checkRegionReferences(services, mergedRegion.getTable(), mergedRegion);
259
260    if (hasNoReferences(result)) {
261      if (LOG.isDebugEnabled()) {
262        LOG.debug(
263          "Deleting parents ({}) from fs; merged child {} no longer holds references", parents
264            .stream().map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", ")),
265          mergedRegion);
266      }
267
268      ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
269      GCMultipleMergedRegionsProcedure mergeRegionProcedure =
270        new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), mergedRegion, parents);
271      pe.submitProcedure(mergeRegionProcedure);
272      if (LOG.isDebugEnabled()) {
273        LOG.debug("Submitted procedure {} for merged region {}", mergeRegionProcedure,
274          mergedRegion);
275      }
276      return true;
277    } else {
278      if (LOG.isDebugEnabled()) {
279        LOG.debug(
280          "Deferring cleanup up of {} parents of merged region {}, because references "
281            + "still exist in merged region or we encountered an exception in checking",
282          parents.size(), mergedRegion.getEncodedName());
283      }
284    }
285
286    return false;
287  }
288
289  /**
290   * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters.
291   */
292  static class SplitParentFirstComparator implements Comparator<RegionInfo> {
293    Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
294
295    @Override
296    public int compare(RegionInfo left, RegionInfo right) {
297      // This comparator differs from the one RegionInfo in that it sorts
298      // parent before daughters.
299      if (left == null) {
300        return -1;
301      }
302      if (right == null) {
303        return 1;
304      }
305      // Same table name.
306      int result = left.getTable().compareTo(right.getTable());
307      if (result != 0) {
308        return result;
309      }
310      // Compare start keys.
311      result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
312      if (result != 0) {
313        return result;
314      }
315      // Compare end keys, but flip the operands so parent comes first
316      result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
317
318      return result;
319    }
320  }
321
322  static boolean cleanParent(MasterServices services, RegionInfo parent, Result rowContent)
323    throws IOException {
324    if (LOG.isDebugEnabled()) {
325      LOG.debug("Cleaning parent region {}", parent);
326    }
327    // Check whether it is a merged region and if it is clean of references.
328    if (CatalogFamilyFormat.hasMergeRegions(rowContent.rawCells())) {
329      // Wait until clean of merge parent regions first
330      if (LOG.isDebugEnabled()) {
331        LOG.debug("Region {} has merge parents, cleaning them first", parent);
332      }
333      return false;
334    }
335    // Run checks on each daughter split.
336    PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
337    Pair<Boolean, Boolean> a =
338      checkRegionReferences(services, parent.getTable(), daughters.getFirst());
339    Pair<Boolean, Boolean> b =
340      checkRegionReferences(services, parent.getTable(), daughters.getSecond());
341    if (hasNoReferences(a) && hasNoReferences(b)) {
342      String daughterA =
343        daughters.getFirst() != null ? daughters.getFirst().getShortNameToLog() : "null";
344      String daughterB =
345        daughters.getSecond() != null ? daughters.getSecond().getShortNameToLog() : "null";
346      if (LOG.isDebugEnabled()) {
347        LOG.debug("Deleting region " + parent.getShortNameToLog() + " because daughters -- "
348          + daughterA + ", " + daughterB + " -- no longer hold references");
349      }
350      ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
351      GCRegionProcedure gcRegionProcedure = new GCRegionProcedure(pe.getEnvironment(), parent);
352      pe.submitProcedure(gcRegionProcedure);
353      if (LOG.isDebugEnabled()) {
354        LOG.debug("Submitted procedure {} for split parent {}", gcRegionProcedure, parent);
355      }
356      return true;
357    } else {
358      if (LOG.isDebugEnabled()) {
359        if (!hasNoReferences(a)) {
360          LOG.debug("Deferring removal of region {} because daughter {} still has references",
361            parent, daughters.getFirst());
362        }
363        if (!hasNoReferences(b)) {
364          LOG.debug("Deferring removal of region {} because daughter {} still has references",
365            parent, daughters.getSecond());
366        }
367      }
368    }
369    return false;
370  }
371
372  /**
373   * If daughters no longer hold reference to the parents, delete the parent.
374   * @param parent     RegionInfo of split offlined parent
375   * @param rowContent Content of <code>parent</code> row in <code>metaRegionName</code>
376   * @return True if we removed <code>parent</code> from meta table and from the filesystem.
377   */
378  private boolean cleanParent(final RegionInfo parent, Result rowContent) throws IOException {
379    return cleanParent(services, parent, rowContent);
380  }
381
382  /**
383   * @param p A pair where the first boolean says whether or not the daughter region directory
384   *          exists in the filesystem and then the second boolean says whether the daughter has
385   *          references to the parent.
386   * @return True the passed <code>p</code> signifies no references.
387   */
388  private static boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
389    return !p.getFirst() || !p.getSecond();
390  }
391
392  /**
393   * Checks if a region still holds references to parent.
394   * @param tableName The table for the region
395   * @param region    The region to check
396   * @return A pair where the first boolean says whether the region directory exists in the
397   *         filesystem and then the second boolean says whether the region has references to a
398   *         parent.
399   */
400  private static Pair<Boolean, Boolean> checkRegionReferences(MasterServices services,
401    TableName tableName, RegionInfo region) throws IOException {
402    if (region == null) {
403      return new Pair<>(Boolean.FALSE, Boolean.FALSE);
404    }
405
406    FileSystem fs = services.getMasterFileSystem().getFileSystem();
407    Path rootdir = services.getMasterFileSystem().getRootDir();
408    Path tabledir = CommonFSUtils.getTableDir(rootdir, tableName);
409    Path regionDir = new Path(tabledir, region.getEncodedName());
410
411    try {
412      if (!CommonFSUtils.isExists(fs, regionDir)) {
413        return new Pair<>(Boolean.FALSE, Boolean.FALSE);
414      }
415    } catch (IOException ioe) {
416      LOG.error("Error trying to determine if region exists, assuming exists and has references",
417        ioe);
418      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
419    }
420
421    TableDescriptor tableDescriptor = services.getTableDescriptors().get(tableName);
422    try {
423      HRegionFileSystem regionFs = HRegionFileSystem
424        .openRegionFromFileSystem(services.getConfiguration(), fs, tabledir, region, true);
425      boolean references = regionFs.hasReferences(tableDescriptor);
426      return new Pair<>(Boolean.TRUE, references);
427    } catch (IOException e) {
428      LOG.error("Error trying to determine if region {} has references, assuming it does",
429        region.getEncodedName(), e);
430      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
431    }
432  }
433
434  private void updateAssignmentManagerMetrics() {
435    services.getAssignmentManager().getAssignmentManagerMetrics()
436      .updateHoles(lastReport.getHoles().size());
437    services.getAssignmentManager().getAssignmentManagerMetrics()
438      .updateOverlaps(lastReport.getOverlaps().size());
439    services.getAssignmentManager().getAssignmentManagerMetrics()
440      .updateUnknownServerRegions(lastReport.getUnknownServers().size());
441    services.getAssignmentManager().getAssignmentManagerMetrics()
442      .updateEmptyRegionInfoRegions(lastReport.getEmptyRegionInfo().size());
443  }
444
445  private static void checkLog4jProperties() {
446    String filename = "log4j.properties";
447    try (final InputStream inStream =
448      CatalogJanitor.class.getClassLoader().getResourceAsStream(filename)) {
449      if (inStream != null) {
450        new Properties().load(inStream);
451      } else {
452        System.out.println("No " + filename + " on classpath; Add one else no logging output!");
453      }
454    } catch (IOException e) {
455      LOG.error("Log4j check failed", e);
456    }
457  }
458
459  /**
460   * For testing against a cluster. Doesn't have a MasterServices context so does not report on good
461   * vs bad servers.
462   */
463  public static void main(String[] args) throws IOException {
464    checkLog4jProperties();
465    ReportMakingVisitor visitor = new ReportMakingVisitor(null);
466    Configuration configuration = HBaseConfiguration.create();
467    configuration.setBoolean("hbase.defaults.for.version.skip", true);
468    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
469      /*
470       * Used to generate an overlap.
471       */
472      Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
473      g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
474      try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
475        Result r = t.get(g);
476        byte[] row = g.getRow();
477        row[row.length - 2] <<= row[row.length - 2];
478        Put p = new Put(g.getRow());
479        p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
480          r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
481        t.put(p);
482      }
483      MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
484      CatalogJanitorReport report = visitor.getReport();
485      LOG.info(report != null ? report.toString() : "empty");
486    }
487  }
488}