001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Comparator;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Properties;
027import java.util.concurrent.atomic.AtomicBoolean;
028import java.util.stream.Collectors;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.CatalogFamilyFormat;
033import org.apache.hadoop.hbase.HBaseConfiguration;
034import org.apache.hadoop.hbase.HConstants;
035import org.apache.hadoop.hbase.MetaTableAccessor;
036import org.apache.hadoop.hbase.ScheduledChore;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
039import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
040import org.apache.hadoop.hbase.client.Connection;
041import org.apache.hadoop.hbase.client.ConnectionFactory;
042import org.apache.hadoop.hbase.client.Get;
043import org.apache.hadoop.hbase.client.Put;
044import org.apache.hadoop.hbase.client.RegionInfo;
045import org.apache.hadoop.hbase.client.Result;
046import org.apache.hadoop.hbase.client.Table;
047import org.apache.hadoop.hbase.client.TableDescriptor;
048import org.apache.hadoop.hbase.master.MasterServices;
049import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
050import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
051import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
052import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
053import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
054import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
055import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
056import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
057import org.apache.hadoop.hbase.util.Bytes;
058import org.apache.hadoop.hbase.util.CommonFSUtils;
059import org.apache.hadoop.hbase.util.Pair;
060import org.apache.hadoop.hbase.util.PairOfSameType;
061import org.apache.hadoop.hbase.util.Threads;
062import org.apache.yetus.audience.InterfaceAudience;
063import org.slf4j.Logger;
064import org.slf4j.LoggerFactory;
065
066/**
067 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog table on a period.
068 * Makes a lastReport on state of hbase:meta. Looks for unused regions to garbage collect. Scan of
069 * hbase:meta runs if we are NOT in maintenance mode, if we are NOT shutting down, AND if the
070 * assignmentmanager is loaded. Playing it safe, we will garbage collect no-longer needed region
071 * references only if there are no regions-in-transition (RIT).
072 */
073// TODO: Only works with single hbase:meta region currently. Fix.
074// TODO: Should it start over every time? Could it continue if runs into problem? Only if
075// problem does not mess up 'results'.
076// TODO: Do more by way of 'repair'; see note on unknownServers below.
077@InterfaceAudience.Private
078public class CatalogJanitor extends ScheduledChore {
079
080  public static final int DEFAULT_HBASE_CATALOGJANITOR_INTERVAL = 300 * 1000;
081
082  private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName());
083
084  private final AtomicBoolean alreadyRunning = new AtomicBoolean(false);
085  private final AtomicBoolean enabled = new AtomicBoolean(true);
086  private final MasterServices services;
087
088  /**
089   * Saved report from last hbase:meta scan to completion. May be stale if having trouble completing
090   * scan. Check its date.
091   */
092  private volatile CatalogJanitorReport lastReport;
093
094  public CatalogJanitor(final MasterServices services) {
095    super("CatalogJanitor-" + services.getServerName().toShortString(), services,
096      services.getConfiguration().getInt("hbase.catalogjanitor.interval",
097        DEFAULT_HBASE_CATALOGJANITOR_INTERVAL));
098    this.services = services;
099  }
100
101  @Override
102  protected boolean initialChore() {
103    try {
104      if (getEnabled()) {
105        scan();
106      }
107    } catch (IOException e) {
108      LOG.warn("Failed initial janitorial scan of hbase:meta table", e);
109      return false;
110    }
111    return true;
112  }
113
114  public boolean setEnabled(final boolean enabled) {
115    boolean alreadyEnabled = this.enabled.getAndSet(enabled);
116    // If disabling is requested on an already enabled chore, we could have an active
117    // scan still going on, callers might not be aware of that and do further action thinkng
118    // that no action would be from this chore. In this case, the right action is to wait for
119    // the active scan to complete before exiting this function.
120    if (!enabled && alreadyEnabled) {
121      while (alreadyRunning.get()) {
122        Threads.sleepWithoutInterrupt(100);
123      }
124    }
125    return alreadyEnabled;
126  }
127
128  public boolean getEnabled() {
129    return this.enabled.get();
130  }
131
132  @Override
133  protected void chore() {
134    try {
135      AssignmentManager am = this.services.getAssignmentManager();
136      if (
137        getEnabled() && !this.services.isInMaintenanceMode()
138          && !this.services.getServerManager().isClusterShutdown() && isMetaLoaded(am)
139      ) {
140        scan();
141      } else {
142        LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + ", maintenanceMode="
143          + this.services.isInMaintenanceMode() + ", am=" + am + ", metaLoaded=" + isMetaLoaded(am)
144          + ", hasRIT=" + isRIT(am) + " clusterShutDown="
145          + this.services.getServerManager().isClusterShutdown());
146      }
147    } catch (IOException e) {
148      LOG.warn("Failed janitorial scan of hbase:meta table", e);
149    }
150  }
151
152  private static boolean isMetaLoaded(AssignmentManager am) {
153    return am != null && am.isMetaLoaded();
154  }
155
156  private static boolean isRIT(AssignmentManager am) {
157    return isMetaLoaded(am) && am.hasRegionsInTransition();
158  }
159
160  /**
161   * Run janitorial scan of catalog <code>hbase:meta</code> table looking for garbage to collect.
162   * @return How many items gc'd whether for merge or split. Returns -1 if previous scan is in
163   *         progress.
164   */
165  public int scan() throws IOException {
166    int gcs = 0;
167    try {
168      if (!alreadyRunning.compareAndSet(false, true)) {
169        if (LOG.isDebugEnabled()) {
170          LOG.debug("CatalogJanitor already running");
171        }
172        // -1 indicates previous scan is in progress
173        return -1;
174      }
175      this.lastReport = scanForReport();
176      if (!this.lastReport.isEmpty()) {
177        LOG.warn(this.lastReport.toString());
178      } else {
179        if (LOG.isDebugEnabled()) {
180          LOG.debug(this.lastReport.toString());
181        }
182      }
183
184      updateAssignmentManagerMetrics();
185
186      Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions;
187      for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
188        if (this.services.isInMaintenanceMode()) {
189          // Stop cleaning if the master is in maintenance mode
190          LOG.debug("In maintenance mode, not cleaning");
191          break;
192        }
193
194        List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells());
195        if (parents != null && cleanMergeRegion(this.services, e.getKey(), parents)) {
196          gcs++;
197        }
198      }
199      // Clean split parents
200      Map<RegionInfo, Result> splitParents = this.lastReport.splitParents;
201
202      // Now work on our list of found parents. See if any we can clean up.
203      HashSet<String> parentNotCleaned = new HashSet<>();
204      for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
205        if (this.services.isInMaintenanceMode()) {
206          // Stop cleaning if the master is in maintenance mode
207          if (LOG.isDebugEnabled()) {
208            LOG.debug("In maintenance mode, not cleaning");
209          }
210          break;
211        }
212
213        if (
214          !parentNotCleaned.contains(e.getKey().getEncodedName())
215            && cleanParent(e.getKey(), e.getValue())
216        ) {
217          gcs++;
218        } else {
219          // We could not clean the parent, so it's daughters should not be
220          // cleaned either (HBASE-6160)
221          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(e.getValue());
222          parentNotCleaned.add(daughters.getFirst().getEncodedName());
223          parentNotCleaned.add(daughters.getSecond().getEncodedName());
224        }
225      }
226      return gcs;
227    } finally {
228      alreadyRunning.set(false);
229    }
230  }
231
232  /**
233   * Scan hbase:meta.
234   * @return Return generated {@link CatalogJanitorReport}
235   */
236  // will be override in tests.
237  protected CatalogJanitorReport scanForReport() throws IOException {
238    ReportMakingVisitor visitor = new ReportMakingVisitor(this.services);
239    // Null tablename means scan all of meta.
240    MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null);
241    return visitor.getReport();
242  }
243
244  /** Returns Returns last published Report that comes of last successful scan of hbase:meta. */
245  public CatalogJanitorReport getLastReport() {
246    return this.lastReport;
247  }
248
249  /**
250   * If merged region no longer holds reference to the merge regions, archive merge region on hdfs
251   * and perform deleting references in hbase:meta
252   * @return true if we delete references in merged region on hbase:meta and archive the files on
253   *         the file system
254   */
255  static boolean cleanMergeRegion(MasterServices services, final RegionInfo mergedRegion,
256    List<RegionInfo> parents) throws IOException {
257    if (LOG.isDebugEnabled()) {
258      LOG.debug("Cleaning merged region {}", mergedRegion);
259    }
260
261    Pair<Boolean, Boolean> result =
262      checkRegionReferences(services, mergedRegion.getTable(), mergedRegion);
263
264    if (hasNoReferences(result)) {
265      if (LOG.isDebugEnabled()) {
266        LOG.debug(
267          "Deleting parents ({}) from fs; merged child {} no longer holds references", parents
268            .stream().map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", ")),
269          mergedRegion);
270      }
271
272      ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
273      GCMultipleMergedRegionsProcedure mergeRegionProcedure =
274        new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), mergedRegion, parents);
275      pe.submitProcedure(mergeRegionProcedure);
276      if (LOG.isDebugEnabled()) {
277        LOG.debug("Submitted procedure {} for merged region {}", mergeRegionProcedure,
278          mergedRegion);
279      }
280      return true;
281    } else {
282      if (LOG.isDebugEnabled()) {
283        LOG.debug(
284          "Deferring cleanup up of {} parents of merged region {}, because references "
285            + "still exist in merged region or we encountered an exception in checking",
286          parents.size(), mergedRegion.getEncodedName());
287      }
288    }
289
290    return false;
291  }
292
293  /**
294   * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters.
295   */
296  static class SplitParentFirstComparator implements Comparator<RegionInfo> {
297    Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
298
299    @Override
300    public int compare(RegionInfo left, RegionInfo right) {
301      // This comparator differs from the one RegionInfo in that it sorts
302      // parent before daughters.
303      if (left == null) {
304        return -1;
305      }
306      if (right == null) {
307        return 1;
308      }
309      // Same table name.
310      int result = left.getTable().compareTo(right.getTable());
311      if (result != 0) {
312        return result;
313      }
314      // Compare start keys.
315      result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
316      if (result != 0) {
317        return result;
318      }
319      // Compare end keys, but flip the operands so parent comes first
320      result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
321
322      return result;
323    }
324  }
325
326  static boolean cleanParent(MasterServices services, RegionInfo parent, Result rowContent)
327    throws IOException {
328    if (LOG.isDebugEnabled()) {
329      LOG.debug("Cleaning parent region {}", parent);
330    }
331    // Check whether it is a merged region and if it is clean of references.
332    if (CatalogFamilyFormat.hasMergeRegions(rowContent.rawCells())) {
333      // Wait until clean of merge parent regions first
334      if (LOG.isDebugEnabled()) {
335        LOG.debug("Region {} has merge parents, cleaning them first", parent);
336      }
337      return false;
338    }
339    // Run checks on each daughter split.
340    PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
341    Pair<Boolean, Boolean> a =
342      checkRegionReferences(services, parent.getTable(), daughters.getFirst());
343    Pair<Boolean, Boolean> b =
344      checkRegionReferences(services, parent.getTable(), daughters.getSecond());
345    if (hasNoReferences(a) && hasNoReferences(b)) {
346      String daughterA =
347        daughters.getFirst() != null ? daughters.getFirst().getShortNameToLog() : "null";
348      String daughterB =
349        daughters.getSecond() != null ? daughters.getSecond().getShortNameToLog() : "null";
350      if (LOG.isDebugEnabled()) {
351        LOG.debug("Deleting region " + parent.getShortNameToLog() + " because daughters -- "
352          + daughterA + ", " + daughterB + " -- no longer hold references");
353      }
354      ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
355      GCRegionProcedure gcRegionProcedure = new GCRegionProcedure(pe.getEnvironment(), parent);
356      pe.submitProcedure(gcRegionProcedure);
357      if (LOG.isDebugEnabled()) {
358        LOG.debug("Submitted procedure {} for split parent {}", gcRegionProcedure, parent);
359      }
360      return true;
361    } else {
362      if (LOG.isDebugEnabled()) {
363        if (!hasNoReferences(a)) {
364          LOG.debug("Deferring removal of region {} because daughter {} still has references",
365            parent, daughters.getFirst());
366        }
367        if (!hasNoReferences(b)) {
368          LOG.debug("Deferring removal of region {} because daughter {} still has references",
369            parent, daughters.getSecond());
370        }
371      }
372    }
373    return false;
374  }
375
376  /**
377   * If daughters no longer hold reference to the parents, delete the parent.
378   * @param parent     RegionInfo of split offlined parent
379   * @param rowContent Content of <code>parent</code> row in <code>metaRegionName</code>
380   * @return True if we removed <code>parent</code> from meta table and from the filesystem.
381   */
382  private boolean cleanParent(final RegionInfo parent, Result rowContent) throws IOException {
383    return cleanParent(services, parent, rowContent);
384  }
385
386  /**
387   * @param p A pair where the first boolean says whether or not the daughter region directory
388   *          exists in the filesystem and then the second boolean says whether the daughter has
389   *          references to the parent.
390   * @return True the passed <code>p</code> signifies no references.
391   */
392  private static boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
393    return !p.getFirst() || !p.getSecond();
394  }
395
396  /**
397   * Checks if a region still holds references to parent.
398   * @param tableName The table for the region
399   * @param region    The region to check
400   * @return A pair where the first boolean says whether the region directory exists in the
401   *         filesystem and then the second boolean says whether the region has references to a
402   *         parent.
403   */
404  private static Pair<Boolean, Boolean> checkRegionReferences(MasterServices services,
405    TableName tableName, RegionInfo region) throws IOException {
406    if (region == null) {
407      return new Pair<>(Boolean.FALSE, Boolean.FALSE);
408    }
409
410    FileSystem fs = services.getMasterFileSystem().getFileSystem();
411    Path rootdir = services.getMasterFileSystem().getRootDir();
412    Path tabledir = CommonFSUtils.getTableDir(rootdir, tableName);
413    Path regionDir = new Path(tabledir, region.getEncodedName());
414
415    try {
416      if (!CommonFSUtils.isExists(fs, regionDir)) {
417        return new Pair<>(Boolean.FALSE, Boolean.FALSE);
418      }
419    } catch (IOException ioe) {
420      LOG.error("Error trying to determine if region exists, assuming exists and has references",
421        ioe);
422      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
423    }
424
425    TableDescriptor tableDescriptor = services.getTableDescriptors().get(tableName);
426    try {
427      HRegionFileSystem regionFs = HRegionFileSystem
428        .openRegionFromFileSystem(services.getConfiguration(), fs, tabledir, region, true);
429      ColumnFamilyDescriptor[] families = tableDescriptor.getColumnFamilies();
430      boolean references = false;
431      for (ColumnFamilyDescriptor cfd : families) {
432        StoreFileTracker sft = StoreFileTrackerFactory.create(services.getConfiguration(),
433          tableDescriptor, ColumnFamilyDescriptorBuilder.of(cfd.getNameAsString()), regionFs);
434        references = references || sft.hasReferences();
435        if (references) {
436          break;
437        }
438      }
439      return new Pair<>(Boolean.TRUE, references);
440    } catch (IOException e) {
441      LOG.error("Error trying to determine if region {} has references, assuming it does",
442        region.getEncodedName(), e);
443      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
444    }
445  }
446
447  private void updateAssignmentManagerMetrics() {
448    services.getAssignmentManager().getAssignmentManagerMetrics()
449      .updateHoles(lastReport.getHoles().size());
450    services.getAssignmentManager().getAssignmentManagerMetrics()
451      .updateOverlaps(lastReport.getOverlaps().size());
452    services.getAssignmentManager().getAssignmentManagerMetrics()
453      .updateUnknownServerRegions(lastReport.getUnknownServers().size());
454    services.getAssignmentManager().getAssignmentManagerMetrics()
455      .updateEmptyRegionInfoRegions(lastReport.getEmptyRegionInfo().size());
456  }
457
458  private static void checkLog4jProperties() {
459    String filename = "log4j.properties";
460    try (final InputStream inStream =
461      CatalogJanitor.class.getClassLoader().getResourceAsStream(filename)) {
462      if (inStream != null) {
463        new Properties().load(inStream);
464      } else {
465        System.out.println("No " + filename + " on classpath; Add one else no logging output!");
466      }
467    } catch (IOException e) {
468      LOG.error("Log4j check failed", e);
469    }
470  }
471
472  /**
473   * For testing against a cluster. Doesn't have a MasterServices context so does not report on good
474   * vs bad servers.
475   */
476  public static void main(String[] args) throws IOException {
477    checkLog4jProperties();
478    ReportMakingVisitor visitor = new ReportMakingVisitor(null);
479    Configuration configuration = HBaseConfiguration.create();
480    configuration.setBoolean("hbase.defaults.for.version.skip", true);
481    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
482      /*
483       * Used to generate an overlap.
484       */
485      Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
486      g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
487      try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
488        Result r = t.get(g);
489        byte[] row = g.getRow();
490        row[row.length - 2] <<= row[row.length - 2];
491        Put p = new Put(g.getRow());
492        p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
493          r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
494        t.put(p);
495      }
496      MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
497      CatalogJanitorReport report = visitor.getReport();
498      LOG.info(report != null ? report.toString() : "empty");
499    }
500  }
501}