001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Comparator;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Properties;
027import java.util.concurrent.atomic.AtomicBoolean;
028import java.util.stream.Collectors;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.HBaseConfiguration;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.MetaTableAccessor;
035import org.apache.hadoop.hbase.ScheduledChore;
036import org.apache.hadoop.hbase.TableName;
037import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
038import org.apache.hadoop.hbase.client.Connection;
039import org.apache.hadoop.hbase.client.ConnectionFactory;
040import org.apache.hadoop.hbase.client.Get;
041import org.apache.hadoop.hbase.client.Put;
042import org.apache.hadoop.hbase.client.RegionInfo;
043import org.apache.hadoop.hbase.client.Result;
044import org.apache.hadoop.hbase.client.Table;
045import org.apache.hadoop.hbase.client.TableDescriptor;
046import org.apache.hadoop.hbase.master.MasterServices;
047import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
048import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
049import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
050import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
051import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
052import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
053import org.apache.hadoop.hbase.util.Bytes;
054import org.apache.hadoop.hbase.util.CommonFSUtils;
055import org.apache.hadoop.hbase.util.Pair;
056import org.apache.hadoop.hbase.util.PairOfSameType;
057import org.apache.hadoop.hbase.util.Threads;
058import org.apache.yetus.audience.InterfaceAudience;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062/**
063 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog table on a period.
064 * Makes a lastReport on state of hbase:meta. Looks for unused regions to garbage collect. Scan of
065 * hbase:meta runs if we are NOT in maintenance mode, if we are NOT shutting down, AND if the
066 * assignmentmanager is loaded. Playing it safe, we will garbage collect no-longer needed region
067 * references only if there are no regions-in-transition (RIT).
068 */
069// TODO: Only works with single hbase:meta region currently. Fix.
070// TODO: Should it start over every time? Could it continue if runs into problem? Only if
071// problem does not mess up 'results'.
072// TODO: Do more by way of 'repair'; see note on unknownServers below.
073@InterfaceAudience.Private
074public class CatalogJanitor extends ScheduledChore {
075
076  private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName());
077
078  private final AtomicBoolean alreadyRunning = new AtomicBoolean(false);
079  private final AtomicBoolean enabled = new AtomicBoolean(true);
080  private final MasterServices services;
081
082  /**
083   * Saved report from last hbase:meta scan to completion. May be stale if having trouble completing
084   * scan. Check its date.
085   */
086  private volatile Report lastReport;
087
088  public CatalogJanitor(final MasterServices services) {
089    super("CatalogJanitor-" + services.getServerName().toShortString(), services,
090      services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000));
091    this.services = services;
092  }
093
094  @Override
095  protected boolean initialChore() {
096    try {
097      if (getEnabled()) {
098        scan();
099      }
100    } catch (IOException e) {
101      LOG.warn("Failed initial janitorial scan of hbase:meta table", e);
102      return false;
103    }
104    return true;
105  }
106
107  public boolean setEnabled(final boolean enabled) {
108    boolean alreadyEnabled = this.enabled.getAndSet(enabled);
109    // If disabling is requested on an already enabled chore, we could have an active
110    // scan still going on, callers might not be aware of that and do further action thinkng
111    // that no action would be from this chore. In this case, the right action is to wait for
112    // the active scan to complete before exiting this function.
113    if (!enabled && alreadyEnabled) {
114      while (alreadyRunning.get()) {
115        Threads.sleepWithoutInterrupt(100);
116      }
117    }
118    return alreadyEnabled;
119  }
120
121  public boolean getEnabled() {
122    return this.enabled.get();
123  }
124
125  @Override
126  protected void chore() {
127    try {
128      AssignmentManager am = this.services.getAssignmentManager();
129      if (getEnabled() && !this.services.isInMaintenanceMode() &&
130        !this.services.getServerManager().isClusterShutdown() && isMetaLoaded(am)) {
131        scan();
132      } else {
133        LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() + ", maintenanceMode=" +
134          this.services.isInMaintenanceMode() + ", am=" + am + ", metaLoaded=" + isMetaLoaded(am) +
135          ", hasRIT=" + isRIT(am) + " clusterShutDown=" +
136          this.services.getServerManager().isClusterShutdown());
137      }
138    } catch (IOException e) {
139      LOG.warn("Failed janitorial scan of hbase:meta table", e);
140    }
141  }
142
143  private static boolean isMetaLoaded(AssignmentManager am) {
144    return am != null && am.isMetaLoaded();
145  }
146
147  private static boolean isRIT(AssignmentManager am) {
148    return isMetaLoaded(am) && am.hasRegionsInTransition();
149  }
150
151  /**
152   * Run janitorial scan of catalog <code>hbase:meta</code> table looking for garbage to collect.
153   * @return How many items gc'd whether for merge or split. Returns -1 if previous scan is in
154   *         progress.
155   */
156  public int scan() throws IOException {
157    int gcs = 0;
158    try {
159      if (!alreadyRunning.compareAndSet(false, true)) {
160        LOG.debug("CatalogJanitor already running");
161        // -1 indicates previous scan is in progress
162        return -1;
163      }
164      this.lastReport = scanForReport();
165      if (!this.lastReport.isEmpty()) {
166        LOG.warn(this.lastReport.toString());
167      }
168
169      if (isRIT(this.services.getAssignmentManager())) {
170        LOG.warn("Playing-it-safe skipping merge/split gc'ing of regions from hbase:meta while " +
171          "regions-in-transition (RIT)");
172      }
173      Map<RegionInfo, Result> mergedRegions = this.lastReport.mergedRegions;
174      for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
175        if (this.services.isInMaintenanceMode()) {
176          // Stop cleaning if the master is in maintenance mode
177          break;
178        }
179
180        List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
181        if (parents != null && cleanMergeRegion(e.getKey(), parents)) {
182          gcs++;
183        }
184      }
185      // Clean split parents
186      Map<RegionInfo, Result> splitParents = this.lastReport.splitParents;
187
188      // Now work on our list of found parents. See if any we can clean up.
189      HashSet<String> parentNotCleaned = new HashSet<>();
190      for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
191        if (this.services.isInMaintenanceMode()) {
192          // Stop cleaning if the master is in maintenance mode
193          break;
194        }
195
196        if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
197          cleanParent(e.getKey(), e.getValue())) {
198          gcs++;
199        } else {
200          // We could not clean the parent, so it's daughters should not be
201          // cleaned either (HBASE-6160)
202          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(e.getValue());
203          parentNotCleaned.add(daughters.getFirst().getEncodedName());
204          parentNotCleaned.add(daughters.getSecond().getEncodedName());
205        }
206      }
207      return gcs;
208    } finally {
209      alreadyRunning.set(false);
210    }
211  }
212
213  /**
214   * Scan hbase:meta.
215   * @return Return generated {@link Report}
216   */
217  // will be override in tests.
218  protected Report scanForReport() throws IOException {
219    ReportMakingVisitor visitor = new ReportMakingVisitor(this.services);
220    // Null tablename means scan all of meta.
221    MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null);
222    return visitor.getReport();
223  }
224
225  /**
226   * @return Returns last published Report that comes of last successful scan of hbase:meta.
227   */
228  public Report getLastReport() {
229    return this.lastReport;
230  }
231
232  /**
233   * If merged region no longer holds reference to the merge regions, archive merge region on hdfs
234   * and perform deleting references in hbase:meta
235   * @return true if we delete references in merged region on hbase:meta and archive the files on
236   *         the file system
237   */
238  private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents)
239    throws IOException {
240    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
241    Path rootdir = this.services.getMasterFileSystem().getRootDir();
242    Path tabledir = CommonFSUtils.getTableDir(rootdir, mergedRegion.getTable());
243    TableDescriptor htd = getDescriptor(mergedRegion.getTable());
244    HRegionFileSystem regionFs = null;
245    try {
246      regionFs = HRegionFileSystem.openRegionFromFileSystem(this.services.getConfiguration(), fs,
247        tabledir, mergedRegion, true);
248    } catch (IOException e) {
249      LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
250    }
251    if (regionFs == null || !regionFs.hasReferences(htd)) {
252      LOG.debug(
253        "Deleting parents ({}) from fs; merged child {} no longer holds references", parents
254          .stream().map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", ")),
255        mergedRegion);
256      ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();
257      pe.submitProcedure(
258        new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), mergedRegion, parents));
259      for (RegionInfo ri : parents) {
260        // The above scheduled GCMultipleMergedRegionsProcedure does the below.
261        // Do we need this?
262        this.services.getAssignmentManager().getRegionStates().deleteRegion(ri);
263        this.services.getServerManager().removeRegion(ri);
264      }
265      return true;
266    }
267    return false;
268  }
269
270  /**
271   * Compare HRegionInfos in a way that has split parents sort BEFORE their daughters.
272   */
273  static class SplitParentFirstComparator implements Comparator<RegionInfo> {
274    Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
275
276    @Override
277    public int compare(RegionInfo left, RegionInfo right) {
278      // This comparator differs from the one RegionInfo in that it sorts
279      // parent before daughters.
280      if (left == null) {
281        return -1;
282      }
283      if (right == null) {
284        return 1;
285      }
286      // Same table name.
287      int result = left.getTable().compareTo(right.getTable());
288      if (result != 0) {
289        return result;
290      }
291      // Compare start keys.
292      result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
293      if (result != 0) {
294        return result;
295      }
296      // Compare end keys, but flip the operands so parent comes first
297      result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
298
299      return result;
300    }
301  }
302
303  static boolean cleanParent(MasterServices services, RegionInfo parent, Result rowContent)
304    throws IOException {
305    // Check whether it is a merged region and if it is clean of references.
306    if (MetaTableAccessor.hasMergeRegions(rowContent.rawCells())) {
307      // Wait until clean of merge parent regions first
308      return false;
309    }
310    // Run checks on each daughter split.
311    PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
312    Pair<Boolean, Boolean> a = checkDaughterInFs(services, parent, daughters.getFirst());
313    Pair<Boolean, Boolean> b = checkDaughterInFs(services, parent, daughters.getSecond());
314    if (hasNoReferences(a) && hasNoReferences(b)) {
315      String daughterA =
316        daughters.getFirst() != null ? daughters.getFirst().getShortNameToLog() : "null";
317      String daughterB =
318        daughters.getSecond() != null ? daughters.getSecond().getShortNameToLog() : "null";
319      LOG.debug("Deleting region " + parent.getShortNameToLog() + " because daughters -- " +
320        daughterA + ", " + daughterB + " -- no longer hold references");
321      ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
322      pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent));
323      // Remove from in-memory states
324      services.getAssignmentManager().getRegionStates().deleteRegion(parent);
325      services.getServerManager().removeRegion(parent);
326      return true;
327    }
328    return false;
329  }
330
331  /**
332   * If daughters no longer hold reference to the parents, delete the parent.
333   * @param parent RegionInfo of split offlined parent
334   * @param rowContent Content of <code>parent</code> row in <code>metaRegionName</code>
335   * @return True if we removed <code>parent</code> from meta table and from the filesystem.
336   */
337  private boolean cleanParent(final RegionInfo parent, Result rowContent) throws IOException {
338    return cleanParent(services, parent, rowContent);
339  }
340
341  /**
342   * @param p A pair where the first boolean says whether or not the daughter region directory
343   *          exists in the filesystem and then the second boolean says whether the daughter has
344   *          references to the parent.
345   * @return True the passed <code>p</code> signifies no references.
346   */
347  private static boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
348    return !p.getFirst() || !p.getSecond();
349  }
350
351  /**
352   * Checks if a daughter region -- either splitA or splitB -- still holds references to parent.
353   * @param parent Parent region
354   * @param daughter Daughter region
355   * @return A pair where the first boolean says whether or not the daughter region directory exists
356   *         in the filesystem and then the second boolean says whether the daughter has references
357   *         to the parent.
358   */
359  private static Pair<Boolean, Boolean> checkDaughterInFs(MasterServices services,
360    final RegionInfo parent, final RegionInfo daughter) throws IOException {
361    if (daughter == null) {
362      return new Pair<>(Boolean.FALSE, Boolean.FALSE);
363    }
364
365    FileSystem fs = services.getMasterFileSystem().getFileSystem();
366    Path rootdir = services.getMasterFileSystem().getRootDir();
367    Path tabledir = CommonFSUtils.getTableDir(rootdir, daughter.getTable());
368
369    Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());
370
371    HRegionFileSystem regionFs;
372
373    try {
374      if (!CommonFSUtils.isExists(fs, daughterRegionDir)) {
375        return new Pair<>(Boolean.FALSE, Boolean.FALSE);
376      }
377    } catch (IOException ioe) {
378      LOG.error("Error trying to determine if daughter region exists, " +
379        "assuming exists and has references", ioe);
380      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
381    }
382
383    boolean references = false;
384    TableDescriptor parentDescriptor = services.getTableDescriptors().get(parent.getTable());
385    try {
386      regionFs = HRegionFileSystem.openRegionFromFileSystem(services.getConfiguration(), fs,
387        tabledir, daughter, true);
388
389      for (ColumnFamilyDescriptor family : parentDescriptor.getColumnFamilies()) {
390        references = regionFs.hasReferences(family.getNameAsString());
391        if (references) {
392          break;
393        }
394      }
395    } catch (IOException e) {
396      LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName() +
397        ", to: " + parent.getEncodedName() + " assuming has references", e);
398      return new Pair<>(Boolean.TRUE, Boolean.TRUE);
399    }
400    return new Pair<>(Boolean.TRUE, references);
401  }
402
403  private TableDescriptor getDescriptor(final TableName tableName) throws IOException {
404    return this.services.getTableDescriptors().get(tableName);
405  }
406
407  private static void checkLog4jProperties() {
408    String filename = "log4j.properties";
409    try {
410      final InputStream inStream =
411        CatalogJanitor.class.getClassLoader().getResourceAsStream(filename);
412      if (inStream != null) {
413        new Properties().load(inStream);
414      } else {
415        System.out.println("No " + filename + " on classpath; Add one else no logging output!");
416      }
417    } catch (IOException e) {
418      LOG.error("Log4j check failed", e);
419    }
420  }
421
422  /**
423   * For testing against a cluster. Doesn't have a MasterServices context so does not report on good
424   * vs bad servers.
425   */
426  public static void main(String[] args) throws IOException {
427    checkLog4jProperties();
428    ReportMakingVisitor visitor = new ReportMakingVisitor(null);
429    Configuration configuration = HBaseConfiguration.create();
430    configuration.setBoolean("hbase.defaults.for.version.skip", true);
431    try (Connection connection = ConnectionFactory.createConnection(configuration)) {
432      /*
433       * Used to generate an overlap.
434       */
435      Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
436      g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
437      try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
438        Result r = t.get(g);
439        byte[] row = g.getRow();
440        row[row.length - 2] <<= row[row.length - 2];
441        Put p = new Put(g.getRow());
442        p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
443          r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
444        t.put(p);
445      }
446      MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
447      Report report = visitor.getReport();
448      LOG.info(report != null ? report.toString() : "empty");
449    }
450  }
451}