001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.Closeable;
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.io.InterruptedIOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.net.InetAddress;
027import java.net.URI;
028import java.util.ArrayList;
029import java.util.Collection;
030import java.util.Collections;
031import java.util.Comparator;
032import java.util.EnumSet;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.List;
037import java.util.Locale;
038import java.util.Map;
039import java.util.Map.Entry;
040import java.util.Objects;
041import java.util.Optional;
042import java.util.Set;
043import java.util.SortedMap;
044import java.util.TreeMap;
045import java.util.Vector;
046import java.util.concurrent.Callable;
047import java.util.concurrent.ConcurrentSkipListMap;
048import java.util.concurrent.ExecutionException;
049import java.util.concurrent.ExecutorService;
050import java.util.concurrent.Executors;
051import java.util.concurrent.Future;
052import java.util.concurrent.FutureTask;
053import java.util.concurrent.ScheduledThreadPoolExecutor;
054import java.util.concurrent.TimeUnit;
055import java.util.concurrent.TimeoutException;
056import java.util.concurrent.atomic.AtomicBoolean;
057import java.util.concurrent.atomic.AtomicInteger;
058import java.util.stream.Collectors;
059import org.apache.commons.io.IOUtils;
060import org.apache.commons.lang3.StringUtils;
061import org.apache.hadoop.conf.Configuration;
062import org.apache.hadoop.conf.Configured;
063import org.apache.hadoop.fs.FSDataOutputStream;
064import org.apache.hadoop.fs.FileStatus;
065import org.apache.hadoop.fs.FileSystem;
066import org.apache.hadoop.fs.Path;
067import org.apache.hadoop.fs.permission.FsAction;
068import org.apache.hadoop.fs.permission.FsPermission;
069import org.apache.hadoop.hbase.Abortable;
070import org.apache.hadoop.hbase.CatalogFamilyFormat;
071import org.apache.hadoop.hbase.Cell;
072import org.apache.hadoop.hbase.CellUtil;
073import org.apache.hadoop.hbase.ClientMetaTableAccessor;
074import org.apache.hadoop.hbase.ClusterMetrics;
075import org.apache.hadoop.hbase.ClusterMetrics.Option;
076import org.apache.hadoop.hbase.HBaseConfiguration;
077import org.apache.hadoop.hbase.HBaseInterfaceAudience;
078import org.apache.hadoop.hbase.HConstants;
079import org.apache.hadoop.hbase.HRegionLocation;
080import org.apache.hadoop.hbase.KeyValue;
081import org.apache.hadoop.hbase.MasterNotRunningException;
082import org.apache.hadoop.hbase.MetaTableAccessor;
083import org.apache.hadoop.hbase.RegionLocations;
084import org.apache.hadoop.hbase.ServerName;
085import org.apache.hadoop.hbase.TableName;
086import org.apache.hadoop.hbase.TableNotFoundException;
087import org.apache.hadoop.hbase.ZooKeeperConnectionException;
088import org.apache.hadoop.hbase.client.Admin;
089import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
090import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
091import org.apache.hadoop.hbase.client.Connection;
092import org.apache.hadoop.hbase.client.ConnectionFactory;
093import org.apache.hadoop.hbase.client.Delete;
094import org.apache.hadoop.hbase.client.Get;
095import org.apache.hadoop.hbase.client.Put;
096import org.apache.hadoop.hbase.client.RegionInfo;
097import org.apache.hadoop.hbase.client.RegionInfoBuilder;
098import org.apache.hadoop.hbase.client.RegionLocator;
099import org.apache.hadoop.hbase.client.RegionReplicaUtil;
100import org.apache.hadoop.hbase.client.Result;
101import org.apache.hadoop.hbase.client.ResultScanner;
102import org.apache.hadoop.hbase.client.RowMutations;
103import org.apache.hadoop.hbase.client.Scan;
104import org.apache.hadoop.hbase.client.Table;
105import org.apache.hadoop.hbase.client.TableDescriptor;
106import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
107import org.apache.hadoop.hbase.client.TableState;
108import org.apache.hadoop.hbase.io.FileLink;
109import org.apache.hadoop.hbase.io.HFileLink;
110import org.apache.hadoop.hbase.io.hfile.CacheConfig;
111import org.apache.hadoop.hbase.io.hfile.HFile;
112import org.apache.hadoop.hbase.master.RegionState;
113import org.apache.hadoop.hbase.regionserver.HRegion;
114import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
115import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
116import org.apache.hadoop.hbase.replication.ReplicationException;
117import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
118import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
119import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
120import org.apache.hadoop.hbase.security.UserProvider;
121import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
122import org.apache.hadoop.hbase.util.HbckErrorReporter.ERROR_CODE;
123import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
124import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
125import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
126import org.apache.hadoop.hbase.wal.WALSplitUtil;
127import org.apache.hadoop.hbase.zookeeper.ZKUtil;
128import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
129import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
130import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
131import org.apache.hadoop.ipc.RemoteException;
132import org.apache.hadoop.security.AccessControlException;
133import org.apache.hadoop.security.UserGroupInformation;
134import org.apache.hadoop.util.ReflectionUtils;
135import org.apache.hadoop.util.Tool;
136import org.apache.hadoop.util.ToolRunner;
137import org.apache.yetus.audience.InterfaceAudience;
138import org.apache.yetus.audience.InterfaceStability;
139import org.apache.zookeeper.KeeperException;
140import org.slf4j.Logger;
141import org.slf4j.LoggerFactory;
142
143import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
144import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
145import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
146import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
147import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
148import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
149
150/**
151 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and table integrity
152 * problems in a corrupted HBase. This tool was written for hbase-1.x. It does not work with
153 * hbase-2.x; it can read state but is not allowed to change state; i.e. effect 'repair'. Even
154 * though it can 'read' state, given how so much has changed in how hbase1 and hbase2 operate, it
155 * will often misread. See hbck2 (HBASE-19121) for a hbck tool for hbase2. This class is deprecated.
156 * <p>
157 * Region consistency checks verify that hbase:meta, region deployment on region servers and the
158 * state of data in HDFS (.regioninfo files) all are in accordance.
159 * <p>
160 * Table integrity checks verify that all possible row keys resolve to exactly one region of a
161 * table. This means there are no individual degenerate or backwards regions; no holes between
162 * regions; and that there are no overlapping regions.
163 * <p>
164 * The general repair strategy works in two phases:
165 * <ol>
166 * <li>Repair Table Integrity on HDFS. (merge or fabricate regions)
167 * <li>Repair Region Consistency with hbase:meta and assignments
168 * </ol>
169 * <p>
170 * For table integrity repairs, the tables' region directories are scanned for .regioninfo files.
171 * Each table's integrity is then verified. If there are any orphan regions (regions with no
172 * .regioninfo files) or holes, new regions are fabricated. Backwards regions are sidelined as well
173 * as empty degenerate (endkey==startkey) regions. If there are any overlapping regions, a new
174 * region is created and all data is merged into the new region.
175 * <p>
176 * Table integrity repairs deal solely with HDFS and could potentially be done offline -- the hbase
177 * region servers or master do not need to be running. This phase can eventually be used to
178 * completely reconstruct the hbase:meta table in an offline fashion.
179 * <p>
180 * Region consistency requires three conditions -- 1) valid .regioninfo file present in an HDFS
181 * region dir, 2) valid row with .regioninfo data in META, and 3) a region is deployed only at the
182 * regionserver that was assigned to with proper state in the master.
183 * <p>
184 * Region consistency repairs require hbase to be online so that hbck can contact the HBase master
185 * and region servers. The hbck#connect() method must first be called successfully. Much of the
186 * region consistency information is transient and less risky to repair.
187 * <p>
188 * If hbck is run from the command line, there are a handful of arguments that can be used to limit
189 * the kinds of repairs hbck will do. See the code in {@link #printUsageAndExit()} for more details.
190 * @deprecated For removal in hbase-4.0.0. Use HBCK2 instead.
191 */
192@Deprecated
193@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
194@InterfaceStability.Evolving
195public class HBaseFsck extends Configured implements Closeable {
  public static final long DEFAULT_TIME_LAG = 60000; // 1 minute, expressed in milliseconds
  // NOTE(review): presumably milliseconds as well (i.e. 10 seconds) -- confirm against its users.
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  // Default size of the hbck thread pool when "hbasefsck.numthreads" is not configured.
  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
  private static boolean rsSupportsOffline = true;
  // Initial value of maxOverlapsToSideline.
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  // Initial value of maxMerge.
  private static final int DEFAULT_MAX_MERGE = 5;

  /**
   * Name of the hbck1 lock file. This is where hbase-1.x used to default the lock for hbck1: it
   * puts a lock in place when it goes to write/make changes. The file is created inside the
   * directory returned by {@link #getTmpDir(Configuration)}.
   */
  @InterfaceAudience.Private
  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
  // Defaults for the "hbase.hbck.lockfile.*" retry settings.
  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // The timeout must be greater than HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In HADOOP-2.6 and later, the Namenode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies that this operation can take up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds) before it times out.
  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
  // Defaults for the "hbase.hbck.createznode.*" retry settings.
  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
220
  /**********************
   * Internal resources
   **********************/
  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
  // Cluster metrics snapshot fetched from the Admin in connect().
  private ClusterMetrics status;
  // Connection, Admin and hbase:meta Table handles; all three are opened in connect().
  private Connection connection;
  private Admin admin;
  private Table meta;
  // Threads for parallelizable tasks: retrieve data from regionservers, handle overlapping regions
  protected ExecutorService executor;
  // Time at which this instance was created.
  private long startMillis = EnvironmentEdgeManager.currentTime();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  // Path of the hbck lock file; assigned in connect() when running exclusively.
  private Path HBCK_LOCK_PATH;
  // Stream that keeps the lock file open; null when the lock could not be obtained.
  private FSDataOutputStream hbckOutFd;
  // This flag prevents the lock file from being cleaned up twice, once by the
  // ShutdownHook and once by the main code. It is only set to true after connect()
  // has successfully obtained the lock, and unlockHbck() flips it back atomically.
  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  // Unsupported options in HBase 2.0+
  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
    "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
    "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
    "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
246
  /***********
   * Options
   ***********/
  private static boolean details = false; // do we display the full report
  // NOTE(review): the original note reads "tables whose modtime is older"; presumably the
  // window (in milliseconds) used to decide whether a table changed recently -- confirm.
  private long timelag = DEFAULT_TIME_LAG;
  private static boolean forceExclusive = false; // only this hbck can modify HBase
  private boolean fixAssignments = false; // fix assignment errors?
  private boolean fixMeta = false; // fix meta errors?
  private boolean checkHdfs = true; // load and check fs consistency?
  private boolean fixHdfsHoles = false; // fix fs holes?
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix table orphans (missing .tableinfo)
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
  private boolean fixSplitParents = false; // fix lingering split parents
  private boolean removeParents = false; // remove split parents
  private boolean fixReferenceFiles = false; // fix lingering reference store file
  private boolean fixHFileLinks = false; // fix lingering HFileLinks
  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
  private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
  private boolean fixAny = false; // Set to true if any of the fixes is required.

  // Limit checking/fixes to the listed tables; if empty, attempt to check/fix all tables.
  // hbase:meta is always checked.
  private Set<TableName> tablesIncluded = new HashSet<>();
  private TableName cleanReplicationBarrierTable;
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
  // maximum number of overlapping regions to sideline
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
  private Path sidelineDir = null;

  private boolean rerun = false; // if we tried to fix something, rerun hbck
  private static boolean summary = false; // if we want to print less output
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  // NOTE(review): judging by the name, true means the permission pre-check is skipped -- confirm.
  private boolean ignorePreCheckPermission = false;
285
  /*********
   * State
   *********/
  // Error reporter; created in the constructor and reset by clearState().
  final private HbckErrorReporter errors;
  // Fix counter; reset to 0 by clearState(). The offline integrity repair loop keeps iterating
  // while this is greater than zero.
  int fixes = 0;

  /**
   * This map contains the state of all hbck items. It maps from encoded region name to
   * HbckRegionInfo structure. The information contained in HbckRegionInfo is used to detect and
   * correct consistency (hdfs/meta/deployment) problems.
   */
  private TreeMap<String, HbckRegionInfo> regionInfoMap = new TreeMap<>();
  // Empty regioninfo qualifiers in hbase:meta
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();

  /**
   * This map from Tablename -> TableInfo contains the structures necessary to detect table
   * consistency problems (holes, dupes, overlaps). It is sorted to prevent dupes. If tablesIncluded
   * is empty, this map contains all tables. Otherwise, it contains only meta tables and tables in
   * tablesIncluded, unless checkMetaOnly is specified, in which case, it contains only the meta
   * table.
   */
  private SortedMap<TableName, HbckTableInfo> tablesInfo = new ConcurrentSkipListMap<>();

  /**
   * When initially looking at HDFS, we attempt to find any orphaned data.
   */
  private List<HbckRegionInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<>());

  // NOTE(review): unlike the other collections here, orphanTableDirs is not reset by clearState().
  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
  private Map<TableName, TableState> tableStates = new HashMap<>();
  // Retry counter factories for lock file handling and znode creation; built in the constructor.
  private final RetryCounterFactory lockFileRetryCounterFactory;
  private final RetryCounterFactory createZNodeRetryCounterFactory;

  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();

  // ZooKeeper watcher; created in the constructor.
  private ZKWatcher zkw = null;
  private String hbckEphemeralNodePath = null;
  private boolean hbckZodeCreated = false;
325
  /**
   * Creates an hbck instance whose parallel tasks run on a thread pool built by
   * {@link #createThreadPool(Configuration)}.
   * @param conf Configuration object
   * @throws IOException            propagated from
   *                                {@link #HBaseFsck(Configuration, ExecutorService)}
   * @throws ClassNotFoundException propagated from
   *                                {@link #HBaseFsck(Configuration, ExecutorService)}
   */
  public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
    this(conf, createThreadPool(conf));
  }
335
336  private static ExecutorService createThreadPool(Configuration conf) {
337    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
338    return new ScheduledThreadPoolExecutor(numThreads,
339      new ThreadFactoryBuilder().setNameFormat("hbasefsck-pool-%d").setDaemon(true)
340        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
341  }
342
  /**
   * Creates an hbck instance that runs its parallel tasks on the supplied executor. Besides
   * storing the executor, this sets up the error reporter, the lock file and znode retry counter
   * factories, and the ZooKeeper watcher.
   * @param conf Configuration object
   * @param exec executor used for the parallelizable tasks
   * @throws MasterNotRunningException    if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
    ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
    zkw = createZooKeeperWatcher();
  }
355
356  /** Returns A retry counter factory configured for retrying lock file creation. */
357  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
358    return new RetryCounterFactory(
359      conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
360      conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
361        DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
362      conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
363        DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
364  }
365
366  /** Returns A retry counter factory configured for retrying znode creation. */
367  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
368    return new RetryCounterFactory(
369      conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
370      conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
371        DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
372      conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
373        DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
374  }
375
376  /** Returns Return the tmp dir this tool writes too. */
377  @InterfaceAudience.Private
378  public static Path getTmpDir(Configuration conf) throws IOException {
379    return new Path(CommonFSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
380  }
381
382  private static class FileLockCallable implements Callable<FSDataOutputStream> {
383    RetryCounter retryCounter;
384    private final Configuration conf;
385    private Path hbckLockPath = null;
386
387    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
388      this.retryCounter = retryCounter;
389      this.conf = conf;
390    }
391
392    /** Returns Will be <code>null</code> unless you call {@link #call()} */
393    Path getHbckLockPath() {
394      return this.hbckLockPath;
395    }
396
397    @Override
398    public FSDataOutputStream call() throws IOException {
399      try {
400        FileSystem fs = CommonFSUtils.getCurrentFileSystem(this.conf);
401        FsPermission defaultPerms =
402          CommonFSUtils.getFilePermissions(fs, this.conf, HConstants.DATA_FILE_UMASK_KEY);
403        Path tmpDir = getTmpDir(conf);
404        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
405        fs.mkdirs(tmpDir);
406        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
407        out.writeBytes(InetAddress.getLocalHost().toString());
408        // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
409        out.writeBytes(" Written by an hbase-2.x Master to block an "
410          + "attempt by an hbase-1.x HBCK tool making modification to state. "
411          + "See 'HBCK must match HBase server version' in the hbase refguide.");
412        out.flush();
413        return out;
414      } catch (RemoteException e) {
415        if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
416          return null;
417        } else {
418          throw e;
419        }
420      }
421    }
422
423    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
424      final Path hbckLockFilePath, final FsPermission defaultPerms) throws IOException {
425      IOException exception = null;
426      do {
427        try {
428          return CommonFSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
429        } catch (IOException ioe) {
430          LOG.info("Failed to create lock file " + hbckLockFilePath.getName() + ", try="
431            + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
432          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), ioe);
433          try {
434            exception = ioe;
435            retryCounter.sleepUntilNextRetry();
436          } catch (InterruptedException ie) {
437            throw (InterruptedIOException) new InterruptedIOException(
438              "Can't create lock file " + hbckLockFilePath.getName()).initCause(ie);
439          }
440        }
441      } while (retryCounter.shouldRetry());
442
443      throw exception;
444    }
445  }
446
447  /**
448   * This method maintains a lock using a file. If the creation fails we return null
449   * @return FSDataOutputStream object corresponding to the newly opened lock file
450   * @throws IOException if IO failure occurs
451   */
452  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
453    RetryCounter retryCounter) throws IOException {
454    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
455    ExecutorService executor = Executors.newFixedThreadPool(1);
456    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
457    executor.execute(futureTask);
458    final int timeoutInSeconds =
459      conf.getInt("hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
460    FSDataOutputStream stream = null;
461    try {
462      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
463    } catch (ExecutionException ee) {
464      LOG.warn("Encountered exception when opening lock file", ee);
465    } catch (InterruptedException ie) {
466      LOG.warn("Interrupted when opening lock file", ie);
467      Thread.currentThread().interrupt();
468    } catch (TimeoutException exception) {
469      // took too long to obtain lock
470      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
471      futureTask.cancel(true);
472    } finally {
473      executor.shutdownNow();
474    }
475    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
476  }
477
478  private void unlockHbck() {
479    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
480      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
481      do {
482        try {
483          Closeables.close(hbckOutFd, true);
484          CommonFSUtils.delete(CommonFSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
485          LOG.info("Finishing hbck");
486          return;
487        } catch (IOException ioe) {
488          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
489            + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
490          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
491          try {
492            retryCounter.sleepUntilNextRetry();
493          } catch (InterruptedException ie) {
494            Thread.currentThread().interrupt();
495            LOG.warn("Interrupted while deleting lock file" + HBCK_LOCK_PATH);
496            return;
497          }
498        }
499      } while (retryCounter.shouldRetry());
500    }
501  }
502
503  /**
504   * To repair region consistency, one must call connect() in order to repair online state.
505   */
506  public void connect() throws IOException {
507
508    if (isExclusive()) {
509      // Grab the lock
510      Pair<Path, FSDataOutputStream> pair =
511        checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
512      HBCK_LOCK_PATH = pair.getFirst();
513      this.hbckOutFd = pair.getSecond();
514      if (hbckOutFd == null) {
515        setRetCode(-1);
516        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. "
517          + "[If you are sure no other instance is running, delete the lock file " + HBCK_LOCK_PATH
518          + " and rerun the tool]");
519        throw new IOException("Duplicate hbck - Abort");
520      }
521
522      // Make sure to cleanup the lock
523      hbckLockCleanup.set(true);
524    }
525
526    // Add a shutdown hook to this thread, in case user tries to
527    // kill the hbck with a ctrl-c, we want to cleanup the lock so that
528    // it is available for further calls
529    Runtime.getRuntime().addShutdownHook(new Thread() {
530      @Override
531      public void run() {
532        IOUtils.closeQuietly(HBaseFsck.this, e -> LOG.warn("", e));
533        cleanupHbckZnode();
534        unlockHbck();
535      }
536    });
537
538    LOG.info("Launching hbck");
539
540    connection = ConnectionFactory.createConnection(getConf());
541    admin = connection.getAdmin();
542    meta = connection.getTable(TableName.META_TABLE_NAME);
543    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS, Option.DEAD_SERVERS,
544      Option.MASTER, Option.BACKUP_MASTERS, Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
545  }
546
547  /**
548   * Get deployed regions according to the region servers.
549   */
550  private void loadDeployedRegions() throws IOException, InterruptedException {
551    // From the master, get a list of all known live region servers
552    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
553    errors.print("Number of live region servers: " + regionServers.size());
554    if (details) {
555      for (ServerName rsinfo : regionServers) {
556        errors.print("  " + rsinfo.getServerName());
557      }
558    }
559
560    // From the master, get a list of all dead region servers
561    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
562    errors.print("Number of dead region servers: " + deadRegionServers.size());
563    if (details) {
564      for (ServerName name : deadRegionServers) {
565        errors.print("  " + name);
566      }
567    }
568
569    // Print the current master name and state
570    errors.print("Master: " + status.getMasterName());
571
572    // Print the list of all backup masters
573    Collection<ServerName> backupMasters = status.getBackupMasterNames();
574    errors.print("Number of backup masters: " + backupMasters.size());
575    if (details) {
576      for (ServerName name : backupMasters) {
577        errors.print("  " + name);
578      }
579    }
580
581    errors.print("Average load: " + status.getAverageLoad());
582    errors.print("Number of requests: " + status.getRequestCount());
583    errors.print("Number of regions: " + status.getRegionCount());
584
585    List<RegionState> rits = status.getRegionStatesInTransition();
586    errors.print("Number of regions in transition: " + rits.size());
587    if (details) {
588      for (RegionState state : rits) {
589        errors.print("  " + state.toDescriptiveString());
590      }
591    }
592
593    // Determine what's deployed
594    processRegionServers(regionServers);
595  }
596
597  /**
598   * Clear the current state of hbck.
599   */
600  private void clearState() {
601    // Make sure regionInfo is empty before starting
602    fixes = 0;
603    regionInfoMap.clear();
604    emptyRegionInfoQualifiers.clear();
605    tableStates.clear();
606    errors.clear();
607    tablesInfo.clear();
608    orphanHdfsDirs.clear();
609    skippedRegions.clear();
610  }
611
612  /**
613   * This repair method analyzes hbase data in hdfs and repairs it to satisfy the table integrity
614   * rules. HBase doesn't need to be online for this operation to work.
615   */
616  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
617    // Initial pass to fix orphans.
618    if (
619      shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
620        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())
621    ) {
622      LOG.info("Loading regioninfos HDFS");
623      // if nothing is happening this should always complete in two iterations.
624      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
625      int curIter = 0;
626      do {
627        clearState(); // clears hbck state and reset fixes to 0 and.
628        // repair what's on HDFS
629        restoreHdfsIntegrity();
630        curIter++;// limit the number of iterations.
631      } while (fixes > 0 && curIter <= maxIterations);
632
633      // Repairs should be done in the first iteration and verification in the second.
634      // If there are more than 2 passes, something funny has happened.
635      if (curIter > 2) {
636        if (curIter == maxIterations) {
637          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
638            + "Tables integrity may not be fully repaired!");
639        } else {
640          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
641        }
642      }
643    }
644  }
645
646  /**
647   * This repair method requires the cluster to be online since it contacts region servers and the
648   * masters. It makes each region's state in HDFS, in hbase:meta, and deployments consistent.
649   * @return If &gt; 0 , number of errors detected, if &lt; 0 there was an unrecoverable error. If
650   *         0, we have a clean hbase.
651   */
652  public int onlineConsistencyRepair() throws IOException, KeeperException, InterruptedException {
653
654    // get regions according to what is online on each RegionServer
655    loadDeployedRegions();
656    // check whether hbase:meta is deployed and online
657    recordMetaRegion();
658    // Check if hbase:meta is found only once and in the right place
659    if (!checkMetaRegion()) {
660      String errorMsg = "hbase:meta table is not consistent. ";
661      if (shouldFixAssignments()) {
662        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
663      } else {
664        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
665      }
666      errors.reportError(errorMsg + " Exiting...");
667      return -2;
668    }
669    // Not going with further consistency check for tables when hbase:meta itself is not consistent.
670    LOG.info("Loading regionsinfo from the hbase:meta table");
671    boolean success = loadMetaEntries();
672    if (!success) return -1;
673
674    // Empty cells in hbase:meta?
675    reportEmptyMetaCells();
676
677    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
678    if (shouldFixEmptyMetaCells()) {
679      fixEmptyMetaCells();
680    }
681
682    // get a list of all tables that have not changed recently.
683    if (!checkMetaOnly) {
684      reportTablesInFlux();
685    }
686
687    // Get disabled tables states
688    loadTableStates();
689
690    // load regiondirs and regioninfos from HDFS
691    if (shouldCheckHdfs()) {
692      LOG.info("Loading region directories from HDFS");
693      loadHdfsRegionDirs();
694      LOG.info("Loading region information from HDFS");
695      loadHdfsRegionInfos();
696    }
697
698    // fix the orphan tables
699    fixOrphanTables();
700
701    LOG.info("Checking and fixing region consistency");
702    // Check and fix consistency
703    checkAndFixConsistency();
704
705    // Check integrity (does not fix)
706    checkIntegrity();
707    return errors.getErrorList().size();
708  }
709
710  /**
711   * This method maintains an ephemeral znode. If the creation fails we return false or throw
712   * exception
713   * @return true if creating znode succeeds; false otherwise
714   * @throws IOException if IO failure occurs
715   */
716  private boolean setMasterInMaintenanceMode() throws IOException {
717    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
718    hbckEphemeralNodePath = ZNodePaths.joinZNode(zkw.getZNodePaths().masterMaintZNode,
719      "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
720    do {
721      try {
722        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
723        if (hbckZodeCreated) {
724          break;
725        }
726      } catch (KeeperException e) {
727        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
728          throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
729        }
730        // fall through and retry
731      }
732
733      LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try="
734        + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
735
736      try {
737        retryCounter.sleepUntilNextRetry();
738      } catch (InterruptedException ie) {
739        throw (InterruptedIOException) new InterruptedIOException(
740          "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
741      }
742    } while (retryCounter.shouldRetry());
743    return hbckZodeCreated;
744  }
745
746  private void cleanupHbckZnode() {
747    try {
748      if (zkw != null && hbckZodeCreated) {
749        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
750        hbckZodeCreated = false;
751      }
752    } catch (KeeperException e) {
753      // Ignore
754      if (!e.code().equals(KeeperException.Code.NONODE)) {
755        LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
756      }
757    }
758  }
759
760  /**
761   * Contacts the master and prints out cluster-wide information
762   * @return 0 on success, non-zero on failure
763   */
764  public int onlineHbck()
765    throws IOException, KeeperException, InterruptedException, ReplicationException {
766    // print hbase server version
767    errors.print("Version: " + status.getHBaseVersion());
768
769    // Clean start
770    clearState();
771    // Do offline check and repair first
772    offlineHdfsIntegrityRepair();
773    offlineReferenceFileRepair();
774    offlineHLinkFileRepair();
775    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
776    // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
777    // is better to set Master into maintenance mode during online hbck.
778    //
779    if (!setMasterInMaintenanceMode()) {
780      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
781        + "error.  Please run HBCK multiple times to reduce the chance of transient error.");
782    }
783
784    onlineConsistencyRepair();
785
786    if (checkRegionBoundaries) {
787      checkRegionBoundaries();
788    }
789
790    checkAndFixReplication();
791
792    cleanReplicationBarrier();
793
794    // Remove the hbck znode
795    cleanupHbckZnode();
796
797    // Remove the hbck lock
798    unlockHbck();
799
800    // Print table summary
801    printTableSummary(tablesInfo);
802    return errors.summarize();
803  }
804
805  public static byte[] keyOnly(byte[] b) {
806    if (b == null) return b;
807    int rowlength = Bytes.toShort(b, 0);
808    byte[] result = new byte[rowlength];
809    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
810    return result;
811  }
812
813  @Override
814  public void close() throws IOException {
815    try {
816      cleanupHbckZnode();
817      unlockHbck();
818    } catch (Exception io) {
819      LOG.warn(io.toString(), io);
820    } finally {
821      if (zkw != null) {
822        zkw.close();
823        zkw = null;
824      }
825      IOUtils.closeQuietly(admin, e -> LOG.warn("", e));
826      IOUtils.closeQuietly(meta, e -> LOG.warn("", e));
827      IOUtils.closeQuietly(connection, e -> LOG.warn("", e));
828    }
829  }
830
831  private static class RegionBoundariesInformation {
832    public byte[] regionName;
833    public byte[] metaFirstKey;
834    public byte[] metaLastKey;
835    public byte[] storesFirstKey;
836    public byte[] storesLastKey;
837
838    @Override
839    public String toString() {
840      return "regionName=" + Bytes.toStringBinary(regionName) + "\nmetaFirstKey="
841        + Bytes.toStringBinary(metaFirstKey) + "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey)
842        + "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) + "\nstoresLastKey="
843        + Bytes.toStringBinary(storesLastKey);
844    }
845  }
846
847  public void checkRegionBoundaries() {
848    try {
849      ByteArrayComparator comparator = new ByteArrayComparator();
850      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
851      final RegionBoundariesInformation currentRegionBoundariesInformation =
852        new RegionBoundariesInformation();
853      Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
854      for (RegionInfo regionInfo : regions) {
855        Path tableDir = CommonFSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
856        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
857        // For each region, get the start and stop key from the META and compare them to the
858        // same information from the Stores.
859        Path path = new Path(tableDir, regionInfo.getEncodedName());
860        FileSystem fs = path.getFileSystem(getConf());
861        FileStatus[] files = fs.listStatus(path);
862        // For all the column families in this region...
863        byte[] storeFirstKey = null;
864        byte[] storeLastKey = null;
865        for (FileStatus file : files) {
866          String fileName = file.getPath().toString();
867          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
868          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
869            FileStatus[] storeFiles = fs.listStatus(file.getPath());
870            // For all the stores in this column family.
871            for (FileStatus storeFile : storeFiles) {
872              HFile.Reader reader =
873                HFile.createReader(fs, storeFile.getPath(), CacheConfig.DISABLED, true, getConf());
874              if (
875                (reader.getFirstKey() != null)
876                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
877                    ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))
878              ) {
879                storeFirstKey = ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey();
880              }
881              if (
882                (reader.getLastKey() != null)
883                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
884                    ((KeyValue.KeyOnlyKeyValue) reader.getLastKey().get()).getKey())) < 0)
885              ) {
886                storeLastKey = ((KeyValue.KeyOnlyKeyValue) reader.getLastKey().get()).getKey();
887              }
888              reader.close();
889            }
890          }
891        }
892        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
893        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
894        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
895        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
896        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
897          currentRegionBoundariesInformation.metaFirstKey = null;
898        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
899          currentRegionBoundariesInformation.metaLastKey = null;
900
901        // For a region to be correct, we need the META start key to be smaller or equal to the
902        // smallest start key from all the stores, and the start key from the next META entry to
903        // be bigger than the last key from all the current stores. First region start key is null;
904        // Last region end key is null; some regions can be empty and not have any store.
905
906        boolean valid = true;
907        // Checking start key.
908        if (
909          (currentRegionBoundariesInformation.storesFirstKey != null)
910            && (currentRegionBoundariesInformation.metaFirstKey != null)
911        ) {
912          valid = valid && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
913            currentRegionBoundariesInformation.metaFirstKey) >= 0;
914        }
915        // Checking stop key.
916        if (
917          (currentRegionBoundariesInformation.storesLastKey != null)
918            && (currentRegionBoundariesInformation.metaLastKey != null)
919        ) {
920          valid = valid && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
921            currentRegionBoundariesInformation.metaLastKey) < 0;
922        }
923        if (!valid) {
924          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
925            tablesInfo.get(regionInfo.getTable()));
926          LOG.warn("Region's boundaries not aligned between stores and META for:");
927          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
928        }
929      }
930    } catch (IOException e) {
931      LOG.error(e.toString(), e);
932    }
933  }
934
935  /**
936   * Iterates through the list of all orphan/invalid regiondirs.
937   */
938  private void adoptHdfsOrphans(Collection<HbckRegionInfo> orphanHdfsDirs) throws IOException {
939    for (HbckRegionInfo hi : orphanHdfsDirs) {
940      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
941      adoptHdfsOrphan(hi);
942    }
943  }
944
945  /**
946   * Orphaned regions are regions without a .regioninfo file in them. We "adopt" these orphans by
947   * creating a new region, and moving the column families, recovered edits, WALs, into the new
948   * region dir. We determine the region startkey and endkeys by looking at all of the hfiles inside
949   * the column families to identify the min and max keys. The resulting region will likely violate
950   * table integrity but will be dealt with by merging overlapping regions.
951   */
952  @SuppressWarnings("deprecation")
953  private void adoptHdfsOrphan(HbckRegionInfo hi) throws IOException {
954    Path p = hi.getHdfsRegionDir();
955    FileSystem fs = p.getFileSystem(getConf());
956    FileStatus[] dirs = fs.listStatus(p);
957    if (dirs == null) {
958      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " + p
959        + ". This dir could probably be deleted.");
960      return;
961    }
962
963    TableName tableName = hi.getTableName();
964    HbckTableInfo tableInfo = tablesInfo.get(tableName);
965    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
966    TableDescriptor template = tableInfo.getTableDescriptor();
967
968    // find min and max key values
969    Pair<byte[], byte[]> orphanRegionRange = null;
970    for (FileStatus cf : dirs) {
971      String cfName = cf.getPath().getName();
972      // TODO Figure out what the special dirs are
973      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
974
975      FileStatus[] hfiles = fs.listStatus(cf.getPath());
976      for (FileStatus hfile : hfiles) {
977        byte[] start, end;
978        HFile.Reader hf = null;
979        try {
980          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
981          Optional<Cell> startKv = hf.getFirstKey();
982          start = CellUtil.cloneRow(startKv.get());
983          Optional<Cell> endKv = hf.getLastKey();
984          end = CellUtil.cloneRow(endKv.get());
985        } catch (Exception ioe) {
986          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
987          continue;
988        } finally {
989          if (hf != null) {
990            hf.close();
991          }
992        }
993
994        // expand the range to include the range of all hfiles
995        if (orphanRegionRange == null) {
996          // first range
997          orphanRegionRange = new Pair<>(start, end);
998        } else {
999          // TODO add test
1000
1001          // expand range only if the hfile is wider.
1002          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1003            orphanRegionRange.setFirst(start);
1004          }
1005          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
1006            orphanRegionRange.setSecond(end);
1007          }
1008        }
1009      }
1010    }
1011    if (orphanRegionRange == null) {
1012      LOG.warn("No data in dir " + p + ", sidelining data");
1013      fixes++;
1014      sidelineRegionDir(fs, hi);
1015      return;
1016    }
1017    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", "
1018      + Bytes.toString(orphanRegionRange.getSecond()) + ")");
1019
1020    // create new region on hdfs. move data into place.
1021    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1022      .setStartKey(orphanRegionRange.getFirst())
1023      .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1])).build();
1024    LOG.info("Creating new region : " + regionInfo);
1025    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1026    Path target = region.getRegionFileSystem().getRegionDir();
1027
1028    // rename all the data to new region
1029    mergeRegionDirs(target, hi);
1030    fixes++;
1031  }
1032
1033  /**
1034   * This method determines if there are table integrity errors in HDFS. If there are errors and the
1035   * appropriate "fix" options are enabled, the method will first correct orphan regions making them
1036   * into legit regiondirs, and then reload to merge potentially overlapping regions.
1037   * @return number of table integrity errors found
1038   */
1039  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1040    // Determine what's on HDFS
1041    LOG.info("Loading HBase regioninfo from HDFS...");
1042    loadHdfsRegionDirs(); // populating regioninfo table.
1043
1044    int errs = errors.getErrorList().size();
1045    // First time just get suggestions.
1046    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1047    checkHdfsIntegrity(false, false);
1048
1049    if (errors.getErrorList().size() == errs) {
1050      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1051      return 0;
1052    }
1053
1054    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1055      adoptHdfsOrphans(orphanHdfsDirs);
1056      // TODO optimize by incrementally adding instead of reloading.
1057    }
1058
1059    // Make sure there are no holes now.
1060    if (shouldFixHdfsHoles()) {
1061      clearState(); // this also resets # fixes.
1062      loadHdfsRegionDirs();
1063      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1064      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1065    }
1066
1067    // Now we fix overlaps
1068    if (shouldFixHdfsOverlaps()) {
1069      // second pass we fix overlaps.
1070      clearState(); // this also resets # fixes.
1071      loadHdfsRegionDirs();
1072      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1073      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1074    }
1075
1076    return errors.getErrorList().size();
1077  }
1078
1079  /**
1080   * Scan all the store file names to find any lingering reference files, which refer to some
1081   * none-exiting files. If "fix" option is enabled, any lingering reference file will be sidelined
1082   * if found.
1083   * <p>
1084   * Lingering reference file prevents a region from opening. It has to be fixed before a cluster
1085   * can start properly.
1086   */
1087  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1088    clearState();
1089    Configuration conf = getConf();
1090    Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1091    FileSystem fs = hbaseRoot.getFileSystem(conf);
1092    LOG.info("Computing mapping of all store files");
1093    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1094      new FSUtils.ReferenceFileFilter(fs), executor, errors);
1095    errors.print("");
1096    LOG.info("Validating mapping using HDFS state");
1097    for (Path path : allFiles.values()) {
1098      Path referredToFile = StoreFileInfo.getReferredToFile(path);
1099      if (fs.exists(referredToFile)) continue; // good, expected
1100
1101      // Found a lingering reference file
1102      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1103        "Found lingering reference file " + path);
1104      if (!shouldFixReferenceFiles()) continue;
1105
1106      // Now, trying to fix it since requested
1107      boolean success = false;
1108      String pathStr = path.toString();
1109
1110      // A reference file path should be like
1111      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1112      // Up 5 directories to get the root folder.
1113      // So the file will be sidelined to a similar folder structure.
1114      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1115      for (int i = 0; index > 0 && i < 5; i++) {
1116        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1117      }
1118      if (index > 0) {
1119        Path rootDir = getSidelineDir();
1120        Path dst = new Path(rootDir, pathStr.substring(index + 1));
1121        fs.mkdirs(dst.getParent());
1122        LOG.info("Trying to sideline reference file " + path + " to " + dst);
1123        setShouldRerun();
1124
1125        success = fs.rename(path, dst);
1126        debugLsr(dst);
1127
1128      }
1129      if (!success) {
1130        LOG.error("Failed to sideline reference file " + path);
1131      }
1132    }
1133  }
1134
1135  /**
1136   * Scan all the store file names to find any lingering HFileLink files, which refer to some
1137   * none-exiting files. If "fix" option is enabled, any lingering HFileLink file will be sidelined
1138   * if found.
1139   */
1140  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1141    Configuration conf = getConf();
1142    Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1143    FileSystem fs = hbaseRoot.getFileSystem(conf);
1144    LOG.info("Computing mapping of all link files");
1145    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1146      new FSUtils.HFileLinkFilter(), executor, errors);
1147    errors.print("");
1148
1149    LOG.info("Validating mapping using HDFS state");
1150    for (Path path : allFiles.values()) {
1151      // building HFileLink object to gather locations
1152      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1153      if (actualLink.exists(fs)) continue; // good, expected
1154
1155      // Found a lingering HFileLink
1156      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1157      if (!shouldFixHFileLinks()) continue;
1158
1159      // Now, trying to fix it since requested
1160      setShouldRerun();
1161
1162      // An HFileLink path should be like
1163      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
1164      // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same
1165      // folder structure.
1166      boolean success = sidelineFile(fs, hbaseRoot, path);
1167
1168      if (!success) {
1169        LOG.error("Failed to sideline HFileLink file " + path);
1170      }
1171
1172      // An HFileLink backreference path should be like
1173      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
1174      // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same
1175      // folder structure.
1176      Path backRefPath = FileLink.getBackReferencesDir(
1177        HFileArchiveUtil.getStoreArchivePath(conf,
1178          HFileLink.getReferencedTableName(path.getName().toString()),
1179          HFileLink.getReferencedRegionName(path.getName().toString()), path.getParent().getName()),
1180        HFileLink.getReferencedHFileName(path.getName().toString()));
1181      success = sidelineFile(fs, hbaseRoot, backRefPath);
1182
1183      if (!success) {
1184        LOG.error("Failed to sideline HFileLink backreference file " + path);
1185      }
1186    }
1187  }
1188
1189  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1190    URI uri = hbaseRoot.toUri().relativize(path.toUri());
1191    if (uri.isAbsolute()) return false;
1192    String relativePath = uri.getPath();
1193    Path rootDir = getSidelineDir();
1194    Path dst = new Path(rootDir, relativePath);
1195    boolean pathCreated = fs.mkdirs(dst.getParent());
1196    if (!pathCreated) {
1197      LOG.error("Failed to create path: " + dst.getParent());
1198      return false;
1199    }
1200    LOG.info("Trying to sideline file " + path + " to " + dst);
1201    return fs.rename(path, dst);
1202  }
1203
1204  /**
1205   * TODO -- need to add tests for this.
1206   */
1207  private void reportEmptyMetaCells() {
1208    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: "
1209      + emptyRegionInfoQualifiers.size());
1210    if (details) {
1211      for (Result r : emptyRegionInfoQualifiers) {
1212        errors.print("  " + r);
1213      }
1214    }
1215  }
1216
1217  /**
1218   * TODO -- need to add tests for this.
1219   */
1220  private void reportTablesInFlux() {
1221    AtomicInteger numSkipped = new AtomicInteger(0);
1222    TableDescriptor[] allTables = getTables(numSkipped);
1223    errors.print("Number of Tables: " + allTables.length);
1224    if (details) {
1225      if (numSkipped.get() > 0) {
1226        errors.detail("Number of Tables in flux: " + numSkipped.get());
1227      }
1228      for (TableDescriptor td : allTables) {
1229        errors.detail("  Table: " + td.getTableName() + "\t" + (td.isReadOnly() ? "ro" : "rw")
1230          + "\t" + (td.isMetaRegion() ? "META" : "    ") + "\t" + " families: "
1231          + td.getColumnFamilyCount());
1232      }
1233    }
1234  }
1235
  /** Returns the error reporter held in the {@code errors} field of this instance. */
  public HbckErrorReporter getErrors() {
    return errors;
  }
1239
1240  /**
1241   * Populate hbi's from regionInfos loaded from file system.
1242   */
1243  private SortedMap<TableName, HbckTableInfo> loadHdfsRegionInfos()
1244    throws IOException, InterruptedException {
1245    tablesInfo.clear(); // regenerating the data
1246    // generate region split structure
1247    Collection<HbckRegionInfo> hbckRegionInfos = regionInfoMap.values();
1248
1249    // Parallelized read of .regioninfo files.
1250    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckRegionInfos.size());
1251    List<Future<Void>> hbiFutures;
1252
1253    for (HbckRegionInfo hbi : hbckRegionInfos) {
1254      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1255      hbis.add(work);
1256    }
1257
1258    // Submit and wait for completion
1259    hbiFutures = executor.invokeAll(hbis);
1260
1261    for (int i = 0; i < hbiFutures.size(); i++) {
1262      WorkItemHdfsRegionInfo work = hbis.get(i);
1263      Future<Void> f = hbiFutures.get(i);
1264      try {
1265        f.get();
1266      } catch (ExecutionException e) {
1267        LOG.warn("Failed to read .regioninfo file for region " + work.hbi.getRegionNameAsString(),
1268          e.getCause());
1269      }
1270    }
1271
1272    Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
1273    FileSystem fs = hbaseRoot.getFileSystem(getConf());
1274    // serialized table info gathering.
1275    for (HbckRegionInfo hbi : hbckRegionInfos) {
1276
1277      if (hbi.getHdfsHRI() == null) {
1278        // was an orphan
1279        continue;
1280      }
1281
1282      // get table name from hdfs, populate various HBaseFsck tables.
1283      TableName tableName = hbi.getTableName();
1284      if (tableName == null) {
1285        // There was an entry in hbase:meta not in the HDFS?
1286        LOG.warn("tableName was null for: " + hbi);
1287        continue;
1288      }
1289
1290      HbckTableInfo modTInfo = tablesInfo.get(tableName);
1291      if (modTInfo == null) {
1292        // only executed once per table.
1293        modTInfo = new HbckTableInfo(tableName, this);
1294        tablesInfo.put(tableName, modTInfo);
1295        try {
1296          TableDescriptor htd =
1297            FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1298          modTInfo.htds.add(htd);
1299        } catch (IOException ioe) {
1300          if (!orphanTableDirs.containsKey(tableName)) {
1301            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1302            // should only report once for each table
1303            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1304              "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1305            Set<String> columns = new HashSet<>();
1306            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1307          }
1308        }
1309      }
1310      if (!hbi.isSkipChecks()) {
1311        modTInfo.addRegionInfo(hbi);
1312      }
1313    }
1314
1315    loadTableInfosForTablesWithNoRegion();
1316    errors.print("");
1317
1318    return tablesInfo;
1319  }
1320
1321  /**
1322   * To get the column family list according to the column family dirs
1323   * @return a set of column families
1324   */
1325  private Set<String> getColumnFamilyList(Set<String> columns, HbckRegionInfo hbi)
1326    throws IOException {
1327    Path regionDir = hbi.getHdfsRegionDir();
1328    FileSystem fs = regionDir.getFileSystem(getConf());
1329    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1330    for (FileStatus subdir : subDirs) {
1331      String columnfamily = subdir.getPath().getName();
1332      columns.add(columnfamily);
1333    }
1334    return columns;
1335  }
1336
1337  /**
1338   * To fabricate a .tableinfo file with following contents<br>
1339   * 1. the correct tablename <br>
1340   * 2. the correct colfamily list<br>
1341   * 3. the default properties for both {@link TableDescriptor} and
1342   * {@link ColumnFamilyDescriptor}<br>
1343   */
1344  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1345    Set<String> columns) throws IOException {
1346    if (columns == null || columns.isEmpty()) return false;
1347    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
1348    for (String columnfamimly : columns) {
1349      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnfamimly));
1350    }
1351    fstd.createTableDescriptor(builder.build(), true);
1352    return true;
1353  }
1354
1355  /**
1356   * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1357   */
1358  public void fixEmptyMetaCells() throws IOException {
1359    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1360      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1361      for (Result region : emptyRegionInfoQualifiers) {
1362        deleteMetaRegion(region.getRow());
1363        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1364      }
1365      emptyRegionInfoQualifiers.clear();
1366    }
1367  }
1368
1369  /**
1370   * To fix orphan table by creating a .tableinfo file under tableDir <br>
1371   * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1372   * 2. else create a default .tableinfo file with following items<br>
1373   * &nbsp;2.1 the correct tablename <br>
1374   * &nbsp;2.2 the correct colfamily list<br>
1375   * &nbsp;2.3 the default properties for both {@link TableDescriptor} and
1376   * {@link ColumnFamilyDescriptor}<br>
1377   */
1378  public void fixOrphanTables() throws IOException {
1379    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1380
1381      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1382      tmpList.addAll(orphanTableDirs.keySet());
1383      TableDescriptor[] htds = getTableDescriptors(tmpList);
1384      Iterator<Entry<TableName, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
1385      int j = 0;
1386      int numFailedCase = 0;
1387      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1388      while (iter.hasNext()) {
1389        Entry<TableName, Set<String>> entry = iter.next();
1390        TableName tableName = entry.getKey();
1391        LOG.info("Trying to fix orphan table error: " + tableName);
1392        if (j < htds.length) {
1393          if (tableName.equals(htds[j].getTableName())) {
1394            TableDescriptor htd = htds[j];
1395            LOG.info("fixing orphan table: " + tableName + " from cache");
1396            fstd.createTableDescriptor(htd, true);
1397            j++;
1398            iter.remove();
1399          }
1400        } else {
1401          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1402            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1403            LOG.warn(
1404              "Strongly recommend to modify the TableDescriptor if necessary for: " + tableName);
1405            iter.remove();
1406          } else {
1407            LOG.error("Unable to create default .tableinfo for " + tableName
1408              + " while missing column family information");
1409            numFailedCase++;
1410          }
1411        }
1412        fixes++;
1413      }
1414
1415      if (orphanTableDirs.isEmpty()) {
1416        // all orphanTableDirs are luckily recovered
1417        // re-run doFsck after recovering the .tableinfo file
1418        setShouldRerun();
1419        LOG.warn(
1420          "Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1421      } else if (numFailedCase > 0) {
1422        LOG.error("Failed to fix " + numFailedCase + " OrphanTables with default .tableinfo files");
1423      }
1424
1425    }
1426    // cleanup the list
1427    orphanTableDirs.clear();
1428
1429  }
1430
1431  /**
1432   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1433   */
1434  private void logParallelMerge() {
1435    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1436      LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to"
1437        + " false to run serially.");
1438    } else {
1439      LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to"
1440        + " true to run in parallel.");
1441    }
1442  }
1443
1444  private SortedMap<TableName, HbckTableInfo> checkHdfsIntegrity(boolean fixHoles,
1445    boolean fixOverlaps) throws IOException {
1446    LOG.info("Checking HBase region split map from HDFS data...");
1447    logParallelMerge();
1448    for (HbckTableInfo tInfo : tablesInfo.values()) {
1449      TableIntegrityErrorHandler handler;
1450      if (fixHoles || fixOverlaps) {
1451        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(), fixHoles, fixOverlaps);
1452      } else {
1453        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1454      }
1455      if (!tInfo.checkRegionChain(handler)) {
1456        // should dump info as well.
1457        errors.report("Found inconsistency in table " + tInfo.getName());
1458      }
1459    }
1460    return tablesInfo;
1461  }
1462
1463  Path getSidelineDir() throws IOException {
1464    if (sidelineDir == null) {
1465      Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1466      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1467      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-" + startMillis);
1468    }
1469    return sidelineDir;
1470  }
1471
1472  /**
1473   * Sideline a region dir (instead of deleting it)
1474   */
1475  Path sidelineRegionDir(FileSystem fs, HbckRegionInfo hi) throws IOException {
1476    return sidelineRegionDir(fs, null, hi);
1477  }
1478
1479  /**
1480   * Sideline a region dir (instead of deleting it)
1481   * @param parentDir if specified, the region will be sidelined to folder like
1482   *                  {@literal .../parentDir/<table name>/<region name>}. The purpose is to group
1483   *                  together similar regions sidelined, for example, those regions should be bulk
1484   *                  loaded back later on. If NULL, it is ignored.
1485   */
1486  Path sidelineRegionDir(FileSystem fs, String parentDir, HbckRegionInfo hi) throws IOException {
1487    TableName tableName = hi.getTableName();
1488    Path regionDir = hi.getHdfsRegionDir();
1489
1490    if (!fs.exists(regionDir)) {
1491      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1492      return null;
1493    }
1494
1495    Path rootDir = getSidelineDir();
1496    if (parentDir != null) {
1497      rootDir = new Path(rootDir, parentDir);
1498    }
1499    Path sidelineTableDir = CommonFSUtils.getTableDir(rootDir, tableName);
1500    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1501    fs.mkdirs(sidelineRegionDir);
1502    boolean success = false;
1503    FileStatus[] cfs = fs.listStatus(regionDir);
1504    if (cfs == null) {
1505      LOG.info("Region dir is empty: " + regionDir);
1506    } else {
1507      for (FileStatus cf : cfs) {
1508        Path src = cf.getPath();
1509        Path dst = new Path(sidelineRegionDir, src.getName());
1510        if (fs.isFile(src)) {
1511          // simple file
1512          success = fs.rename(src, dst);
1513          if (!success) {
1514            String msg = "Unable to rename file " + src + " to " + dst;
1515            LOG.error(msg);
1516            throw new IOException(msg);
1517          }
1518          continue;
1519        }
1520
1521        // is a directory.
1522        fs.mkdirs(dst);
1523
1524        LOG.info("Sidelining files from " + src + " into containing region " + dst);
1525        // FileSystem.rename is inconsistent with directories -- if the
1526        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1527        // it moves the src into the dst dir resulting in (foo/a/b). If
1528        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1529        FileStatus[] hfiles = fs.listStatus(src);
1530        if (hfiles != null && hfiles.length > 0) {
1531          for (FileStatus hfile : hfiles) {
1532            success = fs.rename(hfile.getPath(), dst);
1533            if (!success) {
1534              String msg = "Unable to rename file " + src + " to " + dst;
1535              LOG.error(msg);
1536              throw new IOException(msg);
1537            }
1538          }
1539        }
1540        LOG.debug("Sideline directory contents:");
1541        debugLsr(sidelineRegionDir);
1542      }
1543    }
1544
1545    LOG.info("Removing old region dir: " + regionDir);
1546    success = fs.delete(regionDir, true);
1547    if (!success) {
1548      String msg = "Unable to delete dir " + regionDir;
1549      LOG.error(msg);
1550      throw new IOException(msg);
1551    }
1552    return sidelineRegionDir;
1553  }
1554
1555  /**
1556   * Load the list of disabled tables in ZK into local set.
1557   */
1558  private void loadTableStates() throws IOException {
1559    tableStates = MetaTableAccessor.getTableStates(connection);
1560    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
1561    // has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this in
1562    // meantime.
1563    this.tableStates.put(TableName.META_TABLE_NAME,
1564      new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1565  }
1566
1567  /**
1568   * Check if the specified region's table is disabled.
1569   * @param tableName table to check status of
1570   */
1571  boolean isTableDisabled(TableName tableName) {
1572    return tableStates.containsKey(tableName)
1573      && tableStates.get(tableName).inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1574  }
1575
1576  /**
1577   * Scan HDFS for all regions, recording their information into regionInfoMap
1578   */
1579  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1580    Path rootDir = CommonFSUtils.getRootDir(getConf());
1581    FileSystem fs = rootDir.getFileSystem(getConf());
1582
1583    // list all tables from HDFS
1584    List<FileStatus> tableDirs = Lists.newArrayList();
1585
1586    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1587
1588    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1589    for (Path path : paths) {
1590      TableName tableName = CommonFSUtils.getTableName(path);
1591      if (
1592        (!checkMetaOnly && isTableIncluded(tableName))
1593          || tableName.equals(TableName.META_TABLE_NAME)
1594      ) {
1595        tableDirs.add(fs.getFileStatus(path));
1596      }
1597    }
1598
1599    // verify that version file exists
1600    if (!foundVersionFile) {
1601      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1602        "Version file does not exist in root dir " + rootDir);
1603      if (shouldFixVersionFile()) {
1604        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME + " file.");
1605        setShouldRerun();
1606        FSUtils.setVersion(fs, rootDir,
1607          getConf().getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000),
1608          getConf().getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1609            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1610      }
1611    }
1612
1613    // Avoid multithreading at table-level because already multithreaded internally at
1614    // region-level. Additionally multithreading at table-level can lead to deadlock
1615    // if there are many tables in the cluster. Since there are a limited # of threads
1616    // in the executor's thread pool and if we multithread at the table-level by putting
1617    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1618    // executor tied up solely in waiting for the tables' region-level calls to complete.
1619    // If there are enough tables then there will be no actual threads in the pool left
1620    // for the region-level callables to be serviced.
1621    for (FileStatus tableDir : tableDirs) {
1622      LOG.debug("Loading region dirs from " + tableDir.getPath());
1623      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1624      try {
1625        item.call();
1626      } catch (ExecutionException e) {
1627        LOG.warn("Could not completely load table dir " + tableDir.getPath(), e.getCause());
1628      }
1629    }
1630    errors.print("");
1631  }
1632
1633  /**
1634   * Record the location of the hbase:meta region as found in ZooKeeper.
1635   */
1636  private boolean recordMetaRegion() throws IOException {
1637    List<HRegionLocation> locs;
1638    try (RegionLocator locator = connection.getRegionLocator(TableName.META_TABLE_NAME)) {
1639      locs = locator.getRegionLocations(HConstants.EMPTY_START_ROW, true);
1640    }
1641    if (locs == null || locs.isEmpty()) {
1642      errors.reportError(ERROR_CODE.NULL_META_REGION, "META region was not found in ZooKeeper");
1643      return false;
1644    }
1645    for (HRegionLocation metaLocation : locs) {
1646      // Check if Meta region is valid and existing
1647      if (metaLocation == null) {
1648        errors.reportError(ERROR_CODE.NULL_META_REGION, "META region location is null");
1649        return false;
1650      }
1651      if (metaLocation.getRegion() == null) {
1652        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null");
1653        return false;
1654      }
1655      if (metaLocation.getHostname() == null) {
1656        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location hostName is null");
1657        return false;
1658      }
1659      ServerName sn = metaLocation.getServerName();
1660      HbckRegionInfo.MetaEntry m = new HbckRegionInfo.MetaEntry(metaLocation.getRegion(), sn,
1661        EnvironmentEdgeManager.currentTime());
1662      HbckRegionInfo hbckRegionInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName());
1663      if (hbckRegionInfo == null) {
1664        regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckRegionInfo(m));
1665      } else {
1666        hbckRegionInfo.setMetaEntry(m);
1667      }
1668    }
1669    return true;
1670  }
1671
1672  private ZKWatcher createZooKeeperWatcher() throws IOException {
1673    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1674      @Override
1675      public void abort(String why, Throwable e) {
1676        LOG.error(why, e);
1677        System.exit(1);
1678      }
1679
1680      @Override
1681      public boolean isAborted() {
1682        return false;
1683      }
1684
1685    });
1686  }
1687
1688  /**
1689   * Contacts each regionserver and fetches metadata about regions.
1690   * @param regionServerList - the list of region servers to connect to
1691   * @throws IOException if a remote or network exception occurs
1692   */
1693  void processRegionServers(Collection<ServerName> regionServerList)
1694    throws IOException, InterruptedException {
1695
1696    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
1697    List<Future<Void>> workFutures;
1698
1699    // loop to contact each region server in parallel
1700    for (ServerName rsinfo : regionServerList) {
1701      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1702    }
1703
1704    workFutures = executor.invokeAll(workItems);
1705
1706    for (int i = 0; i < workFutures.size(); i++) {
1707      WorkItemRegion item = workItems.get(i);
1708      Future<Void> f = workFutures.get(i);
1709      try {
1710        f.get();
1711      } catch (ExecutionException e) {
1712        LOG.warn("Could not process regionserver {}", item.rsinfo.getAddress(), e.getCause());
1713      }
1714    }
1715  }
1716
1717  /**
1718   * Check consistency of all regions that have been found in previous phases.
1719   */
1720  private void checkAndFixConsistency() throws IOException, KeeperException, InterruptedException {
1721    // Divide the checks in two phases. One for default/primary replicas and another
1722    // for the non-primary ones. Keeps code cleaner this way.
1723
1724    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
1725    for (java.util.Map.Entry<String, HbckRegionInfo> e : regionInfoMap.entrySet()) {
1726      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1727        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1728      }
1729    }
1730    checkRegionConsistencyConcurrently(workItems);
1731
1732    boolean prevHdfsCheck = shouldCheckHdfs();
1733    setCheckHdfs(false); // replicas don't have any hdfs data
1734    // Run a pass over the replicas and fix any assignment issues that exist on the currently
1735    // deployed/undeployed replicas.
1736    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
1737    for (java.util.Map.Entry<String, HbckRegionInfo> e : regionInfoMap.entrySet()) {
1738      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
1739        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1740      }
1741    }
1742    checkRegionConsistencyConcurrently(replicaWorkItems);
1743    setCheckHdfs(prevHdfsCheck);
1744
1745    // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might
1746    // not get accurate state of the hbase if continuing. The config here allows users to tune
1747    // the tolerance of number of skipped region.
1748    // TODO: evaluate the consequence to continue the hbck operation without config.
1749    int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1750    int numOfSkippedRegions = skippedRegions.size();
1751    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1752      throw new IOException(
1753        numOfSkippedRegions + " region(s) could not be checked or repaired.  See logs for detail.");
1754    }
1755
1756    if (shouldCheckHdfs()) {
1757      checkAndFixTableStates();
1758    }
1759  }
1760
1761  /**
1762   * Check consistency of all regions using multiple threads concurrently.
1763   */
1764  private void
1765    checkRegionConsistencyConcurrently(final List<CheckRegionConsistencyWorkItem> workItems)
1766      throws IOException, KeeperException, InterruptedException {
1767    if (workItems.isEmpty()) {
1768      return; // nothing to check
1769    }
1770
1771    List<Future<Void>> workFutures = executor.invokeAll(workItems);
1772    for (Future<Void> f : workFutures) {
1773      try {
1774        f.get();
1775      } catch (ExecutionException e1) {
1776        LOG.warn("Could not check region consistency ", e1.getCause());
1777        if (e1.getCause() instanceof IOException) {
1778          throw (IOException) e1.getCause();
1779        } else if (e1.getCause() instanceof KeeperException) {
1780          throw (KeeperException) e1.getCause();
1781        } else if (e1.getCause() instanceof InterruptedException) {
1782          throw (InterruptedException) e1.getCause();
1783        } else {
1784          throw new IOException(e1.getCause());
1785        }
1786      }
1787    }
1788  }
1789
1790  class CheckRegionConsistencyWorkItem implements Callable<Void> {
1791    private final String key;
1792    private final HbckRegionInfo hbi;
1793
1794    CheckRegionConsistencyWorkItem(String key, HbckRegionInfo hbi) {
1795      this.key = key;
1796      this.hbi = hbi;
1797    }
1798
1799    @Override
1800    public synchronized Void call() throws Exception {
1801      try {
1802        checkRegionConsistency(key, hbi);
1803      } catch (Exception e) {
1804        // If the region is non-META region, skip this region and send warning/error message; if
1805        // the region is META region, we should not continue.
1806        LOG.warn(
1807          "Unable to complete check or repair the region '" + hbi.getRegionNameAsString() + "'.",
1808          e);
1809        if (hbi.getHdfsHRI().isMetaRegion()) {
1810          throw e;
1811        }
1812        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1813        addSkippedRegion(hbi);
1814      }
1815      return null;
1816    }
1817  }
1818
1819  private void addSkippedRegion(final HbckRegionInfo hbi) {
1820    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1821    if (skippedRegionNames == null) {
1822      skippedRegionNames = new HashSet<>();
1823    }
1824    skippedRegionNames.add(hbi.getRegionNameAsString());
1825    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1826  }
1827
1828  /**
1829   * Check and fix table states, assumes full info available: - tableInfos - empty tables loaded
1830   */
1831  private void checkAndFixTableStates() throws IOException {
1832    // first check dangling states
1833    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1834      TableName tableName = entry.getKey();
1835      TableState tableState = entry.getValue();
1836      HbckTableInfo tableInfo = tablesInfo.get(tableName);
1837      if (isTableIncluded(tableName) && !tableName.isSystemTable() && tableInfo == null) {
1838        if (fixMeta) {
1839          MetaTableAccessor.deleteTableState(connection, tableName);
1840          TableState state = MetaTableAccessor.getTableState(connection, tableName);
1841          if (state != null) {
1842            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1843              tableName + " unable to delete dangling table state " + tableState);
1844          }
1845        } else if (!checkMetaOnly) {
1846          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
1847          // true, tableInfo will be null as tablesInfo are not polulated for all tables from hdfs
1848          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1849            tableName + " has dangling table state " + tableState);
1850        }
1851      }
1852    }
1853    // check that all tables have states
1854    for (TableName tableName : tablesInfo.keySet()) {
1855      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1856        if (fixMeta) {
1857          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1858          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1859          if (newState == null) {
1860            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1861              "Unable to change state for table " + tableName + " in meta ");
1862          }
1863        } else {
1864          errors.reportError(ERROR_CODE.NO_TABLE_STATE, tableName + " has no state in meta ");
1865        }
1866      }
1867    }
1868  }
1869
1870  private void preCheckPermission() throws IOException {
1871    if (shouldIgnorePreCheckPermission()) {
1872      return;
1873    }
1874
1875    Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1876    FileSystem fs = hbaseDir.getFileSystem(getConf());
1877    UserProvider userProvider = UserProvider.instantiate(getConf());
1878    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1879    FileStatus[] files = fs.listStatus(hbaseDir);
1880    for (FileStatus file : files) {
1881      try {
1882        fs.access(file.getPath(), FsAction.WRITE);
1883      } catch (AccessControlException ace) {
1884        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1885        errors.reportError(ERROR_CODE.WRONG_USAGE,
1886          "Current user " + ugi.getUserName() + " does not have write perms to " + file.getPath()
1887            + ". Please rerun hbck as hdfs user " + file.getOwner());
1888        throw ace;
1889      }
1890    }
1891  }
1892
1893  /**
1894   * Deletes region from meta table
1895   */
1896  private void deleteMetaRegion(HbckRegionInfo hi) throws IOException {
1897    deleteMetaRegion(hi.getMetaEntry().getRegionInfo().getRegionName());
1898  }
1899
1900  /**
1901   * Deletes region from meta table
1902   */
1903  private void deleteMetaRegion(byte[] metaKey) throws IOException {
1904    Delete d = new Delete(metaKey);
1905    meta.delete(d);
1906    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META");
1907  }
1908
1909  /**
1910   * Reset the split parent region info in meta table
1911   */
1912  private void resetSplitParent(HbckRegionInfo hi) throws IOException {
1913    RowMutations mutations = new RowMutations(hi.getMetaEntry().getRegionInfo().getRegionName());
1914    Delete d = new Delete(hi.getMetaEntry().getRegionInfo().getRegionName());
1915    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1916    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1917    mutations.add(d);
1918
1919    RegionInfo hri = RegionInfoBuilder.newBuilder(hi.getMetaEntry().getRegionInfo())
1920      .setOffline(false).setSplit(false).build();
1921    Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
1922    mutations.add(p);
1923
1924    meta.mutateRow(mutations);
1925    LOG.info("Reset split parent " + hi.getMetaEntry().getRegionInfo().getRegionNameAsString()
1926      + " in META");
1927  }
1928
1929  /**
1930   * This backwards-compatibility wrapper for permanently offlining a region that should not be
1931   * alive. If the region server does not support the "offline" method, it will use the closest
1932   * unassign method instead. This will basically work until one attempts to disable or delete the
1933   * affected table. The problem has to do with in-memory only master state, so restarting the
1934   * HMaster or failing over to another should fix this.
1935   */
1936  void offline(byte[] regionName) throws IOException {
1937    String regionString = Bytes.toStringBinary(regionName);
1938    if (!rsSupportsOffline) {
1939      LOG.warn("Using unassign region " + regionString
1940        + " instead of using offline method, you should" + " restart HMaster after these repairs");
1941      admin.unassign(regionName, true);
1942      return;
1943    }
1944
1945    // first time we assume the rs's supports #offline.
1946    try {
1947      LOG.info("Offlining region " + regionString);
1948      admin.offline(regionName);
1949    } catch (IOException ioe) {
1950      String notFoundMsg =
1951        "java.lang.NoSuchMethodException: " + "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1952      if (ioe.getMessage().contains(notFoundMsg)) {
1953        LOG.warn(
1954          "Using unassign region " + regionString + " instead of using offline method, you should"
1955            + " restart HMaster after these repairs");
1956        rsSupportsOffline = false; // in the future just use unassign
1957        admin.unassign(regionName, true);
1958        return;
1959      }
1960      throw ioe;
1961    }
1962  }
1963
1964  /**
1965   * Attempts to undeploy a region from a region server based in information in META. Any operations
1966   * that modify the file system should make sure that its corresponding region is not deployed to
1967   * prevent data races. A separate call is required to update the master in-memory region state
1968   * kept in the AssignementManager. Because disable uses this state instead of that found in META,
1969   * we can't seem to cleanly disable/delete tables that have been hbck fixed. When used on a
1970   * version of HBase that does not have the offline ipc call exposed on the master (&lt;0.90.5,
1971   * &lt;0.92.0) a master restart or failover may be required.
1972   */
1973  void closeRegion(HbckRegionInfo hi) throws IOException, InterruptedException {
1974    if (hi.getMetaEntry() == null && hi.getHdfsEntry() == null) {
1975      undeployRegions(hi);
1976      return;
1977    }
1978
1979    // get assignment info and hregioninfo from meta.
1980    Get get = new Get(hi.getRegionName());
1981    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1982    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1983    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1984    // also get the locations of the replicas to close if the primary region is being closed
1985    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1986      int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
1987      for (int i = 0; i < numReplicas; i++) {
1988        get.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(i));
1989        get.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getStartCodeColumn(i));
1990      }
1991    }
1992    Result r = meta.get(get);
1993    RegionLocations rl = CatalogFamilyFormat.getRegionLocations(r);
1994    if (rl == null) {
1995      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1996        + " since meta does not have handle to reach it");
1997      return;
1998    }
1999    for (HRegionLocation h : rl.getRegionLocations()) {
2000      ServerName serverName = h.getServerName();
2001      if (serverName == null) {
2002        errors.reportError("Unable to close region " + hi.getRegionNameAsString()
2003          + " because meta does not " + "have handle to reach it.");
2004        continue;
2005      }
2006      RegionInfo hri = h.getRegion();
2007      if (hri == null) {
2008        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2009          + " because hbase:meta had invalid or missing " + HConstants.CATALOG_FAMILY_STR + ":"
2010          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER) + " qualifier value.");
2011        continue;
2012      }
2013      // close the region -- close files and remove assignment
2014      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2015    }
2016  }
2017
2018  private void undeployRegions(HbckRegionInfo hi) throws IOException, InterruptedException {
2019    undeployRegionsForHbi(hi);
2020    // undeploy replicas of the region (but only if the method is invoked for the primary)
2021    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2022      return;
2023    }
2024    int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2025    for (int i = 1; i < numReplicas; i++) {
2026      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2027      RegionInfo hri =
2028        RegionReplicaUtil.getRegionInfoForReplica(hi.getPrimaryHRIForDeployedReplica(), i);
2029      HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2030      if (h != null) {
2031        undeployRegionsForHbi(h);
2032        // set skip checks; we undeployed it, and we don't want to evaluate this anymore
2033        // in consistency checks
2034        h.setSkipChecks(true);
2035      }
2036    }
2037  }
2038
2039  private void undeployRegionsForHbi(HbckRegionInfo hi) throws IOException, InterruptedException {
2040    for (HbckRegionInfo.OnlineEntry rse : hi.getOnlineEntries()) {
2041      LOG.debug("Undeploy region " + rse.getRegionInfo() + " from " + rse.getServerName());
2042      try {
2043        HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.getServerName(),
2044          rse.getRegionInfo());
2045        offline(rse.getRegionInfo().getRegionName());
2046      } catch (IOException ioe) {
2047        LOG.warn("Got exception when attempting to offline region "
2048          + Bytes.toString(rse.getRegionInfo().getRegionName()), ioe);
2049      }
2050    }
2051  }
2052
2053  private void tryAssignmentRepair(HbckRegionInfo hbi, String msg)
2054    throws IOException, KeeperException, InterruptedException {
2055    // If we are trying to fix the errors
2056    if (shouldFixAssignments()) {
2057      errors.print(msg);
2058      undeployRegions(hbi);
2059      setShouldRerun();
2060      RegionInfo hri = hbi.getHdfsHRI();
2061      if (hri == null) {
2062        hri = hbi.getMetaEntry().getRegionInfo();
2063      }
2064      HBaseFsckRepair.fixUnassigned(admin, hri);
2065      HBaseFsckRepair.waitUntilAssigned(admin, hri);
2066
2067      // also assign replicas if needed (do it only when this call operates on a primary replica)
2068      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2069      int replicationCount = admin.getDescriptor(hri.getTable()).getRegionReplication();
2070      for (int i = 1; i < replicationCount; i++) {
2071        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2072        HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2073        if (h != null) {
2074          undeployRegions(h);
2075          // set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2076          // in consistency checks
2077          h.setSkipChecks(true);
2078        }
2079        HBaseFsckRepair.fixUnassigned(admin, hri);
2080        HBaseFsckRepair.waitUntilAssigned(admin, hri);
2081      }
2082
2083    }
2084  }
2085
2086  /**
2087   * Check a single region for consistency and correct deployment.
2088   */
2089  private void checkRegionConsistency(final String key, final HbckRegionInfo hbi)
2090    throws IOException, KeeperException, InterruptedException {
2091
2092    if (hbi.isSkipChecks()) return;
2093    String descriptiveName = hbi.toString();
2094    boolean inMeta = hbi.getMetaEntry() != null;
2095    // In case not checking HDFS, assume the region is on HDFS
2096    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2097    boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null;
2098    boolean isDeployed = !hbi.getDeployedOn().isEmpty();
2099    boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1;
2100    boolean deploymentMatchesMeta = hasMetaAssignment && isDeployed && !isMultiplyDeployed
2101      && hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0));
2102    boolean splitParent = inMeta && hbi.getMetaEntry().getRegionInfo().isSplit()
2103      && hbi.getMetaEntry().getRegionInfo().isOffline();
2104    boolean shouldBeDeployed =
2105      inMeta && !isTableDisabled(hbi.getMetaEntry().getRegionInfo().getTable());
2106    boolean recentlyModified =
2107      inHdfs && hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2108
2109    // ========== First the healthy cases =============
2110    if (hbi.containsOnlyHdfsEdits()) {
2111      return;
2112    }
2113    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2114      return;
2115    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2116      LOG.info("Region " + descriptiveName + " is in META, and in a disabled "
2117        + "tabled that is not deployed");
2118      return;
2119    } else if (recentlyModified) {
2120      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2121      return;
2122    }
2123    // ========== Cases where the region is not in hbase:meta =============
2124    else if (!inMeta && !inHdfs && !isDeployed) {
2125      // We shouldn't have record of this region at all then!
2126      assert false : "Entry for region with no data";
2127    } else if (!inMeta && !inHdfs && isDeployed) {
2128      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS,
2129        "Region " + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but "
2130          + "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2131      if (shouldFixAssignments()) {
2132        undeployRegions(hbi);
2133      }
2134
2135    } else if (!inMeta && inHdfs && !isDeployed) {
2136      if (hbi.isMerged()) {
2137        // This region has already been merged, the remaining hdfs file will be
2138        // cleaned by CatalogJanitor later
2139        hbi.setSkipChecks(true);
2140        LOG.info("Region " + descriptiveName
2141          + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2142        return;
2143      }
2144      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " + descriptiveName
2145        + " on HDFS, but not listed in hbase:meta " + "or deployed on any region server");
2146      // restore region consistency of an adopted orphan
2147      if (shouldFixMeta()) {
2148        if (!hbi.isHdfsRegioninfoPresent()) {
2149          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2150            + " in table integrity repair phase if -fixHdfsOrphans was" + " used.");
2151          return;
2152        }
2153
2154        RegionInfo hri = hbi.getHdfsHRI();
2155        HbckTableInfo tableInfo = tablesInfo.get(hri.getTable());
2156
2157        for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) {
2158          if (
2159            Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2160              && (region.getEndKey().length == 0
2161                || Bytes.compareTo(region.getEndKey(), hri.getEndKey()) >= 0)
2162              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0
2163          ) {
2164            if (region.isSplit() || region.isOffline()) continue;
2165            Path regionDir = hbi.getHdfsRegionDir();
2166            FileSystem fs = regionDir.getFileSystem(getConf());
2167            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2168            for (Path familyDir : familyDirs) {
2169              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2170              for (Path referenceFilePath : referenceFilePaths) {
2171                Path parentRegionDir =
2172                  StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2173                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2174                  LOG.warn(hri + " start and stop keys are in the range of " + region
2175                    + ". The region might not be cleaned up from hdfs when region " + region
2176                    + " split failed. Hence deleting from hdfs.");
2177                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, regionDir.getParent(),
2178                    hri);
2179                  return;
2180                }
2181              }
2182            }
2183          }
2184        }
2185        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2186        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2187        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2188          admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
2189          numReplicas);
2190
2191        tryAssignmentRepair(hbi, "Trying to reassign region...");
2192      }
2193
2194    } else if (!inMeta && inHdfs && isDeployed) {
2195      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2196        + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2197      debugLsr(hbi.getHdfsRegionDir());
2198      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2199        // for replicas, this means that we should undeploy the region (we would have
2200        // gone over the primaries and fixed meta holes in first phase under
2201        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2202        // this stage unless unwanted replica)
2203        if (shouldFixAssignments()) {
2204          undeployRegionsForHbi(hbi);
2205        }
2206      }
2207      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2208        if (!hbi.isHdfsRegioninfoPresent()) {
2209          LOG.error("This should have been repaired in table integrity repair phase");
2210          return;
2211        }
2212
2213        LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2214        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2215        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2216          admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
2217          numReplicas);
2218        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2219      }
2220
2221      // ========== Cases where the region is in hbase:meta =============
2222    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2223      // check whether this is an actual error, or just transient state where parent
2224      // is not cleaned
2225      if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) {
2226        // check that split daughters are there
2227        HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName());
2228        HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName());
2229        if (infoA != null && infoB != null) {
2230          // we already processed or will process daughters. Move on, nothing to see here.
2231          hbi.setSkipChecks(true);
2232          return;
2233        }
2234      }
2235
2236      // For Replica region, we need to do a similar check. If replica is not split successfully,
2237      // error is going to be reported against primary daughter region.
2238      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2239        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2240          + "and not deployed on any region server. This may be transient.");
2241        hbi.setSkipChecks(true);
2242        return;
2243      }
2244
2245      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT,
2246        "Region " + descriptiveName + " is a split parent in META, in HDFS, "
2247          + "and not deployed on any region server. This could be transient, "
2248          + "consider to run the catalog janitor first!");
2249      if (shouldFixSplitParents()) {
2250        setShouldRerun();
2251        resetSplitParent(hbi);
2252      }
2253    } else if (inMeta && !inHdfs && !isDeployed) {
2254      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " + descriptiveName
2255        + " found in META, but not in HDFS " + "or deployed on any region server.");
2256      if (shouldFixMeta()) {
2257        deleteMetaRegion(hbi);
2258      }
2259    } else if (inMeta && !inHdfs && isDeployed) {
2260      errors.reportError(ERROR_CODE.NOT_IN_HDFS,
2261        "Region " + descriptiveName + " found in META, but not in HDFS, " + "and deployed on "
2262          + Joiner.on(", ").join(hbi.getDeployedOn()));
2263      // We treat HDFS as ground truth. Any information in meta is transient
2264      // and equivalent data can be regenerated. So, lets unassign and remove
2265      // these problems from META.
2266      if (shouldFixAssignments()) {
2267        errors.print("Trying to fix unassigned region...");
2268        undeployRegions(hbi);
2269      }
2270      if (shouldFixMeta()) {
2271        // wait for it to complete
2272        deleteMetaRegion(hbi);
2273      }
2274    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2275      errors.reportError(ERROR_CODE.NOT_DEPLOYED,
2276        "Region " + descriptiveName + " not deployed on any region server.");
2277      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2278    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2279      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2280        "Region " + descriptiveName + " should not be deployed according "
2281          + "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2282      if (shouldFixAssignments()) {
2283        errors.print("Trying to close the region " + descriptiveName);
2284        setShouldRerun();
2285        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2286          hbi.getDeployedOn());
2287      }
2288    } else if (inMeta && inHdfs && isMultiplyDeployed) {
2289      errors.reportError(ERROR_CODE.MULTI_DEPLOYED,
2290        "Region " + descriptiveName + " is listed in hbase:meta on region server "
2291          + hbi.getMetaEntry().regionServer + " but is multiply assigned to region servers "
2292          + Joiner.on(", ").join(hbi.getDeployedOn()));
2293      // If we are trying to fix the errors
2294      if (shouldFixAssignments()) {
2295        errors.print("Trying to fix assignment error...");
2296        setShouldRerun();
2297        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2298          hbi.getDeployedOn());
2299      }
2300    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2301      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META,
2302        "Region " + descriptiveName + " listed in hbase:meta on region server "
2303          + hbi.getMetaEntry().regionServer + " but found on region server "
2304          + hbi.getDeployedOn().get(0));
2305      // If we are trying to fix the errors
2306      if (shouldFixAssignments()) {
2307        errors.print("Trying to fix assignment error...");
2308        setShouldRerun();
2309        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2310          hbi.getDeployedOn());
2311        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2312      }
2313    } else {
2314      errors.reportError(ERROR_CODE.UNKNOWN,
2315        "Region " + descriptiveName + " is in an unforeseen state:" + " inMeta=" + inMeta
2316          + " inHdfs=" + inHdfs + " isDeployed=" + isDeployed + " isMultiplyDeployed="
2317          + isMultiplyDeployed + " deploymentMatchesMeta=" + deploymentMatchesMeta
2318          + " shouldBeDeployed=" + shouldBeDeployed);
2319    }
2320  }
2321
2322  /**
2323   * Checks tables integrity. Goes over all regions and scans the tables. Collects all the pieces
2324   * for each table and checks if there are missing, repeated or overlapping ones.
2325   */
2326  SortedMap<TableName, HbckTableInfo> checkIntegrity() throws IOException {
2327    tablesInfo = new TreeMap<>();
2328    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2329    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2330      // Check only valid, working regions
2331      if (hbi.getMetaEntry() == null) {
2332        // this assumes that consistency check has run loadMetaEntry
2333        Path p = hbi.getHdfsRegionDir();
2334        if (p == null) {
2335          errors.report("No regioninfo in Meta or HDFS. " + hbi);
2336        }
2337
2338        // TODO test.
2339        continue;
2340      }
2341      if (hbi.getMetaEntry().regionServer == null) {
2342        errors.detail("Skipping region because no region server: " + hbi);
2343        continue;
2344      }
2345      if (hbi.getMetaEntry().getRegionInfo().isOffline()) {
2346        errors.detail("Skipping region because it is offline: " + hbi);
2347        continue;
2348      }
2349      if (hbi.containsOnlyHdfsEdits()) {
2350        errors.detail("Skipping region because it only contains edits" + hbi);
2351        continue;
2352      }
2353
2354      // Missing regionDir or over-deployment is checked elsewhere. Include
2355      // these cases in modTInfo, so we can evaluate those regions as part of
2356      // the region chain in META
2357      // if (hbi.foundRegionDir == null) continue;
2358      // if (hbi.deployedOn.size() != 1) continue;
2359      if (hbi.getDeployedOn().isEmpty()) {
2360        continue;
2361      }
2362
2363      // We should be safe here
2364      TableName tableName = hbi.getMetaEntry().getRegionInfo().getTable();
2365      HbckTableInfo modTInfo = tablesInfo.get(tableName);
2366      if (modTInfo == null) {
2367        modTInfo = new HbckTableInfo(tableName, this);
2368      }
2369      for (ServerName server : hbi.getDeployedOn()) {
2370        modTInfo.addServer(server);
2371      }
2372
2373      if (!hbi.isSkipChecks()) {
2374        modTInfo.addRegionInfo(hbi);
2375      }
2376
2377      tablesInfo.put(tableName, modTInfo);
2378    }
2379
2380    loadTableInfosForTablesWithNoRegion();
2381
2382    logParallelMerge();
2383    for (HbckTableInfo tInfo : tablesInfo.values()) {
2384      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2385      if (!tInfo.checkRegionChain(handler)) {
2386        errors.report("Found inconsistency in table " + tInfo.getName());
2387      }
2388    }
2389    return tablesInfo;
2390  }
2391
2392  /**
2393   * Loads table info's for tables that may not have been included, since there are no regions
2394   * reported for the table, but table dir is there in hdfs
2395   */
2396  private void loadTableInfosForTablesWithNoRegion() throws IOException {
2397    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2398    for (TableDescriptor htd : allTables.values()) {
2399      if (checkMetaOnly && !htd.isMetaTable()) {
2400        continue;
2401      }
2402
2403      TableName tableName = htd.getTableName();
2404      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2405        HbckTableInfo tableInfo = new HbckTableInfo(tableName, this);
2406        tableInfo.htds.add(htd);
2407        tablesInfo.put(htd.getTableName(), tableInfo);
2408      }
2409    }
2410  }
2411
2412  /**
2413   * Merge hdfs data by moving from contained HbckRegionInfo into targetRegionDir.
2414   * @return number of file move fixes done to merge regions.
2415   */
2416  public int mergeRegionDirs(Path targetRegionDir, HbckRegionInfo contained) throws IOException {
2417    int fileMoves = 0;
2418    String thread = Thread.currentThread().getName();
2419    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2420    debugLsr(contained.getHdfsRegionDir());
2421
2422    // rename the contained into the container.
2423    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2424    FileStatus[] dirs = null;
2425    try {
2426      dirs = fs.listStatus(contained.getHdfsRegionDir());
2427    } catch (FileNotFoundException fnfe) {
2428      // region we are attempting to merge in is not present! Since this is a merge, there is
2429      // no harm skipping this region if it does not exist.
2430      if (!fs.exists(contained.getHdfsRegionDir())) {
2431        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2432          + " is missing. Assuming already sidelined or moved.");
2433      } else {
2434        sidelineRegionDir(fs, contained);
2435      }
2436      return fileMoves;
2437    }
2438
2439    if (dirs == null) {
2440      if (!fs.exists(contained.getHdfsRegionDir())) {
2441        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2442          + " already sidelined.");
2443      } else {
2444        sidelineRegionDir(fs, contained);
2445      }
2446      return fileMoves;
2447    }
2448
2449    for (FileStatus cf : dirs) {
2450      Path src = cf.getPath();
2451      Path dst = new Path(targetRegionDir, src.getName());
2452
2453      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2454        // do not copy the old .regioninfo file.
2455        continue;
2456      }
2457
2458      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2459        // do not copy the .oldlogs files
2460        continue;
2461      }
2462
2463      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2464      // FileSystem.rename is inconsistent with directories -- if the
2465      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2466      // it moves the src into the dst dir resulting in (foo/a/b). If
2467      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2468      for (FileStatus hfile : fs.listStatus(src)) {
2469        boolean success = fs.rename(hfile.getPath(), dst);
2470        if (success) {
2471          fileMoves++;
2472        }
2473      }
2474      LOG.debug("[" + thread + "] Sideline directory contents:");
2475      debugLsr(targetRegionDir);
2476    }
2477
2478    // if all success.
2479    sidelineRegionDir(fs, contained);
2480    LOG.info("[" + thread + "] Sidelined region dir " + contained.getHdfsRegionDir() + " into "
2481      + getSidelineDir());
2482    debugLsr(contained.getHdfsRegionDir());
2483
2484    return fileMoves;
2485  }
2486
2487  static class WorkItemOverlapMerge implements Callable<Void> {
2488    private TableIntegrityErrorHandler handler;
2489    Collection<HbckRegionInfo> overlapgroup;
2490
2491    WorkItemOverlapMerge(Collection<HbckRegionInfo> overlapgroup,
2492      TableIntegrityErrorHandler handler) {
2493      this.handler = handler;
2494      this.overlapgroup = overlapgroup;
2495    }
2496
2497    @Override
2498    public Void call() throws Exception {
2499      handler.handleOverlapGroup(overlapgroup);
2500      return null;
2501    }
2502  }
2503
2504  /**
2505   * Return a list of user-space table names whose metadata have not been modified in the last few
2506   * milliseconds specified by timelag if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER,
2507   * STARTCODE_QUALIFIER, SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
2508   * milliseconds specified by timelag, then the table is a candidate to be returned.
2509   * @return tables that have not been modified recently
2510   * @throws IOException if an error is encountered
2511   */
2512  TableDescriptor[] getTables(AtomicInteger numSkipped) {
2513    List<TableName> tableNames = new ArrayList<>();
2514    long now = EnvironmentEdgeManager.currentTime();
2515
2516    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2517      HbckRegionInfo.MetaEntry info = hbi.getMetaEntry();
2518
2519      // if the start key is zero, then we have found the first region of a table.
2520      // pick only those tables that were not modified in the last few milliseconds.
2521      if (
2522        info != null && info.getRegionInfo().getStartKey().length == 0
2523          && !info.getRegionInfo().isMetaRegion()
2524      ) {
2525        if (info.modTime + timelag < now) {
2526          tableNames.add(info.getRegionInfo().getTable());
2527        } else {
2528          numSkipped.incrementAndGet(); // one more in-flux table
2529        }
2530      }
2531    }
2532    return getTableDescriptors(tableNames);
2533  }
2534
2535  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
2536    LOG.info("getTableDescriptors == tableNames => " + tableNames);
2537    try (Connection conn = ConnectionFactory.createConnection(getConf());
2538      Admin admin = conn.getAdmin()) {
2539      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
2540      return tds.toArray(new TableDescriptor[tds.size()]);
2541    } catch (IOException e) {
2542      LOG.debug("Exception getting table descriptors", e);
2543    }
2544    return new TableDescriptor[0];
2545  }
2546
2547  /**
2548   * Gets the entry in regionInfo corresponding to the the given encoded region name. If the region
2549   * has not been seen yet, a new entry is added and returned.
2550   */
2551  private synchronized HbckRegionInfo getOrCreateInfo(String name) {
2552    HbckRegionInfo hbi = regionInfoMap.get(name);
2553    if (hbi == null) {
2554      hbi = new HbckRegionInfo(null);
2555      regionInfoMap.put(name, hbi);
2556    }
2557    return hbi;
2558  }
2559
2560  private void checkAndFixReplication() throws ReplicationException {
2561    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
2562    checker.checkUnDeletedQueues();
2563
2564    if (checker.hasUnDeletedQueues() && this.fixReplication) {
2565      checker.fixUnDeletedQueues();
2566      setShouldRerun();
2567    }
2568  }
2569
2570  /**
2571   * Check values in regionInfo for hbase:meta Check if zero or more than one regions with
2572   * hbase:meta are found. If there are inconsistencies (i.e. zero or more than one regions pretend
2573   * to be holding the hbase:meta) try to fix that and report an error.
2574   * @throws IOException from HBaseFsckRepair functions
2575   */
2576  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2577    Map<Integer, HbckRegionInfo> metaRegions = new HashMap<>();
2578    for (HbckRegionInfo value : regionInfoMap.values()) {
2579      if (value.getMetaEntry() != null && value.getMetaEntry().getRegionInfo().isMetaRegion()) {
2580        metaRegions.put(value.getReplicaId(), value);
2581      }
2582    }
2583    int metaReplication = admin.getDescriptor(TableName.META_TABLE_NAME).getRegionReplication();
2584    boolean noProblem = true;
2585    // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas
2586    // Check the deployed servers. It should be exactly one server for each replica.
2587    for (int i = 0; i < metaReplication; i++) {
2588      HbckRegionInfo metaHbckRegionInfo = metaRegions.remove(i);
2589      List<ServerName> servers = new ArrayList<>();
2590      if (metaHbckRegionInfo != null) {
2591        servers = metaHbckRegionInfo.getDeployedOn();
2592      }
2593      if (servers.size() != 1) {
2594        noProblem = false;
2595        if (servers.isEmpty()) {
2596          assignMetaReplica(i);
2597        } else if (servers.size() > 1) {
2598          errors.reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId "
2599            + metaHbckRegionInfo.getReplicaId() + " is found on more than one region.");
2600          if (shouldFixAssignments()) {
2601            errors.print("Trying to fix a problem with hbase:meta, replicaId "
2602              + metaHbckRegionInfo.getReplicaId() + "..");
2603            setShouldRerun();
2604            // try fix it (treat is a dupe assignment)
2605            HBaseFsckRepair.fixMultiAssignment(connection,
2606              metaHbckRegionInfo.getMetaEntry().getRegionInfo(), servers);
2607          }
2608        }
2609      }
2610    }
2611    // unassign whatever is remaining in metaRegions. They are excess replicas.
2612    for (Map.Entry<Integer, HbckRegionInfo> entry : metaRegions.entrySet()) {
2613      noProblem = false;
2614      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2615        "hbase:meta replicas are deployed in excess. Configured " + metaReplication + ", deployed "
2616          + metaRegions.size());
2617      if (shouldFixAssignments()) {
2618        errors.print(
2619          "Trying to undeploy excess replica, replicaId: " + entry.getKey() + " of hbase:meta..");
2620        setShouldRerun();
2621        unassignMetaReplica(entry.getValue());
2622      }
2623    }
2624    // if noProblem is false, rerun hbck with hopefully fixed META
2625    // if noProblem is true, no errors, so continue normally
2626    return noProblem;
2627  }
2628
2629  private void unassignMetaReplica(HbckRegionInfo hi)
2630    throws IOException, InterruptedException, KeeperException {
2631    undeployRegions(hi);
2632    ZKUtil.deleteNode(zkw,
2633      zkw.getZNodePaths().getZNodeForReplica(hi.getMetaEntry().getRegionInfo().getReplicaId()));
2634  }
2635
2636  private void assignMetaReplica(int replicaId)
2637    throws IOException, KeeperException, InterruptedException {
2638    errors.reportError(ERROR_CODE.NO_META_REGION,
2639      "hbase:meta, replicaId " + replicaId + " is not found on any region.");
2640    if (shouldFixAssignments()) {
2641      errors.print("Trying to fix a problem with hbase:meta..");
2642      setShouldRerun();
2643      // try to fix it (treat it as unassigned region)
2644      RegionInfo h = RegionReplicaUtil
2645        .getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
2646      HBaseFsckRepair.fixUnassigned(admin, h);
2647      HBaseFsckRepair.waitUntilAssigned(admin, h);
2648    }
2649  }
2650
2651  /**
2652   * Scan hbase:meta, adding all regions found to the regionInfo map.
2653   * @throws IOException if an error is encountered
2654   */
2655  boolean loadMetaEntries() throws IOException {
2656    ClientMetaTableAccessor.Visitor visitor = new ClientMetaTableAccessor.Visitor() {
2657      int countRecord = 1;
2658
2659      // comparator to sort KeyValues with latest modtime
2660      final Comparator<Cell> comp = new Comparator<Cell>() {
2661        @Override
2662        public int compare(Cell k1, Cell k2) {
2663          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
2664        }
2665      };
2666
2667      @Override
2668      public boolean visit(Result result) throws IOException {
2669        try {
2670
2671          // record the latest modification of this META record
2672          long ts = Collections.max(result.listCells(), comp).getTimestamp();
2673          RegionLocations rl = CatalogFamilyFormat.getRegionLocations(result);
2674          if (rl == null) {
2675            emptyRegionInfoQualifiers.add(result);
2676            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2677              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2678            return true;
2679          }
2680          ServerName sn = null;
2681          if (
2682            rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null
2683              || rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion() == null
2684          ) {
2685            emptyRegionInfoQualifiers.add(result);
2686            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2687              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2688            return true;
2689          }
2690          RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion();
2691          if (!(isTableIncluded(hri.getTable()) || hri.isMetaRegion())) {
2692            return true;
2693          }
2694          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
2695          for (HRegionLocation h : rl.getRegionLocations()) {
2696            if (h == null || h.getRegion() == null) {
2697              continue;
2698            }
2699            sn = h.getServerName();
2700            hri = h.getRegion();
2701
2702            HbckRegionInfo.MetaEntry m = null;
2703            if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2704              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, daughters.getFirst(),
2705                daughters.getSecond());
2706            } else {
2707              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, null, null);
2708            }
2709            HbckRegionInfo previous = regionInfoMap.get(hri.getEncodedName());
2710            if (previous == null) {
2711              regionInfoMap.put(hri.getEncodedName(), new HbckRegionInfo(m));
2712            } else if (previous.getMetaEntry() == null) {
2713              previous.setMetaEntry(m);
2714            } else {
2715              throw new IOException("Two entries in hbase:meta are same " + previous);
2716            }
2717          }
2718          List<RegionInfo> mergeParents = CatalogFamilyFormat.getMergeRegions(result.rawCells());
2719          if (mergeParents != null) {
2720            for (RegionInfo mergeRegion : mergeParents) {
2721              if (mergeRegion != null) {
2722                // This region is already being merged
2723                HbckRegionInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
2724                hbInfo.setMerged(true);
2725              }
2726            }
2727          }
2728
2729          // show proof of progress to the user, once for every 100 records.
2730          if (countRecord % 100 == 0) {
2731            errors.progress();
2732          }
2733          countRecord++;
2734          return true;
2735        } catch (RuntimeException e) {
2736          LOG.error("Result=" + result);
2737          throw e;
2738        }
2739      }
2740    };
2741    if (!checkMetaOnly) {
2742      // Scan hbase:meta to pick up user regions
2743      MetaTableAccessor.fullScanRegions(connection, visitor);
2744    }
2745
2746    errors.print("");
2747    return true;
2748  }
2749
2750  /**
2751   * Prints summary of all tables found on the system.
2752   */
2753  private void printTableSummary(SortedMap<TableName, HbckTableInfo> tablesInfo) {
2754    StringBuilder sb = new StringBuilder();
2755    int numOfSkippedRegions;
2756    errors.print("Summary:");
2757    for (HbckTableInfo tInfo : tablesInfo.values()) {
2758      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName()))
2759        ? skippedRegions.get(tInfo.getName()).size()
2760        : 0;
2761
2762      if (errors.tableHasErrors(tInfo)) {
2763        errors.print("Table " + tInfo.getName() + " is inconsistent.");
2764      } else if (numOfSkippedRegions > 0) {
2765        errors.print("Table " + tInfo.getName() + " is okay (with " + numOfSkippedRegions
2766          + " skipped regions).");
2767      } else {
2768        errors.print("Table " + tInfo.getName() + " is okay.");
2769      }
2770      errors.print("    Number of regions: " + tInfo.getNumRegions());
2771      if (numOfSkippedRegions > 0) {
2772        Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
2773        System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
2774        System.out.println("      List of skipped regions:");
2775        for (String sr : skippedRegionStrings) {
2776          System.out.println("        " + sr);
2777        }
2778      }
2779      sb.setLength(0); // clear out existing buffer, if any.
2780      sb.append("    Deployed on: ");
2781      for (ServerName server : tInfo.deployedOn) {
2782        sb.append(" " + server.toString());
2783      }
2784      errors.print(sb.toString());
2785    }
2786  }
2787
2788  static HbckErrorReporter getErrorReporter(final Configuration conf)
2789    throws ClassNotFoundException {
2790    Class<? extends HbckErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter",
2791      PrintingErrorReporter.class, HbckErrorReporter.class);
2792    return ReflectionUtils.newInstance(reporter, conf);
2793  }
2794
2795  static class PrintingErrorReporter implements HbckErrorReporter {
2796    public int errorCount = 0;
2797    private int showProgress;
2798    // How frequently calls to progress() will create output
2799    private static final int progressThreshold = 100;
2800
2801    Set<HbckTableInfo> errorTables = new HashSet<>();
2802
2803    // for use by unit tests to verify which errors were discovered
2804    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
2805
2806    @Override
2807    public void clear() {
2808      errorTables.clear();
2809      errorList.clear();
2810      errorCount = 0;
2811    }
2812
2813    @Override
2814    public synchronized void reportError(ERROR_CODE errorCode, String message) {
2815      if (errorCode == ERROR_CODE.WRONG_USAGE) {
2816        System.err.println(message);
2817        return;
2818      }
2819
2820      errorList.add(errorCode);
2821      if (!summary) {
2822        System.out.println("ERROR: " + message);
2823      }
2824      errorCount++;
2825      showProgress = 0;
2826    }
2827
2828    @Override
2829    public synchronized void reportError(ERROR_CODE errorCode, String message,
2830      HbckTableInfo table) {
2831      errorTables.add(table);
2832      reportError(errorCode, message);
2833    }
2834
2835    @Override
2836    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2837      HbckRegionInfo info) {
2838      errorTables.add(table);
2839      String reference = "(region " + info.getRegionNameAsString() + ")";
2840      reportError(errorCode, reference + " " + message);
2841    }
2842
2843    @Override
2844    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2845      HbckRegionInfo info1, HbckRegionInfo info2) {
2846      errorTables.add(table);
2847      String reference =
2848        "(regions " + info1.getRegionNameAsString() + " and " + info2.getRegionNameAsString() + ")";
2849      reportError(errorCode, reference + " " + message);
2850    }
2851
2852    @Override
2853    public synchronized void reportError(String message) {
2854      reportError(ERROR_CODE.UNKNOWN, message);
2855    }
2856
2857    /**
2858     * Report error information, but do not increment the error count. Intended for cases where the
2859     * actual error would have been reported previously.
2860     */
2861    @Override
2862    public synchronized void report(String message) {
2863      if (!summary) {
2864        System.out.println("ERROR: " + message);
2865      }
2866      showProgress = 0;
2867    }
2868
2869    @Override
2870    public synchronized int summarize() {
2871      System.out.println(Integer.toString(errorCount) + " inconsistencies detected.");
2872      if (errorCount == 0) {
2873        System.out.println("Status: OK");
2874        return 0;
2875      } else {
2876        System.out.println("Status: INCONSISTENT");
2877        return -1;
2878      }
2879    }
2880
2881    @Override
2882    public ArrayList<ERROR_CODE> getErrorList() {
2883      return errorList;
2884    }
2885
2886    @Override
2887    public synchronized void print(String message) {
2888      if (!summary) {
2889        System.out.println(message);
2890      }
2891    }
2892
2893    @Override
2894    public boolean tableHasErrors(HbckTableInfo table) {
2895      return errorTables.contains(table);
2896    }
2897
2898    @Override
2899    public void resetErrors() {
2900      errorCount = 0;
2901    }
2902
2903    @Override
2904    public synchronized void detail(String message) {
2905      if (details) {
2906        System.out.println(message);
2907      }
2908      showProgress = 0;
2909    }
2910
2911    @Override
2912    public synchronized void progress() {
2913      if (showProgress++ == progressThreshold) {
2914        if (!summary) {
2915          System.out.print(".");
2916        }
2917        showProgress = 0;
2918      }
2919    }
2920  }
2921
2922  /**
2923   * Contact a region server and get all information from it
2924   */
2925  static class WorkItemRegion implements Callable<Void> {
2926    private final HBaseFsck hbck;
2927    private final ServerName rsinfo;
2928    private final HbckErrorReporter errors;
2929    private final Connection connection;
2930
2931    WorkItemRegion(HBaseFsck hbck, ServerName info, HbckErrorReporter errors,
2932      Connection connection) {
2933      this.hbck = hbck;
2934      this.rsinfo = info;
2935      this.errors = errors;
2936      this.connection = connection;
2937    }
2938
2939    @Override
2940    public synchronized Void call() throws IOException {
2941      errors.progress();
2942      try {
2943        // list all online regions from this region server
2944        List<RegionInfo> regions = connection.getAdmin().getRegions(rsinfo);
2945        regions = filterRegions(regions);
2946
2947        if (details) {
2948          errors.detail(
2949            "RegionServer: " + rsinfo.getServerName() + " number of regions: " + regions.size());
2950          for (RegionInfo rinfo : regions) {
2951            errors.detail("  " + rinfo.getRegionNameAsString() + " id: " + rinfo.getRegionId()
2952              + " encoded_name: " + rinfo.getEncodedName() + " start: "
2953              + Bytes.toStringBinary(rinfo.getStartKey()) + " end: "
2954              + Bytes.toStringBinary(rinfo.getEndKey()));
2955          }
2956        }
2957
2958        // check to see if the existence of this region matches the region in META
2959        for (RegionInfo r : regions) {
2960          HbckRegionInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
2961          hbi.addServer(r, rsinfo);
2962        }
2963      } catch (IOException e) { // unable to connect to the region server.
2964        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE,
2965          "RegionServer: " + rsinfo.getServerName() + " Unable to fetch region information. " + e);
2966        throw e;
2967      }
2968      return null;
2969    }
2970
2971    private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
2972      List<RegionInfo> ret = Lists.newArrayList();
2973      for (RegionInfo hri : regions) {
2974        if (hri.isMetaRegion() || (!hbck.checkMetaOnly && hbck.isTableIncluded(hri.getTable()))) {
2975          ret.add(hri);
2976        }
2977      }
2978      return ret;
2979    }
2980  }
2981
2982  /**
2983   * Contact hdfs and get all information about specified table directory into regioninfo list.
2984   */
2985  class WorkItemHdfsDir implements Callable<Void> {
2986    private FileStatus tableDir;
2987    private HbckErrorReporter errors;
2988    private FileSystem fs;
2989
2990    WorkItemHdfsDir(FileSystem fs, HbckErrorReporter errors, FileStatus status) {
2991      this.fs = fs;
2992      this.tableDir = status;
2993      this.errors = errors;
2994    }
2995
2996    @Override
2997    public synchronized Void call() throws InterruptedException, ExecutionException {
2998      final Vector<Exception> exceptions = new Vector<>();
2999
3000      try {
3001        final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3002        final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
3003
3004        for (final FileStatus regionDir : regionDirs) {
3005          errors.progress();
3006          final String encodedName = regionDir.getPath().getName();
3007          // ignore directories that aren't hexadecimal
3008          if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
3009            continue;
3010          }
3011
3012          if (!exceptions.isEmpty()) {
3013            break;
3014          }
3015
3016          futures.add(executor.submit(new Runnable() {
3017            @Override
3018            public void run() {
3019              try {
3020                LOG.debug("Loading region info from hdfs:" + regionDir.getPath());
3021
3022                Path regioninfoFile =
3023                  new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
3024                boolean regioninfoFileExists = fs.exists(regioninfoFile);
3025
3026                if (!regioninfoFileExists) {
3027                  // As tables become larger it is more and more likely that by the time you
3028                  // reach a given region that it will be gone due to region splits/merges.
3029                  if (!fs.exists(regionDir.getPath())) {
3030                    LOG.warn("By the time we tried to process this region dir it was already gone: "
3031                      + regionDir.getPath());
3032                    return;
3033                  }
3034                }
3035
3036                HbckRegionInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
3037                HbckRegionInfo.HdfsEntry he = new HbckRegionInfo.HdfsEntry();
3038                synchronized (hbi) {
3039                  if (hbi.getHdfsRegionDir() != null) {
3040                    errors
3041                      .print("Directory " + encodedName + " duplicate??" + hbi.getHdfsRegionDir());
3042                  }
3043
3044                  he.regionDir = regionDir.getPath();
3045                  he.regionDirModTime = regionDir.getModificationTime();
3046                  he.hdfsRegioninfoFilePresent = regioninfoFileExists;
3047                  // we add to orphan list when we attempt to read .regioninfo
3048
3049                  // Set a flag if this region contains only edits
3050                  // This is special case if a region is left after split
3051                  he.hdfsOnlyEdits = true;
3052                  FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3053                  Path ePath = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
3054                  for (FileStatus subDir : subDirs) {
3055                    errors.progress();
3056                    String sdName = subDir.getPath().getName();
3057                    if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3058                      he.hdfsOnlyEdits = false;
3059                      break;
3060                    }
3061                  }
3062                  hbi.setHdfsEntry(he);
3063                }
3064              } catch (Exception e) {
3065                LOG.error("Could not load region dir", e);
3066                exceptions.add(e);
3067              }
3068            }
3069          }));
3070        }
3071
3072        // Ensure all pending tasks are complete (or that we run into an exception)
3073        for (Future<?> f : futures) {
3074          if (!exceptions.isEmpty()) {
3075            break;
3076          }
3077          try {
3078            f.get();
3079          } catch (ExecutionException e) {
3080            LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
3081            // Shouldn't happen, we already logged/caught any exceptions in the Runnable
3082          }
3083        }
3084      } catch (IOException e) {
3085        LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
3086        exceptions.add(e);
3087      } finally {
3088        if (!exceptions.isEmpty()) {
3089          errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3090            + tableDir.getPath().getName() + " Unable to fetch all HDFS region information. ");
3091          // Just throw the first exception as an indication something bad happened
3092          // Don't need to propagate all the exceptions, we already logged them all anyway
3093          throw new ExecutionException("First exception in WorkItemHdfsDir",
3094            exceptions.firstElement());
3095        }
3096      }
3097      return null;
3098    }
3099  }
3100
3101  /**
3102   * Contact hdfs and get all information about specified table directory into regioninfo list.
3103   */
3104  static class WorkItemHdfsRegionInfo implements Callable<Void> {
3105    private HbckRegionInfo hbi;
3106    private HBaseFsck hbck;
3107    private HbckErrorReporter errors;
3108
3109    WorkItemHdfsRegionInfo(HbckRegionInfo hbi, HBaseFsck hbck, HbckErrorReporter errors) {
3110      this.hbi = hbi;
3111      this.hbck = hbck;
3112      this.errors = errors;
3113    }
3114
3115    @Override
3116    public synchronized Void call() throws IOException {
3117      // only load entries that haven't been loaded yet.
3118      if (hbi.getHdfsHRI() == null) {
3119        try {
3120          errors.progress();
3121          hbi.loadHdfsRegioninfo(hbck.getConf());
3122        } catch (IOException ioe) {
3123          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3124            + hbi.getTableName() + " in hdfs dir " + hbi.getHdfsRegionDir()
3125            + "!  It may be an invalid format or version file.  Treating as "
3126            + "an orphaned regiondir.";
3127          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3128          try {
3129            hbck.debugLsr(hbi.getHdfsRegionDir());
3130          } catch (IOException ioe2) {
3131            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3132            throw ioe2;
3133          }
3134          hbck.orphanHdfsDirs.add(hbi);
3135          throw ioe;
3136        }
3137      }
3138      return null;
3139    }
3140  }
3141
3142  /**
3143   * Display the full report from fsck. This displays all live and dead region servers, and all
3144   * known regions.
3145   */
3146  public static void setDisplayFullReport() {
3147    details = true;
3148  }
3149
3150  public static boolean shouldDisplayFullReport() {
3151    return details;
3152  }
3153
3154  /**
3155   * Set exclusive mode.
3156   */
3157  public static void setForceExclusive() {
3158    forceExclusive = true;
3159  }
3160
3161  /**
3162   * Only one instance of hbck can modify HBase at a time.
3163   */
3164  public boolean isExclusive() {
3165    return fixAny || forceExclusive;
3166  }
3167
3168  /**
3169   * Set summary mode. Print only summary of the tables and status (OK or INCONSISTENT)
3170   */
3171  static void setSummary() {
3172    summary = true;
3173  }
3174
3175  /**
3176   * Set hbase:meta check mode. Print only info about hbase:meta table deployment/state
3177   */
3178  void setCheckMetaOnly() {
3179    checkMetaOnly = true;
3180  }
3181
3182  /**
3183   * Set region boundaries check mode.
3184   */
3185  void setRegionBoundariesCheck() {
3186    checkRegionBoundaries = true;
3187  }
3188
3189  /**
3190   * Set replication fix mode.
3191   */
3192  public void setFixReplication(boolean shouldFix) {
3193    fixReplication = shouldFix;
3194    fixAny |= shouldFix;
3195  }
3196
3197  public void setCleanReplicationBarrier(boolean shouldClean) {
3198    cleanReplicationBarrier = shouldClean;
3199  }
3200
3201  /**
3202   * Check if we should rerun fsck again. This checks if we've tried to fix something and we should
3203   * rerun fsck tool again. Display the full report from fsck. This displays all live and dead
3204   * region servers, and all known regions.
3205   */
3206  void setShouldRerun() {
3207    rerun = true;
3208  }
3209
3210  public boolean shouldRerun() {
3211    return rerun;
3212  }
3213
3214  /**
3215   * Fix inconsistencies found by fsck. This should try to fix errors (if any) found by fsck
3216   * utility.
3217   */
3218  public void setFixAssignments(boolean shouldFix) {
3219    fixAssignments = shouldFix;
3220    fixAny |= shouldFix;
3221  }
3222
3223  boolean shouldFixAssignments() {
3224    return fixAssignments;
3225  }
3226
3227  public void setFixMeta(boolean shouldFix) {
3228    fixMeta = shouldFix;
3229    fixAny |= shouldFix;
3230  }
3231
3232  boolean shouldFixMeta() {
3233    return fixMeta;
3234  }
3235
3236  public void setFixEmptyMetaCells(boolean shouldFix) {
3237    fixEmptyMetaCells = shouldFix;
3238    fixAny |= shouldFix;
3239  }
3240
3241  boolean shouldFixEmptyMetaCells() {
3242    return fixEmptyMetaCells;
3243  }
3244
3245  public void setCheckHdfs(boolean checking) {
3246    checkHdfs = checking;
3247  }
3248
3249  boolean shouldCheckHdfs() {
3250    return checkHdfs;
3251  }
3252
3253  public void setFixHdfsHoles(boolean shouldFix) {
3254    fixHdfsHoles = shouldFix;
3255    fixAny |= shouldFix;
3256  }
3257
3258  boolean shouldFixHdfsHoles() {
3259    return fixHdfsHoles;
3260  }
3261
3262  public void setFixTableOrphans(boolean shouldFix) {
3263    fixTableOrphans = shouldFix;
3264    fixAny |= shouldFix;
3265  }
3266
3267  boolean shouldFixTableOrphans() {
3268    return fixTableOrphans;
3269  }
3270
3271  public void setFixHdfsOverlaps(boolean shouldFix) {
3272    fixHdfsOverlaps = shouldFix;
3273    fixAny |= shouldFix;
3274  }
3275
3276  boolean shouldFixHdfsOverlaps() {
3277    return fixHdfsOverlaps;
3278  }
3279
3280  public void setFixHdfsOrphans(boolean shouldFix) {
3281    fixHdfsOrphans = shouldFix;
3282    fixAny |= shouldFix;
3283  }
3284
3285  boolean shouldFixHdfsOrphans() {
3286    return fixHdfsOrphans;
3287  }
3288
3289  public void setFixVersionFile(boolean shouldFix) {
3290    fixVersionFile = shouldFix;
3291    fixAny |= shouldFix;
3292  }
3293
3294  public boolean shouldFixVersionFile() {
3295    return fixVersionFile;
3296  }
3297
3298  public void setSidelineBigOverlaps(boolean sbo) {
3299    this.sidelineBigOverlaps = sbo;
3300  }
3301
3302  public boolean shouldSidelineBigOverlaps() {
3303    return sidelineBigOverlaps;
3304  }
3305
3306  public void setFixSplitParents(boolean shouldFix) {
3307    fixSplitParents = shouldFix;
3308    fixAny |= shouldFix;
3309  }
3310
3311  public void setRemoveParents(boolean shouldFix) {
3312    removeParents = shouldFix;
3313    fixAny |= shouldFix;
3314  }
3315
3316  boolean shouldFixSplitParents() {
3317    return fixSplitParents;
3318  }
3319
3320  boolean shouldRemoveParents() {
3321    return removeParents;
3322  }
3323
3324  public void setFixReferenceFiles(boolean shouldFix) {
3325    fixReferenceFiles = shouldFix;
3326    fixAny |= shouldFix;
3327  }
3328
3329  boolean shouldFixReferenceFiles() {
3330    return fixReferenceFiles;
3331  }
3332
3333  public void setFixHFileLinks(boolean shouldFix) {
3334    fixHFileLinks = shouldFix;
3335    fixAny |= shouldFix;
3336  }
3337
3338  boolean shouldFixHFileLinks() {
3339    return fixHFileLinks;
3340  }
3341
3342  public boolean shouldIgnorePreCheckPermission() {
3343    return !fixAny || ignorePreCheckPermission;
3344  }
3345
3346  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3347    this.ignorePreCheckPermission = ignorePreCheckPermission;
3348  }
3349
3350  /**
3351   * @param mm maximum number of regions to merge into a single region.
3352   */
3353  public void setMaxMerge(int mm) {
3354    this.maxMerge = mm;
3355  }
3356
3357  public int getMaxMerge() {
3358    return maxMerge;
3359  }
3360
3361  public void setMaxOverlapsToSideline(int mo) {
3362    this.maxOverlapsToSideline = mo;
3363  }
3364
3365  public int getMaxOverlapsToSideline() {
3366    return maxOverlapsToSideline;
3367  }
3368
3369  /**
3370   * Only check/fix tables specified by the list, Empty list means all tables are included.
3371   */
3372  boolean isTableIncluded(TableName table) {
3373    return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
3374  }
3375
3376  public void includeTable(TableName table) {
3377    tablesIncluded.add(table);
3378  }
3379
3380  Set<TableName> getIncludedTables() {
3381    return new HashSet<>(tablesIncluded);
3382  }
3383
3384  /**
3385   * We are interested in only those tables that have not changed their state in hbase:meta during
3386   * the last few seconds specified by hbase.admin.fsck.timelag
3387   * @param seconds - the time in seconds
3388   */
3389  public void setTimeLag(long seconds) {
3390    timelag = seconds * 1000; // convert to milliseconds
3391  }
3392
3393  /**
3394   * @param sidelineDir - HDFS path to sideline data
3395   */
3396  public void setSidelineDir(String sidelineDir) {
3397    this.sidelineDir = new Path(sidelineDir);
3398  }
3399
3400  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
3401    throws IOException {
3402    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3403  }
3404
3405  public HFileCorruptionChecker getHFilecorruptionChecker() {
3406    return hfcc;
3407  }
3408
3409  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3410    this.hfcc = hfcc;
3411  }
3412
3413  public void setRetCode(int code) {
3414    this.retcode = code;
3415  }
3416
3417  public int getRetCode() {
3418    return retcode;
3419  }
3420
3421  protected HBaseFsck printUsageAndExit() {
3422    StringWriter sw = new StringWriter(2048);
3423    PrintWriter out = new PrintWriter(sw);
3424    out.println("");
3425    out.println("-----------------------------------------------------------------------");
3426    out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
3427    out.println("In general, all Read-Only options are supported and can be be used");
3428    out.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
3429    out.println("below for details on which options are not supported.");
3430    out.println("-----------------------------------------------------------------------");
3431    out.println("");
3432    out.println("Usage: fsck [opts] {only tables}");
3433    out.println(" where [opts] are:");
3434    out.println("   -help Display help options (this)");
3435    out.println("   -details Display full report of all regions.");
3436    out.println("   -timelag <timeInSeconds>  Process only regions that "
3437      + " have not experienced any metadata updates in the last " + " <timeInSeconds> seconds.");
3438    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds"
3439      + " before checking if the fix worked if run with -fix");
3440    out.println("   -summary Print only summary of the tables and status.");
3441    out.println("   -metaonly Only check the state of the hbase:meta table.");
3442    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3443    out.println(
3444      "   -boundaries Verify that regions boundaries are the same between META and store files.");
3445    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
3446
3447    out.println("");
3448    out.println("  Datafile Repair options: (expert features, use with caution!)");
3449    out.println(
3450      "   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
3451    out.println(
3452      "   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
3453
3454    out.println("");
3455    out.println(" Replication options");
3456    out.println("   -fixReplication   Deletes replication queues for removed peers");
3457
3458    out.println("");
3459    out.println(
3460      "  Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
3461    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
3462    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
3463    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
3464    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
3465      + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3466    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
3467
3468    out.println("");
3469    out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
3470    out.println("");
3471    out.println("  UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
3472    out.println(
3473      "   -fix              Try to fix region assignments.  This is for backwards compatibility");
3474    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
3475    out.println(
3476      "   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
3477    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
3478    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
3479    out.println(
3480      "   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3481    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
3482    out.println(
3483      "   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n="
3484        + DEFAULT_MAX_MERGE + " by default)");
3485    out.println(
3486      "   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
3487    out.println(
3488      "   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n="
3489        + DEFAULT_OVERLAPS_TO_SIDELINE + " by default)");
3490    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
3491    out.println(
3492      "   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
3493    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
3494      + " (empty REGIONINFO_QUALIFIER rows)");
3495
3496    out.println("");
3497    out.println("  UNSUPPORTED Metadata Repair shortcuts");
3498    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles "
3499      + "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles"
3500      + "-fixHFileLinks");
3501    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3502    out.println("");
3503    out.println(" Replication options");
3504    out.println("   -fixReplication   Deletes replication queues for removed peers");
3505    out.println("   -cleanReplicationBarrier [tableName] clean the replication barriers "
3506      + "of a specified table, tableName is required");
3507    out.flush();
3508    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3509
3510    setRetCode(-2);
3511    return this;
3512  }
3513
3514  /**
3515   * Main program
3516   */
3517  public static void main(String[] args) throws Exception {
3518    // create a fsck object
3519    Configuration conf = HBaseConfiguration.create();
3520    Path hbasedir = CommonFSUtils.getRootDir(conf);
3521    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3522    CommonFSUtils.setFsDefault(conf, new Path(defaultFs));
3523    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
3524    System.exit(ret);
3525  }
3526
3527  /**
3528   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
3529   */
3530  static class HBaseFsckTool extends Configured implements Tool {
3531    HBaseFsckTool(Configuration conf) {
3532      super(conf);
3533    }
3534
3535    @Override
3536    public int run(String[] args) throws Exception {
3537      HBaseFsck hbck = new HBaseFsck(getConf());
3538      hbck.exec(hbck.executor, args);
3539      hbck.close();
3540      return hbck.getRetCode();
3541    }
3542  }
3543
3544  public HBaseFsck exec(ExecutorService exec, String[] args)
3545    throws KeeperException, IOException, InterruptedException, ReplicationException {
3546    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3547
3548    boolean checkCorruptHFiles = false;
3549    boolean sidelineCorruptHFiles = false;
3550
3551    // Process command-line args.
3552    for (int i = 0; i < args.length; i++) {
3553      String cmd = args[i];
3554      if (cmd.equals("-help") || cmd.equals("-h")) {
3555        return printUsageAndExit();
3556      } else if (cmd.equals("-details")) {
3557        setDisplayFullReport();
3558      } else if (cmd.equals("-exclusive")) {
3559        setForceExclusive();
3560      } else if (cmd.equals("-timelag")) {
3561        if (i == args.length - 1) {
3562          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3563          return printUsageAndExit();
3564        }
3565        try {
3566          long timelag = Long.parseLong(args[++i]);
3567          setTimeLag(timelag);
3568        } catch (NumberFormatException e) {
3569          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3570          return printUsageAndExit();
3571        }
3572      } else if (cmd.equals("-sleepBeforeRerun")) {
3573        if (i == args.length - 1) {
3574          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sleepBeforeRerun needs a value.");
3575          return printUsageAndExit();
3576        }
3577        try {
3578          sleepBeforeRerun = Long.parseLong(args[++i]);
3579        } catch (NumberFormatException e) {
3580          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3581          return printUsageAndExit();
3582        }
3583      } else if (cmd.equals("-sidelineDir")) {
3584        if (i == args.length - 1) {
3585          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3586          return printUsageAndExit();
3587        }
3588        setSidelineDir(args[++i]);
3589      } else if (cmd.equals("-fix")) {
3590        errors.reportError(ERROR_CODE.WRONG_USAGE,
3591          "This option is deprecated, please use  -fixAssignments instead.");
3592        setFixAssignments(true);
3593      } else if (cmd.equals("-fixAssignments")) {
3594        setFixAssignments(true);
3595      } else if (cmd.equals("-fixMeta")) {
3596        setFixMeta(true);
3597      } else if (cmd.equals("-noHdfsChecking")) {
3598        setCheckHdfs(false);
3599      } else if (cmd.equals("-fixHdfsHoles")) {
3600        setFixHdfsHoles(true);
3601      } else if (cmd.equals("-fixHdfsOrphans")) {
3602        setFixHdfsOrphans(true);
3603      } else if (cmd.equals("-fixTableOrphans")) {
3604        setFixTableOrphans(true);
3605      } else if (cmd.equals("-fixHdfsOverlaps")) {
3606        setFixHdfsOverlaps(true);
3607      } else if (cmd.equals("-fixVersionFile")) {
3608        setFixVersionFile(true);
3609      } else if (cmd.equals("-sidelineBigOverlaps")) {
3610        setSidelineBigOverlaps(true);
3611      } else if (cmd.equals("-fixSplitParents")) {
3612        setFixSplitParents(true);
3613      } else if (cmd.equals("-removeParents")) {
3614        setRemoveParents(true);
3615      } else if (cmd.equals("-ignorePreCheckPermission")) {
3616        setIgnorePreCheckPermission(true);
3617      } else if (cmd.equals("-checkCorruptHFiles")) {
3618        checkCorruptHFiles = true;
3619      } else if (cmd.equals("-sidelineCorruptHFiles")) {
3620        sidelineCorruptHFiles = true;
3621      } else if (cmd.equals("-fixReferenceFiles")) {
3622        setFixReferenceFiles(true);
3623      } else if (cmd.equals("-fixHFileLinks")) {
3624        setFixHFileLinks(true);
3625      } else if (cmd.equals("-fixEmptyMetaCells")) {
3626        setFixEmptyMetaCells(true);
3627      } else if (cmd.equals("-repair")) {
3628        // this attempts to merge overlapping hdfs regions, needs testing
3629        // under load
3630        setFixHdfsHoles(true);
3631        setFixHdfsOrphans(true);
3632        setFixMeta(true);
3633        setFixAssignments(true);
3634        setFixHdfsOverlaps(true);
3635        setFixVersionFile(true);
3636        setSidelineBigOverlaps(true);
3637        setFixSplitParents(false);
3638        setCheckHdfs(true);
3639        setFixReferenceFiles(true);
3640        setFixHFileLinks(true);
3641      } else if (cmd.equals("-repairHoles")) {
3642        // this will make all missing hdfs regions available but may lose data
3643        setFixHdfsHoles(true);
3644        setFixHdfsOrphans(false);
3645        setFixMeta(true);
3646        setFixAssignments(true);
3647        setFixHdfsOverlaps(false);
3648        setSidelineBigOverlaps(false);
3649        setFixSplitParents(false);
3650        setCheckHdfs(true);
3651      } else if (cmd.equals("-maxOverlapsToSideline")) {
3652        if (i == args.length - 1) {
3653          errors.reportError(ERROR_CODE.WRONG_USAGE,
3654            "-maxOverlapsToSideline needs a numeric value argument.");
3655          return printUsageAndExit();
3656        }
3657        try {
3658          int maxOverlapsToSideline = Integer.parseInt(args[++i]);
3659          setMaxOverlapsToSideline(maxOverlapsToSideline);
3660        } catch (NumberFormatException e) {
3661          errors.reportError(ERROR_CODE.WRONG_USAGE,
3662            "-maxOverlapsToSideline needs a numeric value argument.");
3663          return printUsageAndExit();
3664        }
3665      } else if (cmd.equals("-maxMerge")) {
3666        if (i == args.length - 1) {
3667          errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument.");
3668          return printUsageAndExit();
3669        }
3670        try {
3671          int maxMerge = Integer.parseInt(args[++i]);
3672          setMaxMerge(maxMerge);
3673        } catch (NumberFormatException e) {
3674          errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument.");
3675          return printUsageAndExit();
3676        }
3677      } else if (cmd.equals("-summary")) {
3678        setSummary();
3679      } else if (cmd.equals("-metaonly")) {
3680        setCheckMetaOnly();
3681      } else if (cmd.equals("-boundaries")) {
3682        setRegionBoundariesCheck();
3683      } else if (cmd.equals("-fixReplication")) {
3684        setFixReplication(true);
3685      } else if (cmd.equals("-cleanReplicationBarrier")) {
3686        setCleanReplicationBarrier(true);
3687        if (args[++i].startsWith("-")) {
3688          printUsageAndExit();
3689        }
3690        setCleanReplicationBarrierTable(args[i]);
3691      } else if (cmd.startsWith("-")) {
3692        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3693        return printUsageAndExit();
3694      } else {
3695        includeTable(TableName.valueOf(cmd));
3696        errors.print("Allow checking/fixes for table: " + cmd);
3697      }
3698    }
3699
3700    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
3701
3702    // pre-check current user has FS write permission or not
3703    try {
3704      preCheckPermission();
3705    } catch (IOException ioe) {
3706      Runtime.getRuntime().exit(-1);
3707    }
3708
3709    // do the real work of hbck
3710    connect();
3711
3712    // after connecting to server above, we have server version
3713    // check if unsupported option is specified based on server version
3714    if (!isOptionsSupported(args)) {
3715      return printUsageAndExit();
3716    }
3717
3718    try {
3719      // if corrupt file mode is on, first fix them since they may be opened later
3720      if (checkCorruptHFiles || sidelineCorruptHFiles) {
3721        LOG.info("Checking all hfiles for corruption");
3722        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3723        setHFileCorruptionChecker(hfcc); // so we can get result
3724        Collection<TableName> tables = getIncludedTables();
3725        Collection<Path> tableDirs = new ArrayList<>();
3726        Path rootdir = CommonFSUtils.getRootDir(getConf());
3727        if (tables.size() > 0) {
3728          for (TableName t : tables) {
3729            tableDirs.add(CommonFSUtils.getTableDir(rootdir, t));
3730          }
3731        } else {
3732          tableDirs = FSUtils.getTableDirs(CommonFSUtils.getCurrentFileSystem(getConf()), rootdir);
3733        }
3734        hfcc.checkTables(tableDirs);
3735        hfcc.report(errors);
3736      }
3737
3738      // check and fix table integrity, region consistency.
3739      int code = onlineHbck();
3740      setRetCode(code);
3741      // If we have changed the HBase state it is better to run hbck again
3742      // to see if we haven't broken something else in the process.
3743      // We run it only once more because otherwise we can easily fall into
3744      // an infinite loop.
3745      if (shouldRerun()) {
3746        try {
3747          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3748          Thread.sleep(sleepBeforeRerun);
3749        } catch (InterruptedException ie) {
3750          LOG.warn("Interrupted while sleeping");
3751          return this;
3752        }
3753        // Just report
3754        setFixAssignments(false);
3755        setFixMeta(false);
3756        setFixHdfsHoles(false);
3757        setFixHdfsOverlaps(false);
3758        setFixVersionFile(false);
3759        setFixTableOrphans(false);
3760        errors.resetErrors();
3761        code = onlineHbck();
3762        setRetCode(code);
3763      }
3764    } finally {
3765      IOUtils.closeQuietly(this, e -> LOG.warn("", e));
3766    }
3767    return this;
3768  }
3769
3770  private boolean isOptionsSupported(String[] args) {
3771    boolean result = true;
3772    String hbaseServerVersion = status.getHBaseVersion();
3773    if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
3774      // Process command-line args.
3775      for (String arg : args) {
3776        if (unsupportedOptionsInV2.contains(arg)) {
3777          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
3778            "option '" + arg + "' is not " + "supported!");
3779          result = false;
3780          break;
3781        }
3782      }
3783    }
3784    return result;
3785  }
3786
3787  public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
3788    this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
3789  }
3790
3791  public void cleanReplicationBarrier() throws IOException {
3792    if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
3793      return;
3794    }
3795    if (cleanReplicationBarrierTable.isSystemTable()) {
3796      errors.reportError(ERROR_CODE.INVALID_TABLE,
3797        "invalid table: " + cleanReplicationBarrierTable);
3798      return;
3799    }
3800
3801    boolean isGlobalScope = false;
3802    try {
3803      isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
3804    } catch (TableNotFoundException e) {
3805      LOG.info("we may need to clean some erroneous data due to bugs");
3806    }
3807
3808    if (isGlobalScope) {
3809      errors.reportError(ERROR_CODE.INVALID_TABLE,
3810        "table's replication scope is global: " + cleanReplicationBarrierTable);
3811      return;
3812    }
3813    List<byte[]> regionNames = new ArrayList<>();
3814    Scan barrierScan = new Scan();
3815    barrierScan.setCaching(100);
3816    barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
3817    barrierScan
3818      .withStartRow(ClientMetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
3819        ClientMetaTableAccessor.QueryType.REGION))
3820      .withStopRow(ClientMetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
3821        ClientMetaTableAccessor.QueryType.REGION));
3822    Result result;
3823    try (ResultScanner scanner = meta.getScanner(barrierScan)) {
3824      while ((result = scanner.next()) != null) {
3825        regionNames.add(result.getRow());
3826      }
3827    }
3828    if (regionNames.size() <= 0) {
3829      errors.reportError(ERROR_CODE.INVALID_TABLE,
3830        "there is no barriers of this table: " + cleanReplicationBarrierTable);
3831      return;
3832    }
3833    ReplicationQueueStorage queueStorage =
3834      ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
3835    List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
3836    if (peerDescriptions != null && peerDescriptions.size() > 0) {
3837      List<String> peers = peerDescriptions.stream()
3838        .filter(
3839          peerConfig -> peerConfig.getPeerConfig().needToReplicate(cleanReplicationBarrierTable))
3840        .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
3841      try {
3842        List<String> batch = new ArrayList<>();
3843        for (String peer : peers) {
3844          for (byte[] regionName : regionNames) {
3845            batch.add(RegionInfo.encodeRegionName(regionName));
3846            if (batch.size() % 100 == 0) {
3847              queueStorage.removeLastSequenceIds(peer, batch);
3848              batch.clear();
3849            }
3850          }
3851          if (batch.size() > 0) {
3852            queueStorage.removeLastSequenceIds(peer, batch);
3853            batch.clear();
3854          }
3855        }
3856      } catch (ReplicationException re) {
3857        throw new IOException(re);
3858      }
3859    }
3860    for (byte[] regionName : regionNames) {
3861      meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
3862    }
3863    setShouldRerun();
3864  }
3865
3866  /**
3867   * ls -r for debugging purposes
3868   */
3869  void debugLsr(Path p) throws IOException {
3870    debugLsr(getConf(), p, errors);
3871  }
3872
3873  /**
3874   * ls -r for debugging purposes
3875   */
3876  public static void debugLsr(Configuration conf, Path p) throws IOException {
3877    debugLsr(conf, p, new PrintingErrorReporter());
3878  }
3879
3880  /**
3881   * ls -r for debugging purposes
3882   */
3883  public static void debugLsr(Configuration conf, Path p, HbckErrorReporter errors)
3884    throws IOException {
3885    if (!LOG.isDebugEnabled() || p == null) {
3886      return;
3887    }
3888    FileSystem fs = p.getFileSystem(conf);
3889
3890    if (!fs.exists(p)) {
3891      // nothing
3892      return;
3893    }
3894    errors.print(p.toString());
3895
3896    if (fs.isFile(p)) {
3897      return;
3898    }
3899
3900    if (fs.getFileStatus(p).isDirectory()) {
3901      FileStatus[] fss = fs.listStatus(p);
3902      for (FileStatus status : fss) {
3903        debugLsr(conf, status.getPath(), errors);
3904      }
3905    }
3906  }
3907}