001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.Closeable;
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.io.InterruptedIOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.net.InetAddress;
027import java.net.URI;
028import java.util.ArrayList;
029import java.util.Collection;
030import java.util.Collections;
031import java.util.Comparator;
032import java.util.EnumSet;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.List;
037import java.util.Locale;
038import java.util.Map;
039import java.util.Map.Entry;
040import java.util.Objects;
041import java.util.Optional;
042import java.util.Set;
043import java.util.SortedMap;
044import java.util.TreeMap;
045import java.util.Vector;
046import java.util.concurrent.Callable;
047import java.util.concurrent.ConcurrentSkipListMap;
048import java.util.concurrent.ExecutionException;
049import java.util.concurrent.ExecutorService;
050import java.util.concurrent.Executors;
051import java.util.concurrent.Future;
052import java.util.concurrent.FutureTask;
053import java.util.concurrent.ScheduledThreadPoolExecutor;
054import java.util.concurrent.TimeUnit;
055import java.util.concurrent.TimeoutException;
056import java.util.concurrent.atomic.AtomicBoolean;
057import java.util.concurrent.atomic.AtomicInteger;
058import java.util.stream.Collectors;
059import org.apache.commons.io.IOUtils;
060import org.apache.commons.lang3.StringUtils;
061import org.apache.hadoop.conf.Configuration;
062import org.apache.hadoop.conf.Configured;
063import org.apache.hadoop.fs.FSDataOutputStream;
064import org.apache.hadoop.fs.FileStatus;
065import org.apache.hadoop.fs.FileSystem;
066import org.apache.hadoop.fs.Path;
067import org.apache.hadoop.fs.permission.FsAction;
068import org.apache.hadoop.fs.permission.FsPermission;
069import org.apache.hadoop.hbase.Abortable;
070import org.apache.hadoop.hbase.CatalogFamilyFormat;
071import org.apache.hadoop.hbase.Cell;
072import org.apache.hadoop.hbase.CellUtil;
073import org.apache.hadoop.hbase.ClientMetaTableAccessor;
074import org.apache.hadoop.hbase.ClusterMetrics;
075import org.apache.hadoop.hbase.ClusterMetrics.Option;
076import org.apache.hadoop.hbase.HBaseConfiguration;
077import org.apache.hadoop.hbase.HBaseInterfaceAudience;
078import org.apache.hadoop.hbase.HConstants;
079import org.apache.hadoop.hbase.HRegionLocation;
080import org.apache.hadoop.hbase.KeyValue;
081import org.apache.hadoop.hbase.MasterNotRunningException;
082import org.apache.hadoop.hbase.MetaTableAccessor;
083import org.apache.hadoop.hbase.RegionLocations;
084import org.apache.hadoop.hbase.ServerName;
085import org.apache.hadoop.hbase.TableName;
086import org.apache.hadoop.hbase.TableNotFoundException;
087import org.apache.hadoop.hbase.ZooKeeperConnectionException;
088import org.apache.hadoop.hbase.client.Admin;
089import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
090import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
091import org.apache.hadoop.hbase.client.Connection;
092import org.apache.hadoop.hbase.client.ConnectionFactory;
093import org.apache.hadoop.hbase.client.Delete;
094import org.apache.hadoop.hbase.client.Get;
095import org.apache.hadoop.hbase.client.Put;
096import org.apache.hadoop.hbase.client.RegionInfo;
097import org.apache.hadoop.hbase.client.RegionInfoBuilder;
098import org.apache.hadoop.hbase.client.RegionLocator;
099import org.apache.hadoop.hbase.client.RegionReplicaUtil;
100import org.apache.hadoop.hbase.client.Result;
101import org.apache.hadoop.hbase.client.ResultScanner;
102import org.apache.hadoop.hbase.client.RowMutations;
103import org.apache.hadoop.hbase.client.Scan;
104import org.apache.hadoop.hbase.client.Table;
105import org.apache.hadoop.hbase.client.TableDescriptor;
106import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
107import org.apache.hadoop.hbase.client.TableState;
108import org.apache.hadoop.hbase.io.FileLink;
109import org.apache.hadoop.hbase.io.HFileLink;
110import org.apache.hadoop.hbase.io.hfile.CacheConfig;
111import org.apache.hadoop.hbase.io.hfile.HFile;
112import org.apache.hadoop.hbase.master.RegionState;
113import org.apache.hadoop.hbase.regionserver.HRegion;
114import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
115import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
116import org.apache.hadoop.hbase.replication.ReplicationException;
117import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
118import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
119import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
120import org.apache.hadoop.hbase.security.UserProvider;
121import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
122import org.apache.hadoop.hbase.util.HbckErrorReporter.ERROR_CODE;
123import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
124import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
125import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
126import org.apache.hadoop.hbase.wal.WALSplitUtil;
127import org.apache.hadoop.hbase.zookeeper.ZKUtil;
128import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
129import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
130import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
131import org.apache.hadoop.ipc.RemoteException;
132import org.apache.hadoop.security.AccessControlException;
133import org.apache.hadoop.security.UserGroupInformation;
134import org.apache.hadoop.util.ReflectionUtils;
135import org.apache.hadoop.util.Tool;
136import org.apache.hadoop.util.ToolRunner;
137import org.apache.yetus.audience.InterfaceAudience;
138import org.apache.yetus.audience.InterfaceStability;
139import org.apache.zookeeper.KeeperException;
140import org.slf4j.Logger;
141import org.slf4j.LoggerFactory;
142
143import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
144import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
145import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
146import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
147import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
148import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
149
150/**
151 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and table integrity
152 * problems in a corrupted HBase. This tool was written for hbase-1.x. It does not work with
153 * hbase-2.x; it can read state but is not allowed to change state; i.e. effect 'repair'. Even
154 * though it can 'read' state, given how so much has changed in how hbase1 and hbase2 operate, it
155 * will often misread. See hbck2 (HBASE-19121) for a hbck tool for hbase2. This class is deprecated.
156 * <p>
157 * Region consistency checks verify that hbase:meta, region deployment on region servers and the
158 * state of data in HDFS (.regioninfo files) all are in accordance.
159 * <p>
160 * Table integrity checks verify that all possible row keys resolve to exactly one region of a
161 * table. This means there are no individual degenerate or backwards regions; no holes between
162 * regions; and that there are no overlapping regions.
163 * <p>
164 * The general repair strategy works in two phases:
165 * <ol>
166 * <li>Repair Table Integrity on HDFS. (merge or fabricate regions)
167 * <li>Repair Region Consistency with hbase:meta and assignments
168 * </ol>
169 * <p>
170 * For table integrity repairs, the tables' region directories are scanned for .regioninfo files.
171 * Each table's integrity is then verified. If there are any orphan regions (regions with no
172 * .regioninfo files) or holes, new regions are fabricated. Backwards regions are sidelined as well
173 * as empty degenerate (endkey==startkey) regions. If there are any overlapping regions, a new
174 * region is created and all data is merged into the new region.
175 * <p>
176 * Table integrity repairs deal solely with HDFS and could potentially be done offline -- the hbase
177 * region servers or master do not need to be running. This phase can eventually be used to
178 * completely reconstruct the hbase:meta table in an offline fashion.
179 * <p>
180 * Region consistency requires three conditions -- 1) valid .regioninfo file present in an HDFS
181 * region dir, 2) valid row with .regioninfo data in META, and 3) a region is deployed only at the
182 * regionserver that was assigned to with proper state in the master.
183 * <p>
184 * Region consistency repairs require hbase to be online so that hbck can contact the HBase master
185 * and region servers. The hbck#connect() method must first be called successfully. Much of the
186 * region consistency information is transient and less risky to repair.
187 * <p>
188 * If hbck is run from the command line, there are a handful of arguments that can be used to limit
189 * the kinds of repairs hbck will do. See the code in {@link #printUsageAndExit()} for more details.
190 * @deprecated For removal in hbase-4.0.0. Use HBCK2 instead.
191 */
192@Deprecated
193@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
194@InterfaceStability.Evolving
195public class HBaseFsck extends Configured implements Closeable {
196  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
197  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
198  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
199  private static boolean rsSupportsOffline = true;
200  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
201  private static final int DEFAULT_MAX_MERGE = 5;
202
203  /**
204   * Here is where hbase-1.x used to default the lock for hbck1. It puts in place a lock when it
205   * goes to write/make changes.
206   */
207  @InterfaceAudience.Private
208  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
209  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
210  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
211  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
212  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
213  // In HADOOP-2.6 and later, the Namenode proxy now created with custom RetryPolicy for
214  // AlreadyBeingCreatedException which is implies timeout on this operations up to
215  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
216  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
217  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
218  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
219  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
220
221  /**********************
222   * Internal resources
223   **********************/
224  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
225  private ClusterMetrics status;
226  private Connection connection;
227  private Admin admin;
228  private Table meta;
229  // threads to do ||izable tasks: retrieve data from regionservers, handle overlapping regions
230  protected ExecutorService executor;
231  private long startMillis = EnvironmentEdgeManager.currentTime();
232  private HFileCorruptionChecker hfcc;
233  private int retcode = 0;
234  private Path HBCK_LOCK_PATH;
235  private FSDataOutputStream hbckOutFd;
236  // This lock is to prevent cleanup of balancer resources twice between
237  // ShutdownHook and the main code. We cleanup only if the connect() is
238  // successful
239  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
240
241  // Unsupported options in HBase 2.0+
242  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
243    "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
244    "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
245    "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
246
247  /***********
248   * Options
249   ***********/
250  private static boolean details = false; // do we display the full report
251  private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
252  private static boolean forceExclusive = false; // only this hbck can modify HBase
253  private boolean fixAssignments = false; // fix assignment errors?
254  private boolean fixMeta = false; // fix meta errors?
255  private boolean checkHdfs = true; // load and check fs consistency?
256  private boolean fixHdfsHoles = false; // fix fs holes?
257  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
258  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
259  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
260  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
261  private boolean fixSplitParents = false; // fix lingering split parents
262  private boolean removeParents = false; // remove split parents
263  private boolean fixReferenceFiles = false; // fix lingering reference store file
264  private boolean fixHFileLinks = false; // fix lingering HFileLinks
265  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
266  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
267  private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
268  private boolean fixAny = false; // Set to true if any of the fix is required.
269
270  // limit checking/fixes to listed tables, if empty attempt to check/fix all
271  // hbase:meta are always checked
272  private Set<TableName> tablesIncluded = new HashSet<>();
273  private TableName cleanReplicationBarrierTable;
274  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
275  // maximum number of overlapping regions to sideline
276  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
277  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
278  private Path sidelineDir = null;
279
280  private boolean rerun = false; // if we tried to fix something, rerun hbck
281  private static boolean summary = false; // if we want to print less output
282  private boolean checkMetaOnly = false;
283  private boolean checkRegionBoundaries = false;
284  private boolean ignorePreCheckPermission = false; // if pre-check permission
285
286  /*********
287   * State
288   *********/
289  final private HbckErrorReporter errors;
290  int fixes = 0;
291
292  /**
293   * This map contains the state of all hbck items. It maps from encoded region name to
294   * HbckRegionInfo structure. The information contained in HbckRegionInfo is used to detect and
295   * correct consistency (hdfs/meta/deployment) problems.
296   */
297  private TreeMap<String, HbckRegionInfo> regionInfoMap = new TreeMap<>();
298  // Empty regioninfo qualifiers in hbase:meta
299  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
300
301  /**
302   * This map from Tablename -> TableInfo contains the structures necessary to detect table
303   * consistency problems (holes, dupes, overlaps). It is sorted to prevent dupes. If tablesIncluded
304   * is empty, this map contains all tables. Otherwise, it contains only meta tables and tables in
305   * tablesIncluded, unless checkMetaOnly is specified, in which case, it contains only the meta
306   * table
307   */
308  private SortedMap<TableName, HbckTableInfo> tablesInfo = new ConcurrentSkipListMap<>();
309
310  /**
311   * When initially looking at HDFS, we attempt to find any orphaned data.
312   */
313  private List<HbckRegionInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<>());
314
315  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
316  private Map<TableName, TableState> tableStates = new HashMap<>();
317  private final RetryCounterFactory lockFileRetryCounterFactory;
318  private final RetryCounterFactory createZNodeRetryCounterFactory;
319
320  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
321
322  private ZKWatcher zkw = null;
323  private String hbckEphemeralNodePath = null;
324  private boolean hbckZodeCreated = false;
325
326  /**
327   * Constructor
328   * @param conf Configuration object
329   * @throws MasterNotRunningException    if the master is not running
330   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
331   */
332  public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
333    this(conf, createThreadPool(conf));
334  }
335
336  private static ExecutorService createThreadPool(Configuration conf) {
337    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
338    return new ScheduledThreadPoolExecutor(numThreads,
339      new ThreadFactoryBuilder().setNameFormat("hbasefsck-pool-%d").setDaemon(true)
340        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
341  }
342
343  /**
344   * Constructor Configuration object if the master is not running if unable to connect to ZooKeeper
345   */
346  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
347    ZooKeeperConnectionException, IOException, ClassNotFoundException {
348    super(conf);
349    errors = getErrorReporter(getConf());
350    this.executor = exec;
351    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
352    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
353    zkw = createZooKeeperWatcher();
354  }
355
356  /** Returns A retry counter factory configured for retrying lock file creation. */
357  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
358    return new RetryCounterFactory(
359      conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
360      conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
361        DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
362      conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
363        DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
364  }
365
366  /** Returns A retry counter factory configured for retrying znode creation. */
367  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
368    return new RetryCounterFactory(
369      conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
370      conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
371        DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
372      conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
373        DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
374  }
375
376  /** Returns Return the tmp dir this tool writes too. */
377  @InterfaceAudience.Private
378  public static Path getTmpDir(Configuration conf) throws IOException {
379    return new Path(CommonFSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
380  }
381
382  private static class FileLockCallable implements Callable<FSDataOutputStream> {
383    RetryCounter retryCounter;
384    private final Configuration conf;
385    private Path hbckLockPath = null;
386
387    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
388      this.retryCounter = retryCounter;
389      this.conf = conf;
390    }
391
392    /** Returns Will be <code>null</code> unless you call {@link #call()} */
393    Path getHbckLockPath() {
394      return this.hbckLockPath;
395    }
396
397    @Override
398    public FSDataOutputStream call() throws IOException {
399      try {
400        FileSystem fs = CommonFSUtils.getCurrentFileSystem(this.conf);
401        FsPermission defaultPerms =
402          CommonFSUtils.getFilePermissions(fs, this.conf, HConstants.DATA_FILE_UMASK_KEY);
403        Path tmpDir = getTmpDir(conf);
404        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
405        fs.mkdirs(tmpDir);
406        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
407        out.writeBytes(InetAddress.getLocalHost().toString());
408        // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
409        out.writeBytes(" Written by an hbase-2.x Master to block an "
410          + "attempt by an hbase-1.x HBCK tool making modification to state. "
411          + "See 'HBCK must match HBase server version' in the hbase refguide.");
412        out.flush();
413        return out;
414      } catch (RemoteException e) {
415        if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
416          return null;
417        } else {
418          throw e;
419        }
420      }
421    }
422
423    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
424      final Path hbckLockFilePath, final FsPermission defaultPerms) throws IOException {
425      IOException exception = null;
426      do {
427        try {
428          return CommonFSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
429        } catch (IOException ioe) {
430          LOG.info("Failed to create lock file " + hbckLockFilePath.getName() + ", try="
431            + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
432          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), ioe);
433          try {
434            exception = ioe;
435            retryCounter.sleepUntilNextRetry();
436          } catch (InterruptedException ie) {
437            throw (InterruptedIOException) new InterruptedIOException(
438              "Can't create lock file " + hbckLockFilePath.getName()).initCause(ie);
439          }
440        }
441      } while (retryCounter.shouldRetry());
442
443      throw exception;
444    }
445  }
446
447  /**
448   * This method maintains a lock using a file. If the creation fails we return null
449   * @return FSDataOutputStream object corresponding to the newly opened lock file
450   * @throws IOException if IO failure occurs
451   */
452  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
453    RetryCounter retryCounter) throws IOException {
454    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
455    ExecutorService executor = Executors.newFixedThreadPool(1);
456    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
457    executor.execute(futureTask);
458    final int timeoutInSeconds =
459      conf.getInt("hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
460    FSDataOutputStream stream = null;
461    try {
462      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
463    } catch (ExecutionException ee) {
464      LOG.warn("Encountered exception when opening lock file", ee);
465    } catch (InterruptedException ie) {
466      LOG.warn("Interrupted when opening lock file", ie);
467      Thread.currentThread().interrupt();
468    } catch (TimeoutException exception) {
469      // took too long to obtain lock
470      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
471      futureTask.cancel(true);
472    } finally {
473      executor.shutdownNow();
474    }
475    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
476  }
477
478  private void unlockHbck() {
479    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
480      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
481      do {
482        try {
483          Closeables.close(hbckOutFd, true);
484          CommonFSUtils.delete(CommonFSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
485          LOG.info("Finishing hbck");
486          return;
487        } catch (IOException ioe) {
488          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
489            + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
490          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
491          try {
492            retryCounter.sleepUntilNextRetry();
493          } catch (InterruptedException ie) {
494            Thread.currentThread().interrupt();
495            LOG.warn("Interrupted while deleting lock file" + HBCK_LOCK_PATH);
496            return;
497          }
498        }
499      } while (retryCounter.shouldRetry());
500    }
501  }
502
503  /**
504   * To repair region consistency, one must call connect() in order to repair online state.
505   */
506  public void connect() throws IOException {
507
508    if (isExclusive()) {
509      // Grab the lock
510      Pair<Path, FSDataOutputStream> pair =
511        checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
512      HBCK_LOCK_PATH = pair.getFirst();
513      this.hbckOutFd = pair.getSecond();
514      if (hbckOutFd == null) {
515        setRetCode(-1);
516        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. "
517          + "[If you are sure no other instance is running, delete the lock file " + HBCK_LOCK_PATH
518          + " and rerun the tool]");
519        throw new IOException("Duplicate hbck - Abort");
520      }
521
522      // Make sure to cleanup the lock
523      hbckLockCleanup.set(true);
524    }
525
526    // Add a shutdown hook to this thread, in case user tries to
527    // kill the hbck with a ctrl-c, we want to cleanup the lock so that
528    // it is available for further calls
529    Runtime.getRuntime().addShutdownHook(new Thread() {
530      @Override
531      public void run() {
532        IOUtils.closeQuietly(HBaseFsck.this, e -> LOG.warn("", e));
533        cleanupHbckZnode();
534        unlockHbck();
535      }
536    });
537
538    LOG.info("Launching hbck");
539
540    connection = ConnectionFactory.createConnection(getConf());
541    admin = connection.getAdmin();
542    meta = connection.getTable(TableName.META_TABLE_NAME);
543    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS, Option.DEAD_SERVERS,
544      Option.MASTER, Option.BACKUP_MASTERS, Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
545  }
546
547  /**
548   * Get deployed regions according to the region servers.
549   */
550  private void loadDeployedRegions() throws IOException, InterruptedException {
551    // From the master, get a list of all known live region servers
552    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
553    errors.print("Number of live region servers: " + regionServers.size());
554    if (details) {
555      for (ServerName rsinfo : regionServers) {
556        errors.print("  " + rsinfo.getServerName());
557      }
558    }
559
560    // From the master, get a list of all dead region servers
561    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
562    errors.print("Number of dead region servers: " + deadRegionServers.size());
563    if (details) {
564      for (ServerName name : deadRegionServers) {
565        errors.print("  " + name);
566      }
567    }
568
569    // Print the current master name and state
570    errors.print("Master: " + status.getMasterName());
571
572    // Print the list of all backup masters
573    Collection<ServerName> backupMasters = status.getBackupMasterNames();
574    errors.print("Number of backup masters: " + backupMasters.size());
575    if (details) {
576      for (ServerName name : backupMasters) {
577        errors.print("  " + name);
578      }
579    }
580
581    errors.print("Average load: " + status.getAverageLoad());
582    errors.print("Number of requests: " + status.getRequestCount());
583    errors.print("Number of regions: " + status.getRegionCount());
584
585    List<RegionState> rits = status.getRegionStatesInTransition();
586    errors.print("Number of regions in transition: " + rits.size());
587    if (details) {
588      for (RegionState state : rits) {
589        errors.print("  " + state.toDescriptiveString());
590      }
591    }
592
593    // Determine what's deployed
594    processRegionServers(regionServers);
595  }
596
597  /**
598   * Clear the current state of hbck.
599   */
600  private void clearState() {
601    // Make sure regionInfo is empty before starting
602    fixes = 0;
603    regionInfoMap.clear();
604    emptyRegionInfoQualifiers.clear();
605    tableStates.clear();
606    errors.clear();
607    tablesInfo.clear();
608    orphanHdfsDirs.clear();
609    skippedRegions.clear();
610  }
611
612  /**
613   * This repair method analyzes hbase data in hdfs and repairs it to satisfy the table integrity
614   * rules. HBase doesn't need to be online for this operation to work.
615   */
616  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
617    // Initial pass to fix orphans.
618    if (
619      shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
620        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())
621    ) {
622      LOG.info("Loading regioninfos HDFS");
623      // if nothing is happening this should always complete in two iterations.
624      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
625      int curIter = 0;
626      do {
627        clearState(); // clears hbck state and reset fixes to 0 and.
628        // repair what's on HDFS
629        restoreHdfsIntegrity();
630        curIter++;// limit the number of iterations.
631      } while (fixes > 0 && curIter <= maxIterations);
632
633      // Repairs should be done in the first iteration and verification in the second.
634      // If there are more than 2 passes, something funny has happened.
635      if (curIter > 2) {
636        if (curIter == maxIterations) {
637          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
638            + "Tables integrity may not be fully repaired!");
639        } else {
640          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
641        }
642      }
643    }
644  }
645
646  /**
647   * This repair method requires the cluster to be online since it contacts region servers and the
648   * masters. It makes each region's state in HDFS, in hbase:meta, and deployments consistent.
649   * @return If &gt; 0 , number of errors detected, if &lt; 0 there was an unrecoverable error. If
650   *         0, we have a clean hbase.
651   */
652  public int onlineConsistencyRepair() throws IOException, KeeperException, InterruptedException {
653
654    // get regions according to what is online on each RegionServer
655    loadDeployedRegions();
656    // check whether hbase:meta is deployed and online
657    recordMetaRegion();
658    // Report inconsistencies if there are any unknown servers.
659    reportUnknownServers();
660    // Check if hbase:meta is found only once and in the right place
661    if (!checkMetaRegion()) {
662      String errorMsg = "hbase:meta table is not consistent. ";
663      if (shouldFixAssignments()) {
664        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
665      } else {
666        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
667      }
668      errors.reportError(errorMsg + " Exiting...");
669      return -2;
670    }
671    // Not going with further consistency check for tables when hbase:meta itself is not consistent.
672    LOG.info("Loading regionsinfo from the hbase:meta table");
673    boolean success = loadMetaEntries();
674    if (!success) return -1;
675
676    // Empty cells in hbase:meta?
677    reportEmptyMetaCells();
678
679    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
680    if (shouldFixEmptyMetaCells()) {
681      fixEmptyMetaCells();
682    }
683
684    // get a list of all tables that have not changed recently.
685    if (!checkMetaOnly) {
686      reportTablesInFlux();
687    }
688
689    // Get disabled tables states
690    loadTableStates();
691
692    // load regiondirs and regioninfos from HDFS
693    if (shouldCheckHdfs()) {
694      LOG.info("Loading region directories from HDFS");
695      loadHdfsRegionDirs();
696      LOG.info("Loading region information from HDFS");
697      loadHdfsRegionInfos();
698    }
699
700    // fix the orphan tables
701    fixOrphanTables();
702
703    LOG.info("Checking and fixing region consistency");
704    // Check and fix consistency
705    checkAndFixConsistency();
706
707    // Check integrity (does not fix)
708    checkIntegrity();
709    return errors.getErrorList().size();
710  }
711
712  private void reportUnknownServers() throws IOException {
713    List<ServerName> unknownServers = admin.listUnknownServers();
714    if (!unknownServers.isEmpty()) {
715      unknownServers.stream().forEach(serverName -> {
716        errors.reportError(ERROR_CODE.UNKNOWN_SERVER,
717          "Found unknown server, some of the regions held by this server may not get assigned. "
718            + String.format("Use HBCK2 scheduleRecoveries %s to recover.", serverName));
719      });
720    }
721  }
722
723  /**
724   * This method maintains an ephemeral znode. If the creation fails we return false or throw
725   * exception
726   * @return true if creating znode succeeds; false otherwise
727   * @throws IOException if IO failure occurs
728   */
729  private boolean setMasterInMaintenanceMode() throws IOException {
730    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
731    hbckEphemeralNodePath = ZNodePaths.joinZNode(zkw.getZNodePaths().masterMaintZNode,
732      "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
733    do {
734      try {
735        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
736        if (hbckZodeCreated) {
737          break;
738        }
739      } catch (KeeperException e) {
740        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
741          throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
742        }
743        // fall through and retry
744      }
745
746      LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try="
747        + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
748
749      try {
750        retryCounter.sleepUntilNextRetry();
751      } catch (InterruptedException ie) {
752        throw (InterruptedIOException) new InterruptedIOException(
753          "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
754      }
755    } while (retryCounter.shouldRetry());
756    return hbckZodeCreated;
757  }
758
759  private void cleanupHbckZnode() {
760    try {
761      if (zkw != null && hbckZodeCreated) {
762        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
763        hbckZodeCreated = false;
764      }
765    } catch (KeeperException e) {
766      // Ignore
767      if (!e.code().equals(KeeperException.Code.NONODE)) {
768        LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
769      }
770    }
771  }
772
773  /**
774   * Contacts the master and prints out cluster-wide information
775   * @return 0 on success, non-zero on failure
776   */
777  public int onlineHbck()
778    throws IOException, KeeperException, InterruptedException, ReplicationException {
779    // print hbase server version
780    errors.print("Version: " + status.getHBaseVersion());
781
782    // Clean start
783    clearState();
784    // Do offline check and repair first
785    offlineHdfsIntegrityRepair();
786    offlineReferenceFileRepair();
787    offlineHLinkFileRepair();
788    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
789    // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
790    // is better to set Master into maintenance mode during online hbck.
791    //
792    if (!setMasterInMaintenanceMode()) {
793      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
794        + "error.  Please run HBCK multiple times to reduce the chance of transient error.");
795    }
796
797    onlineConsistencyRepair();
798
799    if (checkRegionBoundaries) {
800      checkRegionBoundaries();
801    }
802
803    checkAndFixReplication();
804
805    cleanReplicationBarrier();
806
807    // Remove the hbck znode
808    cleanupHbckZnode();
809
810    // Remove the hbck lock
811    unlockHbck();
812
813    // Print table summary
814    printTableSummary(tablesInfo);
815    return errors.summarize();
816  }
817
818  public static byte[] keyOnly(byte[] b) {
819    if (b == null) return b;
820    int rowlength = Bytes.toShort(b, 0);
821    byte[] result = new byte[rowlength];
822    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
823    return result;
824  }
825
826  @Override
827  public void close() throws IOException {
828    try {
829      cleanupHbckZnode();
830      unlockHbck();
831    } catch (Exception io) {
832      LOG.warn(io.toString(), io);
833    } finally {
834      if (zkw != null) {
835        zkw.close();
836        zkw = null;
837      }
838      IOUtils.closeQuietly(admin, e -> LOG.warn("", e));
839      IOUtils.closeQuietly(meta, e -> LOG.warn("", e));
840      IOUtils.closeQuietly(connection, e -> LOG.warn("", e));
841    }
842  }
843
844  private static class RegionBoundariesInformation {
845    public byte[] regionName;
846    public byte[] metaFirstKey;
847    public byte[] metaLastKey;
848    public byte[] storesFirstKey;
849    public byte[] storesLastKey;
850
851    @Override
852    public String toString() {
853      return "regionName=" + Bytes.toStringBinary(regionName) + "\nmetaFirstKey="
854        + Bytes.toStringBinary(metaFirstKey) + "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey)
855        + "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) + "\nstoresLastKey="
856        + Bytes.toStringBinary(storesLastKey);
857    }
858  }
859
860  public void checkRegionBoundaries() {
861    try {
862      ByteArrayComparator comparator = new ByteArrayComparator();
863      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
864      final RegionBoundariesInformation currentRegionBoundariesInformation =
865        new RegionBoundariesInformation();
866      Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
867      for (RegionInfo regionInfo : regions) {
868        Path tableDir = CommonFSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
869        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
870        // For each region, get the start and stop key from the META and compare them to the
871        // same information from the Stores.
872        Path path = new Path(tableDir, regionInfo.getEncodedName());
873        FileSystem fs = path.getFileSystem(getConf());
874        FileStatus[] files = fs.listStatus(path);
875        // For all the column families in this region...
876        byte[] storeFirstKey = null;
877        byte[] storeLastKey = null;
878        for (FileStatus file : files) {
879          String fileName = file.getPath().toString();
880          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
881          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
882            FileStatus[] storeFiles = fs.listStatus(file.getPath());
883            // For all the stores in this column family.
884            for (FileStatus storeFile : storeFiles) {
885              HFile.Reader reader =
886                HFile.createReader(fs, storeFile.getPath(), CacheConfig.DISABLED, true, getConf());
887              if (
888                (reader.getFirstKey() != null)
889                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
890                    ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))
891              ) {
892                storeFirstKey = ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey();
893              }
894              if (
895                (reader.getLastKey() != null)
896                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
897                    ((KeyValue.KeyOnlyKeyValue) reader.getLastKey().get()).getKey())) < 0)
898              ) {
899                storeLastKey = ((KeyValue.KeyOnlyKeyValue) reader.getLastKey().get()).getKey();
900              }
901              reader.close();
902            }
903          }
904        }
905        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
906        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
907        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
908        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
909        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
910          currentRegionBoundariesInformation.metaFirstKey = null;
911        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
912          currentRegionBoundariesInformation.metaLastKey = null;
913
914        // For a region to be correct, we need the META start key to be smaller or equal to the
915        // smallest start key from all the stores, and the start key from the next META entry to
916        // be bigger than the last key from all the current stores. First region start key is null;
917        // Last region end key is null; some regions can be empty and not have any store.
918
919        boolean valid = true;
920        // Checking start key.
921        if (
922          (currentRegionBoundariesInformation.storesFirstKey != null)
923            && (currentRegionBoundariesInformation.metaFirstKey != null)
924        ) {
925          valid = valid && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
926            currentRegionBoundariesInformation.metaFirstKey) >= 0;
927        }
928        // Checking stop key.
929        if (
930          (currentRegionBoundariesInformation.storesLastKey != null)
931            && (currentRegionBoundariesInformation.metaLastKey != null)
932        ) {
933          valid = valid && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
934            currentRegionBoundariesInformation.metaLastKey) < 0;
935        }
936        if (!valid) {
937          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
938            tablesInfo.get(regionInfo.getTable()));
939          LOG.warn("Region's boundaries not aligned between stores and META for:");
940          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
941        }
942      }
943    } catch (IOException e) {
944      LOG.error(e.toString(), e);
945    }
946  }
947
948  /**
949   * Iterates through the list of all orphan/invalid regiondirs.
950   */
951  private void adoptHdfsOrphans(Collection<HbckRegionInfo> orphanHdfsDirs) throws IOException {
952    for (HbckRegionInfo hi : orphanHdfsDirs) {
953      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
954      adoptHdfsOrphan(hi);
955    }
956  }
957
958  /**
959   * Orphaned regions are regions without a .regioninfo file in them. We "adopt" these orphans by
960   * creating a new region, and moving the column families, recovered edits, WALs, into the new
961   * region dir. We determine the region startkey and endkeys by looking at all of the hfiles inside
962   * the column families to identify the min and max keys. The resulting region will likely violate
963   * table integrity but will be dealt with by merging overlapping regions.
964   */
965  @SuppressWarnings("deprecation")
966  private void adoptHdfsOrphan(HbckRegionInfo hi) throws IOException {
967    Path p = hi.getHdfsRegionDir();
968    FileSystem fs = p.getFileSystem(getConf());
969    FileStatus[] dirs = fs.listStatus(p);
970    if (dirs == null) {
971      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " + p
972        + ". This dir could probably be deleted.");
973      return;
974    }
975
976    TableName tableName = hi.getTableName();
977    HbckTableInfo tableInfo = tablesInfo.get(tableName);
978    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
979    TableDescriptor template = tableInfo.getTableDescriptor();
980
981    // find min and max key values
982    Pair<byte[], byte[]> orphanRegionRange = null;
983    for (FileStatus cf : dirs) {
984      String cfName = cf.getPath().getName();
985      // TODO Figure out what the special dirs are
986      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
987
988      FileStatus[] hfiles = fs.listStatus(cf.getPath());
989      for (FileStatus hfile : hfiles) {
990        byte[] start, end;
991        HFile.Reader hf = null;
992        try {
993          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
994          Optional<Cell> startKv = hf.getFirstKey();
995          start = CellUtil.cloneRow(startKv.get());
996          Optional<Cell> endKv = hf.getLastKey();
997          end = CellUtil.cloneRow(endKv.get());
998        } catch (Exception ioe) {
999          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
1000          continue;
1001        } finally {
1002          if (hf != null) {
1003            hf.close();
1004          }
1005        }
1006
1007        // expand the range to include the range of all hfiles
1008        if (orphanRegionRange == null) {
1009          // first range
1010          orphanRegionRange = new Pair<>(start, end);
1011        } else {
1012          // TODO add test
1013
1014          // expand range only if the hfile is wider.
1015          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1016            orphanRegionRange.setFirst(start);
1017          }
1018          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
1019            orphanRegionRange.setSecond(end);
1020          }
1021        }
1022      }
1023    }
1024    if (orphanRegionRange == null) {
1025      LOG.warn("No data in dir " + p + ", sidelining data");
1026      fixes++;
1027      sidelineRegionDir(fs, hi);
1028      return;
1029    }
1030    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", "
1031      + Bytes.toString(orphanRegionRange.getSecond()) + ")");
1032
1033    // create new region on hdfs. move data into place.
1034    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1035      .setStartKey(orphanRegionRange.getFirst())
1036      .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1])).build();
1037    LOG.info("Creating new region : " + regionInfo);
1038    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1039    Path target = region.getRegionFileSystem().getRegionDir();
1040
1041    // rename all the data to new region
1042    mergeRegionDirs(target, hi);
1043    fixes++;
1044  }
1045
1046  /**
1047   * This method determines if there are table integrity errors in HDFS. If there are errors and the
1048   * appropriate "fix" options are enabled, the method will first correct orphan regions making them
1049   * into legit regiondirs, and then reload to merge potentially overlapping regions.
1050   * @return number of table integrity errors found
1051   */
1052  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1053    // Determine what's on HDFS
1054    LOG.info("Loading HBase regioninfo from HDFS...");
1055    loadHdfsRegionDirs(); // populating regioninfo table.
1056
1057    int errs = errors.getErrorList().size();
1058    // First time just get suggestions.
1059    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1060    checkHdfsIntegrity(false, false);
1061
1062    if (errors.getErrorList().size() == errs) {
1063      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1064      return 0;
1065    }
1066
1067    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1068      adoptHdfsOrphans(orphanHdfsDirs);
1069      // TODO optimize by incrementally adding instead of reloading.
1070    }
1071
1072    // Make sure there are no holes now.
1073    if (shouldFixHdfsHoles()) {
1074      clearState(); // this also resets # fixes.
1075      loadHdfsRegionDirs();
1076      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1077      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1078    }
1079
1080    // Now we fix overlaps
1081    if (shouldFixHdfsOverlaps()) {
1082      // second pass we fix overlaps.
1083      clearState(); // this also resets # fixes.
1084      loadHdfsRegionDirs();
1085      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1086      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1087    }
1088
1089    return errors.getErrorList().size();
1090  }
1091
1092  /**
1093   * Scan all the store file names to find any lingering reference files, which refer to some
1094   * none-exiting files. If "fix" option is enabled, any lingering reference file will be sidelined
1095   * if found.
1096   * <p>
1097   * Lingering reference file prevents a region from opening. It has to be fixed before a cluster
1098   * can start properly.
1099   */
1100  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1101    clearState();
1102    Configuration conf = getConf();
1103    Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1104    FileSystem fs = hbaseRoot.getFileSystem(conf);
1105    LOG.info("Computing mapping of all store files");
1106    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1107      new FSUtils.ReferenceFileFilter(fs), executor, errors);
1108    errors.print("");
1109    LOG.info("Validating mapping using HDFS state");
1110    for (Path path : allFiles.values()) {
1111      Path referredToFile = StoreFileInfo.getReferredToFile(path);
1112      if (fs.exists(referredToFile)) continue; // good, expected
1113
1114      // Found a lingering reference file
1115      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1116        "Found lingering reference file " + path);
1117      if (!shouldFixReferenceFiles()) continue;
1118
1119      // Now, trying to fix it since requested
1120      boolean success = false;
1121      String pathStr = path.toString();
1122
1123      // A reference file path should be like
1124      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1125      // Up 5 directories to get the root folder.
1126      // So the file will be sidelined to a similar folder structure.
1127      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1128      for (int i = 0; index > 0 && i < 5; i++) {
1129        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1130      }
1131      if (index > 0) {
1132        Path rootDir = getSidelineDir();
1133        Path dst = new Path(rootDir, pathStr.substring(index + 1));
1134        fs.mkdirs(dst.getParent());
1135        LOG.info("Trying to sideline reference file " + path + " to " + dst);
1136        setShouldRerun();
1137
1138        success = fs.rename(path, dst);
1139        debugLsr(dst);
1140
1141      }
1142      if (!success) {
1143        LOG.error("Failed to sideline reference file " + path);
1144      }
1145    }
1146  }
1147
1148  /**
1149   * Scan all the store file names to find any lingering HFileLink files, which refer to some
1150   * none-exiting files. If "fix" option is enabled, any lingering HFileLink file will be sidelined
1151   * if found.
1152   */
1153  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1154    Configuration conf = getConf();
1155    Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1156    FileSystem fs = hbaseRoot.getFileSystem(conf);
1157    LOG.info("Computing mapping of all link files");
1158    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1159      new FSUtils.HFileLinkFilter(), executor, errors);
1160    errors.print("");
1161
1162    LOG.info("Validating mapping using HDFS state");
1163    for (Path path : allFiles.values()) {
1164      // building HFileLink object to gather locations
1165      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1166      if (actualLink.exists(fs)) continue; // good, expected
1167
1168      // Found a lingering HFileLink
1169      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1170      if (!shouldFixHFileLinks()) continue;
1171
1172      // Now, trying to fix it since requested
1173      setShouldRerun();
1174
1175      // An HFileLink path should be like
1176      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
1177      // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same
1178      // folder structure.
1179      boolean success = sidelineFile(fs, hbaseRoot, path);
1180
1181      if (!success) {
1182        LOG.error("Failed to sideline HFileLink file " + path);
1183      }
1184
1185      // An HFileLink backreference path should be like
1186      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
1187      // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same
1188      // folder structure.
1189      Path backRefPath = FileLink.getBackReferencesDir(
1190        HFileArchiveUtil.getStoreArchivePath(conf,
1191          HFileLink.getReferencedTableName(path.getName().toString()),
1192          HFileLink.getReferencedRegionName(path.getName().toString()), path.getParent().getName()),
1193        HFileLink.getReferencedHFileName(path.getName().toString()));
1194      success = sidelineFile(fs, hbaseRoot, backRefPath);
1195
1196      if (!success) {
1197        LOG.error("Failed to sideline HFileLink backreference file " + path);
1198      }
1199    }
1200  }
1201
1202  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1203    URI uri = hbaseRoot.toUri().relativize(path.toUri());
1204    if (uri.isAbsolute()) return false;
1205    String relativePath = uri.getPath();
1206    Path rootDir = getSidelineDir();
1207    Path dst = new Path(rootDir, relativePath);
1208    boolean pathCreated = fs.mkdirs(dst.getParent());
1209    if (!pathCreated) {
1210      LOG.error("Failed to create path: " + dst.getParent());
1211      return false;
1212    }
1213    LOG.info("Trying to sideline file " + path + " to " + dst);
1214    return fs.rename(path, dst);
1215  }
1216
1217  /**
1218   * TODO -- need to add tests for this.
1219   */
1220  private void reportEmptyMetaCells() {
1221    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: "
1222      + emptyRegionInfoQualifiers.size());
1223    if (details) {
1224      for (Result r : emptyRegionInfoQualifiers) {
1225        errors.print("  " + r);
1226      }
1227    }
1228  }
1229
1230  /**
1231   * TODO -- need to add tests for this.
1232   */
1233  private void reportTablesInFlux() {
1234    AtomicInteger numSkipped = new AtomicInteger(0);
1235    TableDescriptor[] allTables = getTables(numSkipped);
1236    errors.print("Number of Tables: " + allTables.length);
1237    if (details) {
1238      if (numSkipped.get() > 0) {
1239        errors.detail("Number of Tables in flux: " + numSkipped.get());
1240      }
1241      for (TableDescriptor td : allTables) {
1242        errors.detail("  Table: " + td.getTableName() + "\t" + (td.isReadOnly() ? "ro" : "rw")
1243          + "\t" + (td.isMetaRegion() ? "META" : "    ") + "\t" + " families: "
1244          + td.getColumnFamilyCount());
1245      }
1246    }
1247  }
1248
1249  public HbckErrorReporter getErrors() {
1250    return errors;
1251  }
1252
1253  /**
1254   * Populate hbi's from regionInfos loaded from file system.
1255   */
1256  private SortedMap<TableName, HbckTableInfo> loadHdfsRegionInfos()
1257    throws IOException, InterruptedException {
1258    tablesInfo.clear(); // regenerating the data
1259    // generate region split structure
1260    Collection<HbckRegionInfo> hbckRegionInfos = regionInfoMap.values();
1261
1262    // Parallelized read of .regioninfo files.
1263    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckRegionInfos.size());
1264    List<Future<Void>> hbiFutures;
1265
1266    for (HbckRegionInfo hbi : hbckRegionInfos) {
1267      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1268      hbis.add(work);
1269    }
1270
1271    // Submit and wait for completion
1272    hbiFutures = executor.invokeAll(hbis);
1273
1274    for (int i = 0; i < hbiFutures.size(); i++) {
1275      WorkItemHdfsRegionInfo work = hbis.get(i);
1276      Future<Void> f = hbiFutures.get(i);
1277      try {
1278        f.get();
1279      } catch (ExecutionException e) {
1280        LOG.warn("Failed to read .regioninfo file for region " + work.hbi.getRegionNameAsString(),
1281          e.getCause());
1282      }
1283    }
1284
1285    Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
1286    FileSystem fs = hbaseRoot.getFileSystem(getConf());
1287    // serialized table info gathering.
1288    for (HbckRegionInfo hbi : hbckRegionInfos) {
1289
1290      if (hbi.getHdfsHRI() == null) {
1291        // was an orphan
1292        continue;
1293      }
1294
1295      // get table name from hdfs, populate various HBaseFsck tables.
1296      TableName tableName = hbi.getTableName();
1297      if (tableName == null) {
1298        // There was an entry in hbase:meta not in the HDFS?
1299        LOG.warn("tableName was null for: " + hbi);
1300        continue;
1301      }
1302
1303      HbckTableInfo modTInfo = tablesInfo.get(tableName);
1304      if (modTInfo == null) {
1305        // only executed once per table.
1306        modTInfo = new HbckTableInfo(tableName, this);
1307        tablesInfo.put(tableName, modTInfo);
1308        try {
1309          TableDescriptor htd =
1310            FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1311          modTInfo.htds.add(htd);
1312        } catch (IOException ioe) {
1313          if (!orphanTableDirs.containsKey(tableName)) {
1314            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1315            // should only report once for each table
1316            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1317              "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1318            Set<String> columns = new HashSet<>();
1319            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1320          }
1321        }
1322      }
1323      if (!hbi.isSkipChecks()) {
1324        modTInfo.addRegionInfo(hbi);
1325      }
1326    }
1327
1328    loadTableInfosForTablesWithNoRegion();
1329    errors.print("");
1330
1331    return tablesInfo;
1332  }
1333
1334  /**
1335   * To get the column family list according to the column family dirs
1336   * @return a set of column families
1337   */
1338  private Set<String> getColumnFamilyList(Set<String> columns, HbckRegionInfo hbi)
1339    throws IOException {
1340    Path regionDir = hbi.getHdfsRegionDir();
1341    FileSystem fs = regionDir.getFileSystem(getConf());
1342    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1343    for (FileStatus subdir : subDirs) {
1344      String columnfamily = subdir.getPath().getName();
1345      columns.add(columnfamily);
1346    }
1347    return columns;
1348  }
1349
1350  /**
1351   * To fabricate a .tableinfo file with following contents<br>
1352   * 1. the correct tablename <br>
1353   * 2. the correct colfamily list<br>
1354   * 3. the default properties for both {@link TableDescriptor} and
1355   * {@link ColumnFamilyDescriptor}<br>
1356   */
1357  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1358    Set<String> columns) throws IOException {
1359    if (columns == null || columns.isEmpty()) return false;
1360    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
1361    for (String columnfamimly : columns) {
1362      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnfamimly));
1363    }
1364    fstd.createTableDescriptor(builder.build(), true);
1365    return true;
1366  }
1367
1368  /**
1369   * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1370   */
1371  public void fixEmptyMetaCells() throws IOException {
1372    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1373      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1374      for (Result region : emptyRegionInfoQualifiers) {
1375        deleteMetaRegion(region.getRow());
1376        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1377      }
1378      emptyRegionInfoQualifiers.clear();
1379    }
1380  }
1381
1382  /**
1383   * To fix orphan table by creating a .tableinfo file under tableDir <br>
1384   * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1385   * 2. else create a default .tableinfo file with following items<br>
1386   * &nbsp;2.1 the correct tablename <br>
1387   * &nbsp;2.2 the correct colfamily list<br>
1388   * &nbsp;2.3 the default properties for both {@link TableDescriptor} and
1389   * {@link ColumnFamilyDescriptor}<br>
1390   */
1391  public void fixOrphanTables() throws IOException {
1392    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1393
1394      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1395      tmpList.addAll(orphanTableDirs.keySet());
1396      TableDescriptor[] htds = getTableDescriptors(tmpList);
1397      Iterator<Entry<TableName, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
1398      int j = 0;
1399      int numFailedCase = 0;
1400      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1401      while (iter.hasNext()) {
1402        Entry<TableName, Set<String>> entry = iter.next();
1403        TableName tableName = entry.getKey();
1404        LOG.info("Trying to fix orphan table error: " + tableName);
1405        if (j < htds.length) {
1406          if (tableName.equals(htds[j].getTableName())) {
1407            TableDescriptor htd = htds[j];
1408            LOG.info("fixing orphan table: " + tableName + " from cache");
1409            fstd.createTableDescriptor(htd, true);
1410            j++;
1411            iter.remove();
1412          }
1413        } else {
1414          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1415            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1416            LOG.warn(
1417              "Strongly recommend to modify the TableDescriptor if necessary for: " + tableName);
1418            iter.remove();
1419          } else {
1420            LOG.error("Unable to create default .tableinfo for " + tableName
1421              + " while missing column family information");
1422            numFailedCase++;
1423          }
1424        }
1425        fixes++;
1426      }
1427
1428      if (orphanTableDirs.isEmpty()) {
1429        // all orphanTableDirs are luckily recovered
1430        // re-run doFsck after recovering the .tableinfo file
1431        setShouldRerun();
1432        LOG.warn(
1433          "Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1434      } else if (numFailedCase > 0) {
1435        LOG.error("Failed to fix " + numFailedCase + " OrphanTables with default .tableinfo files");
1436      }
1437
1438    }
1439    // cleanup the list
1440    orphanTableDirs.clear();
1441
1442  }
1443
1444  /**
1445   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1446   */
1447  private void logParallelMerge() {
1448    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1449      LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to"
1450        + " false to run serially.");
1451    } else {
1452      LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to"
1453        + " true to run in parallel.");
1454    }
1455  }
1456
1457  private SortedMap<TableName, HbckTableInfo> checkHdfsIntegrity(boolean fixHoles,
1458    boolean fixOverlaps) throws IOException {
1459    LOG.info("Checking HBase region split map from HDFS data...");
1460    logParallelMerge();
1461    for (HbckTableInfo tInfo : tablesInfo.values()) {
1462      TableIntegrityErrorHandler handler;
1463      if (fixHoles || fixOverlaps) {
1464        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(), fixHoles, fixOverlaps);
1465      } else {
1466        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1467      }
1468      if (!tInfo.checkRegionChain(handler)) {
1469        // should dump info as well.
1470        errors.report("Found inconsistency in table " + tInfo.getName());
1471      }
1472    }
1473    return tablesInfo;
1474  }
1475
1476  Path getSidelineDir() throws IOException {
1477    if (sidelineDir == null) {
1478      Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1479      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1480      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-" + startMillis);
1481    }
1482    return sidelineDir;
1483  }
1484
1485  /**
1486   * Sideline a region dir (instead of deleting it)
1487   */
1488  Path sidelineRegionDir(FileSystem fs, HbckRegionInfo hi) throws IOException {
1489    return sidelineRegionDir(fs, null, hi);
1490  }
1491
1492  /**
1493   * Sideline a region dir (instead of deleting it)
1494   * @param parentDir if specified, the region will be sidelined to folder like
1495   *                  {@literal .../parentDir/<table name>/<region name>}. The purpose is to group
1496   *                  together similar regions sidelined, for example, those regions should be bulk
1497   *                  loaded back later on. If NULL, it is ignored.
1498   */
1499  Path sidelineRegionDir(FileSystem fs, String parentDir, HbckRegionInfo hi) throws IOException {
1500    TableName tableName = hi.getTableName();
1501    Path regionDir = hi.getHdfsRegionDir();
1502
1503    if (!fs.exists(regionDir)) {
1504      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1505      return null;
1506    }
1507
1508    Path rootDir = getSidelineDir();
1509    if (parentDir != null) {
1510      rootDir = new Path(rootDir, parentDir);
1511    }
1512    Path sidelineTableDir = CommonFSUtils.getTableDir(rootDir, tableName);
1513    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1514    fs.mkdirs(sidelineRegionDir);
1515    boolean success = false;
1516    FileStatus[] cfs = fs.listStatus(regionDir);
1517    if (cfs == null) {
1518      LOG.info("Region dir is empty: " + regionDir);
1519    } else {
1520      for (FileStatus cf : cfs) {
1521        Path src = cf.getPath();
1522        Path dst = new Path(sidelineRegionDir, src.getName());
1523        if (fs.isFile(src)) {
1524          // simple file
1525          success = fs.rename(src, dst);
1526          if (!success) {
1527            String msg = "Unable to rename file " + src + " to " + dst;
1528            LOG.error(msg);
1529            throw new IOException(msg);
1530          }
1531          continue;
1532        }
1533
1534        // is a directory.
1535        fs.mkdirs(dst);
1536
1537        LOG.info("Sidelining files from " + src + " into containing region " + dst);
1538        // FileSystem.rename is inconsistent with directories -- if the
1539        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1540        // it moves the src into the dst dir resulting in (foo/a/b). If
1541        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1542        FileStatus[] hfiles = fs.listStatus(src);
1543        if (hfiles != null && hfiles.length > 0) {
1544          for (FileStatus hfile : hfiles) {
1545            success = fs.rename(hfile.getPath(), dst);
1546            if (!success) {
1547              String msg = "Unable to rename file " + src + " to " + dst;
1548              LOG.error(msg);
1549              throw new IOException(msg);
1550            }
1551          }
1552        }
1553        LOG.debug("Sideline directory contents:");
1554        debugLsr(sidelineRegionDir);
1555      }
1556    }
1557
1558    LOG.info("Removing old region dir: " + regionDir);
1559    success = fs.delete(regionDir, true);
1560    if (!success) {
1561      String msg = "Unable to delete dir " + regionDir;
1562      LOG.error(msg);
1563      throw new IOException(msg);
1564    }
1565    return sidelineRegionDir;
1566  }
1567
1568  /**
1569   * Load the list of disabled tables in ZK into local set.
1570   */
1571  private void loadTableStates() throws IOException {
1572    tableStates = MetaTableAccessor.getTableStates(connection);
1573    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
1574    // has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this in
1575    // meantime.
1576    this.tableStates.put(TableName.META_TABLE_NAME,
1577      new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1578  }
1579
1580  /**
1581   * Check if the specified region's table is disabled.
1582   * @param tableName table to check status of
1583   */
1584  boolean isTableDisabled(TableName tableName) {
1585    return tableStates.containsKey(tableName)
1586      && tableStates.get(tableName).inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1587  }
1588
1589  /**
1590   * Scan HDFS for all regions, recording their information into regionInfoMap
1591   */
1592  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1593    Path rootDir = CommonFSUtils.getRootDir(getConf());
1594    FileSystem fs = rootDir.getFileSystem(getConf());
1595
1596    // list all tables from HDFS
1597    List<FileStatus> tableDirs = Lists.newArrayList();
1598
1599    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1600
1601    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1602    for (Path path : paths) {
1603      TableName tableName = CommonFSUtils.getTableName(path);
1604      if (
1605        (!checkMetaOnly && isTableIncluded(tableName))
1606          || tableName.equals(TableName.META_TABLE_NAME)
1607      ) {
1608        tableDirs.add(fs.getFileStatus(path));
1609      }
1610    }
1611
1612    // verify that version file exists
1613    if (!foundVersionFile) {
1614      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1615        "Version file does not exist in root dir " + rootDir);
1616      if (shouldFixVersionFile()) {
1617        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME + " file.");
1618        setShouldRerun();
1619        FSUtils.setVersion(fs, rootDir,
1620          getConf().getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000),
1621          getConf().getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1622            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1623      }
1624    }
1625
1626    // Avoid multithreading at table-level because already multithreaded internally at
1627    // region-level. Additionally multithreading at table-level can lead to deadlock
1628    // if there are many tables in the cluster. Since there are a limited # of threads
1629    // in the executor's thread pool and if we multithread at the table-level by putting
1630    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1631    // executor tied up solely in waiting for the tables' region-level calls to complete.
1632    // If there are enough tables then there will be no actual threads in the pool left
1633    // for the region-level callables to be serviced.
1634    for (FileStatus tableDir : tableDirs) {
1635      LOG.debug("Loading region dirs from " + tableDir.getPath());
1636      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1637      try {
1638        item.call();
1639      } catch (ExecutionException e) {
1640        LOG.warn("Could not completely load table dir " + tableDir.getPath(), e.getCause());
1641      }
1642    }
1643    errors.print("");
1644  }
1645
1646  /**
1647   * Record the location of the hbase:meta region as found in ZooKeeper.
1648   */
1649  private boolean recordMetaRegion() throws IOException {
1650    List<HRegionLocation> locs;
1651    try (RegionLocator locator = connection.getRegionLocator(TableName.META_TABLE_NAME)) {
1652      locs = locator.getRegionLocations(HConstants.EMPTY_START_ROW, true);
1653    }
1654    if (locs == null || locs.isEmpty()) {
1655      errors.reportError(ERROR_CODE.NULL_META_REGION, "META region was not found in ZooKeeper");
1656      return false;
1657    }
1658    for (HRegionLocation metaLocation : locs) {
1659      // Check if Meta region is valid and existing
1660      if (metaLocation == null) {
1661        errors.reportError(ERROR_CODE.NULL_META_REGION, "META region location is null");
1662        return false;
1663      }
1664      if (metaLocation.getRegion() == null) {
1665        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null");
1666        return false;
1667      }
1668      if (metaLocation.getHostname() == null) {
1669        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location hostName is null");
1670        return false;
1671      }
1672      ServerName sn = metaLocation.getServerName();
1673      HbckRegionInfo.MetaEntry m = new HbckRegionInfo.MetaEntry(metaLocation.getRegion(), sn,
1674        EnvironmentEdgeManager.currentTime());
1675      HbckRegionInfo hbckRegionInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName());
1676      if (hbckRegionInfo == null) {
1677        regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckRegionInfo(m));
1678      } else {
1679        hbckRegionInfo.setMetaEntry(m);
1680      }
1681    }
1682    return true;
1683  }
1684
1685  private ZKWatcher createZooKeeperWatcher() throws IOException {
1686    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1687      @Override
1688      public void abort(String why, Throwable e) {
1689        LOG.error(why, e);
1690        System.exit(1);
1691      }
1692
1693      @Override
1694      public boolean isAborted() {
1695        return false;
1696      }
1697
1698    });
1699  }
1700
1701  /**
1702   * Contacts each regionserver and fetches metadata about regions.
1703   * @param regionServerList - the list of region servers to connect to
1704   * @throws IOException if a remote or network exception occurs
1705   */
1706  void processRegionServers(Collection<ServerName> regionServerList)
1707    throws IOException, InterruptedException {
1708
1709    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
1710    List<Future<Void>> workFutures;
1711
1712    // loop to contact each region server in parallel
1713    for (ServerName rsinfo : regionServerList) {
1714      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1715    }
1716
1717    workFutures = executor.invokeAll(workItems);
1718
1719    for (int i = 0; i < workFutures.size(); i++) {
1720      WorkItemRegion item = workItems.get(i);
1721      Future<Void> f = workFutures.get(i);
1722      try {
1723        f.get();
1724      } catch (ExecutionException e) {
1725        LOG.warn("Could not process regionserver {}", item.rsinfo.getAddress(), e.getCause());
1726      }
1727    }
1728  }
1729
1730  /**
1731   * Check consistency of all regions that have been found in previous phases.
1732   */
1733  private void checkAndFixConsistency() throws IOException, KeeperException, InterruptedException {
1734    // Divide the checks in two phases. One for default/primary replicas and another
1735    // for the non-primary ones. Keeps code cleaner this way.
1736
1737    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
1738    for (java.util.Map.Entry<String, HbckRegionInfo> e : regionInfoMap.entrySet()) {
1739      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1740        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1741      }
1742    }
1743    checkRegionConsistencyConcurrently(workItems);
1744
1745    boolean prevHdfsCheck = shouldCheckHdfs();
1746    setCheckHdfs(false); // replicas don't have any hdfs data
1747    // Run a pass over the replicas and fix any assignment issues that exist on the currently
1748    // deployed/undeployed replicas.
1749    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
1750    for (java.util.Map.Entry<String, HbckRegionInfo> e : regionInfoMap.entrySet()) {
1751      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
1752        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1753      }
1754    }
1755    checkRegionConsistencyConcurrently(replicaWorkItems);
1756    setCheckHdfs(prevHdfsCheck);
1757
1758    // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might
1759    // not get accurate state of the hbase if continuing. The config here allows users to tune
1760    // the tolerance of number of skipped region.
1761    // TODO: evaluate the consequence to continue the hbck operation without config.
1762    int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1763    int numOfSkippedRegions = skippedRegions.size();
1764    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1765      throw new IOException(
1766        numOfSkippedRegions + " region(s) could not be checked or repaired.  See logs for detail.");
1767    }
1768
1769    if (shouldCheckHdfs()) {
1770      checkAndFixTableStates();
1771    }
1772  }
1773
1774  /**
1775   * Check consistency of all regions using multiple threads concurrently.
1776   */
1777  private void
1778    checkRegionConsistencyConcurrently(final List<CheckRegionConsistencyWorkItem> workItems)
1779      throws IOException, KeeperException, InterruptedException {
1780    if (workItems.isEmpty()) {
1781      return; // nothing to check
1782    }
1783
1784    List<Future<Void>> workFutures = executor.invokeAll(workItems);
1785    for (Future<Void> f : workFutures) {
1786      try {
1787        f.get();
1788      } catch (ExecutionException e1) {
1789        LOG.warn("Could not check region consistency ", e1.getCause());
1790        if (e1.getCause() instanceof IOException) {
1791          throw (IOException) e1.getCause();
1792        } else if (e1.getCause() instanceof KeeperException) {
1793          throw (KeeperException) e1.getCause();
1794        } else if (e1.getCause() instanceof InterruptedException) {
1795          throw (InterruptedException) e1.getCause();
1796        } else {
1797          throw new IOException(e1.getCause());
1798        }
1799      }
1800    }
1801  }
1802
1803  class CheckRegionConsistencyWorkItem implements Callable<Void> {
1804    private final String key;
1805    private final HbckRegionInfo hbi;
1806
1807    CheckRegionConsistencyWorkItem(String key, HbckRegionInfo hbi) {
1808      this.key = key;
1809      this.hbi = hbi;
1810    }
1811
1812    @Override
1813    public synchronized Void call() throws Exception {
1814      try {
1815        checkRegionConsistency(key, hbi);
1816      } catch (Exception e) {
1817        // If the region is non-META region, skip this region and send warning/error message; if
1818        // the region is META region, we should not continue.
1819        LOG.warn(
1820          "Unable to complete check or repair the region '" + hbi.getRegionNameAsString() + "'.",
1821          e);
1822        if (hbi.getHdfsHRI().isMetaRegion()) {
1823          throw e;
1824        }
1825        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1826        addSkippedRegion(hbi);
1827      }
1828      return null;
1829    }
1830  }
1831
1832  private void addSkippedRegion(final HbckRegionInfo hbi) {
1833    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1834    if (skippedRegionNames == null) {
1835      skippedRegionNames = new HashSet<>();
1836    }
1837    skippedRegionNames.add(hbi.getRegionNameAsString());
1838    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1839  }
1840
1841  /**
1842   * Check and fix table states, assumes full info available: - tableInfos - empty tables loaded
1843   */
1844  private void checkAndFixTableStates() throws IOException {
1845    // first check dangling states
1846    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1847      TableName tableName = entry.getKey();
1848      TableState tableState = entry.getValue();
1849      HbckTableInfo tableInfo = tablesInfo.get(tableName);
1850      if (isTableIncluded(tableName) && !tableName.isSystemTable() && tableInfo == null) {
1851        if (fixMeta) {
1852          MetaTableAccessor.deleteTableState(connection, tableName);
1853          TableState state = MetaTableAccessor.getTableState(connection, tableName);
1854          if (state != null) {
1855            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1856              tableName + " unable to delete dangling table state " + tableState);
1857          }
1858        } else if (!checkMetaOnly) {
1859          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
1860          // true, tableInfo will be null as tablesInfo are not polulated for all tables from hdfs
1861          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1862            tableName + " has dangling table state " + tableState);
1863        }
1864      }
1865    }
1866    // check that all tables have states
1867    for (TableName tableName : tablesInfo.keySet()) {
1868      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1869        if (fixMeta) {
1870          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1871          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1872          if (newState == null) {
1873            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1874              "Unable to change state for table " + tableName + " in meta ");
1875          }
1876        } else {
1877          errors.reportError(ERROR_CODE.NO_TABLE_STATE, tableName + " has no state in meta ");
1878        }
1879      }
1880    }
1881  }
1882
1883  private void preCheckPermission() throws IOException {
1884    if (shouldIgnorePreCheckPermission()) {
1885      return;
1886    }
1887
1888    Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1889    FileSystem fs = hbaseDir.getFileSystem(getConf());
1890    UserProvider userProvider = UserProvider.instantiate(getConf());
1891    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1892    FileStatus[] files = fs.listStatus(hbaseDir);
1893    for (FileStatus file : files) {
1894      try {
1895        fs.access(file.getPath(), FsAction.WRITE);
1896      } catch (AccessControlException ace) {
1897        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1898        errors.reportError(ERROR_CODE.WRONG_USAGE,
1899          "Current user " + ugi.getUserName() + " does not have write perms to " + file.getPath()
1900            + ". Please rerun hbck as hdfs user " + file.getOwner());
1901        throw ace;
1902      }
1903    }
1904  }
1905
1906  /**
1907   * Deletes region from meta table
1908   */
1909  private void deleteMetaRegion(HbckRegionInfo hi) throws IOException {
1910    deleteMetaRegion(hi.getMetaEntry().getRegionInfo().getRegionName());
1911  }
1912
1913  /**
1914   * Deletes region from meta table
1915   */
1916  private void deleteMetaRegion(byte[] metaKey) throws IOException {
1917    Delete d = new Delete(metaKey);
1918    meta.delete(d);
1919    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META");
1920  }
1921
1922  /**
1923   * Reset the split parent region info in meta table
1924   */
1925  private void resetSplitParent(HbckRegionInfo hi) throws IOException {
1926    RowMutations mutations = new RowMutations(hi.getMetaEntry().getRegionInfo().getRegionName());
1927    Delete d = new Delete(hi.getMetaEntry().getRegionInfo().getRegionName());
1928    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1929    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1930    mutations.add(d);
1931
1932    RegionInfo hri = RegionInfoBuilder.newBuilder(hi.getMetaEntry().getRegionInfo())
1933      .setOffline(false).setSplit(false).build();
1934    Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1935    mutations.add(p);
1936
1937    meta.mutateRow(mutations);
1938    LOG.info("Reset split parent " + hi.getMetaEntry().getRegionInfo().getRegionNameAsString()
1939      + " in META");
1940  }
1941
1942  /**
1943   * This backwards-compatibility wrapper for permanently offlining a region that should not be
1944   * alive. If the region server does not support the "offline" method, it will use the closest
1945   * unassign method instead. This will basically work until one attempts to disable or delete the
1946   * affected table. The problem has to do with in-memory only master state, so restarting the
1947   * HMaster or failing over to another should fix this.
1948   */
1949  void offline(byte[] regionName) throws IOException {
1950    String regionString = Bytes.toStringBinary(regionName);
1951    if (!rsSupportsOffline) {
1952      LOG.warn("Using unassign region " + regionString
1953        + " instead of using offline method, you should" + " restart HMaster after these repairs");
1954      admin.unassign(regionName, true);
1955      return;
1956    }
1957
1958    // first time we assume the rs's supports #offline.
1959    try {
1960      LOG.info("Offlining region " + regionString);
1961      admin.offline(regionName);
1962    } catch (IOException ioe) {
1963      String notFoundMsg =
1964        "java.lang.NoSuchMethodException: " + "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1965      if (ioe.getMessage().contains(notFoundMsg)) {
1966        LOG.warn(
1967          "Using unassign region " + regionString + " instead of using offline method, you should"
1968            + " restart HMaster after these repairs");
1969        rsSupportsOffline = false; // in the future just use unassign
1970        admin.unassign(regionName, true);
1971        return;
1972      }
1973      throw ioe;
1974    }
1975  }
1976
1977  /**
1978   * Attempts to undeploy a region from a region server based in information in META. Any operations
1979   * that modify the file system should make sure that its corresponding region is not deployed to
1980   * prevent data races. A separate call is required to update the master in-memory region state
1981   * kept in the AssignementManager. Because disable uses this state instead of that found in META,
1982   * we can't seem to cleanly disable/delete tables that have been hbck fixed. When used on a
1983   * version of HBase that does not have the offline ipc call exposed on the master (&lt;0.90.5,
1984   * &lt;0.92.0) a master restart or failover may be required.
1985   */
1986  void closeRegion(HbckRegionInfo hi) throws IOException, InterruptedException {
1987    if (hi.getMetaEntry() == null && hi.getHdfsEntry() == null) {
1988      undeployRegions(hi);
1989      return;
1990    }
1991
1992    // get assignment info and hregioninfo from meta.
1993    Get get = new Get(hi.getRegionName());
1994    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1995    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1996    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1997    // also get the locations of the replicas to close if the primary region is being closed
1998    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1999      int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2000      for (int i = 0; i < numReplicas; i++) {
2001        get.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(i));
2002        get.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getStartCodeColumn(i));
2003      }
2004    }
2005    Result r = meta.get(get);
2006    RegionLocations rl = CatalogFamilyFormat.getRegionLocations(r);
2007    if (rl == null) {
2008      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2009        + " since meta does not have handle to reach it");
2010      return;
2011    }
2012    for (HRegionLocation h : rl.getRegionLocations()) {
2013      ServerName serverName = h.getServerName();
2014      if (serverName == null) {
2015        errors.reportError("Unable to close region " + hi.getRegionNameAsString()
2016          + " because meta does not " + "have handle to reach it.");
2017        continue;
2018      }
2019      RegionInfo hri = h.getRegion();
2020      if (hri == null) {
2021        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2022          + " because hbase:meta had invalid or missing " + HConstants.CATALOG_FAMILY_STR + ":"
2023          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER) + " qualifier value.");
2024        continue;
2025      }
2026      // close the region -- close files and remove assignment
2027      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2028    }
2029  }
2030
2031  private void undeployRegions(HbckRegionInfo hi) throws IOException, InterruptedException {
2032    undeployRegionsForHbi(hi);
2033    // undeploy replicas of the region (but only if the method is invoked for the primary)
2034    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2035      return;
2036    }
2037    int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2038    for (int i = 1; i < numReplicas; i++) {
2039      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2040      RegionInfo hri =
2041        RegionReplicaUtil.getRegionInfoForReplica(hi.getPrimaryHRIForDeployedReplica(), i);
2042      HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2043      if (h != null) {
2044        undeployRegionsForHbi(h);
2045        // set skip checks; we undeployed it, and we don't want to evaluate this anymore
2046        // in consistency checks
2047        h.setSkipChecks(true);
2048      }
2049    }
2050  }
2051
2052  private void undeployRegionsForHbi(HbckRegionInfo hi) throws IOException, InterruptedException {
2053    for (HbckRegionInfo.OnlineEntry rse : hi.getOnlineEntries()) {
2054      LOG.debug("Undeploy region " + rse.getRegionInfo() + " from " + rse.getServerName());
2055      try {
2056        HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.getServerName(),
2057          rse.getRegionInfo());
2058        offline(rse.getRegionInfo().getRegionName());
2059      } catch (IOException ioe) {
2060        LOG.warn("Got exception when attempting to offline region "
2061          + Bytes.toString(rse.getRegionInfo().getRegionName()), ioe);
2062      }
2063    }
2064  }
2065
2066  private void tryAssignmentRepair(HbckRegionInfo hbi, String msg)
2067    throws IOException, KeeperException, InterruptedException {
2068    // If we are trying to fix the errors
2069    if (shouldFixAssignments()) {
2070      errors.print(msg);
2071      undeployRegions(hbi);
2072      setShouldRerun();
2073      RegionInfo hri = hbi.getHdfsHRI();
2074      if (hri == null) {
2075        hri = hbi.getMetaEntry().getRegionInfo();
2076      }
2077      HBaseFsckRepair.fixUnassigned(admin, hri);
2078      HBaseFsckRepair.waitUntilAssigned(admin, hri);
2079
2080      // also assign replicas if needed (do it only when this call operates on a primary replica)
2081      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2082      int replicationCount = admin.getDescriptor(hri.getTable()).getRegionReplication();
2083      for (int i = 1; i < replicationCount; i++) {
2084        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2085        HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2086        if (h != null) {
2087          undeployRegions(h);
2088          // set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2089          // in consistency checks
2090          h.setSkipChecks(true);
2091        }
2092        HBaseFsckRepair.fixUnassigned(admin, hri);
2093        HBaseFsckRepair.waitUntilAssigned(admin, hri);
2094      }
2095
2096    }
2097  }
2098
2099  /**
2100   * Check a single region for consistency and correct deployment.
2101   */
2102  private void checkRegionConsistency(final String key, final HbckRegionInfo hbi)
2103    throws IOException, KeeperException, InterruptedException {
2104
2105    if (hbi.isSkipChecks()) return;
2106    String descriptiveName = hbi.toString();
2107    boolean inMeta = hbi.getMetaEntry() != null;
2108    // In case not checking HDFS, assume the region is on HDFS
2109    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2110    boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null;
2111    boolean isDeployed = !hbi.getDeployedOn().isEmpty();
2112    boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1;
2113    boolean deploymentMatchesMeta = hasMetaAssignment && isDeployed && !isMultiplyDeployed
2114      && hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0));
2115    boolean splitParent = inMeta && hbi.getMetaEntry().getRegionInfo().isSplit()
2116      && hbi.getMetaEntry().getRegionInfo().isOffline();
2117    boolean shouldBeDeployed =
2118      inMeta && !isTableDisabled(hbi.getMetaEntry().getRegionInfo().getTable());
2119    boolean recentlyModified =
2120      inHdfs && hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2121
2122    // ========== First the healthy cases =============
2123    if (hbi.containsOnlyHdfsEdits()) {
2124      return;
2125    }
2126    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2127      return;
2128    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2129      LOG.info("Region " + descriptiveName + " is in META, and in a disabled "
2130        + "tabled that is not deployed");
2131      return;
2132    } else if (recentlyModified) {
2133      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2134      return;
2135    }
2136    // ========== Cases where the region is not in hbase:meta =============
2137    else if (!inMeta && !inHdfs && !isDeployed) {
2138      // We shouldn't have record of this region at all then!
2139      assert false : "Entry for region with no data";
2140    } else if (!inMeta && !inHdfs && isDeployed) {
2141      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS,
2142        "Region " + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but "
2143          + "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2144      if (shouldFixAssignments()) {
2145        undeployRegions(hbi);
2146      }
2147
2148    } else if (!inMeta && inHdfs && !isDeployed) {
2149      if (hbi.isMerged()) {
2150        // This region has already been merged, the remaining hdfs file will be
2151        // cleaned by CatalogJanitor later
2152        hbi.setSkipChecks(true);
2153        LOG.info("Region " + descriptiveName
2154          + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2155        return;
2156      }
2157      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " + descriptiveName
2158        + " on HDFS, but not listed in hbase:meta " + "or deployed on any region server");
2159      // restore region consistency of an adopted orphan
2160      if (shouldFixMeta()) {
2161        if (!hbi.isHdfsRegioninfoPresent()) {
2162          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2163            + " in table integrity repair phase if -fixHdfsOrphans was" + " used.");
2164          return;
2165        }
2166
2167        RegionInfo hri = hbi.getHdfsHRI();
2168        HbckTableInfo tableInfo = tablesInfo.get(hri.getTable());
2169
2170        for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) {
2171          if (
2172            Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2173              && (region.getEndKey().length == 0
2174                || Bytes.compareTo(region.getEndKey(), hri.getEndKey()) >= 0)
2175              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0
2176          ) {
2177            if (region.isSplit() || region.isOffline()) continue;
2178            Path regionDir = hbi.getHdfsRegionDir();
2179            FileSystem fs = regionDir.getFileSystem(getConf());
2180            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2181            for (Path familyDir : familyDirs) {
2182              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2183              for (Path referenceFilePath : referenceFilePaths) {
2184                Path parentRegionDir =
2185                  StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2186                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2187                  LOG.warn(hri + " start and stop keys are in the range of " + region
2188                    + ". The region might not be cleaned up from hdfs when region " + region
2189                    + " split failed. Hence deleting from hdfs.");
2190                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, regionDir.getParent(),
2191                    hri);
2192                  return;
2193                }
2194              }
2195            }
2196          }
2197        }
2198        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2199        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2200        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2201          admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
2202          numReplicas);
2203
2204        tryAssignmentRepair(hbi, "Trying to reassign region...");
2205      }
2206
2207    } else if (!inMeta && inHdfs && isDeployed) {
2208      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2209        + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2210      debugLsr(hbi.getHdfsRegionDir());
2211      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2212        // for replicas, this means that we should undeploy the region (we would have
2213        // gone over the primaries and fixed meta holes in first phase under
2214        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2215        // this stage unless unwanted replica)
2216        if (shouldFixAssignments()) {
2217          undeployRegionsForHbi(hbi);
2218        }
2219      }
2220      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2221        if (!hbi.isHdfsRegioninfoPresent()) {
2222          LOG.error("This should have been repaired in table integrity repair phase");
2223          return;
2224        }
2225
2226        LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2227        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2228        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2229          admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
2230          numReplicas);
2231        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2232      }
2233
2234      // ========== Cases where the region is in hbase:meta =============
2235    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2236      // check whether this is an actual error, or just transient state where parent
2237      // is not cleaned
2238      if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) {
2239        // check that split daughters are there
2240        HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName());
2241        HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName());
2242        if (infoA != null && infoB != null) {
2243          // we already processed or will process daughters. Move on, nothing to see here.
2244          hbi.setSkipChecks(true);
2245          return;
2246        }
2247      }
2248
2249      // For Replica region, we need to do a similar check. If replica is not split successfully,
2250      // error is going to be reported against primary daughter region.
2251      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2252        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2253          + "and not deployed on any region server. This may be transient.");
2254        hbi.setSkipChecks(true);
2255        return;
2256      }
2257
2258      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT,
2259        "Region " + descriptiveName + " is a split parent in META, in HDFS, "
2260          + "and not deployed on any region server. This could be transient, "
2261          + "consider to run the catalog janitor first!");
2262      if (shouldFixSplitParents()) {
2263        setShouldRerun();
2264        resetSplitParent(hbi);
2265      }
2266    } else if (inMeta && !inHdfs && !isDeployed) {
2267      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " + descriptiveName
2268        + " found in META, but not in HDFS " + "or deployed on any region server.");
2269      if (shouldFixMeta()) {
2270        deleteMetaRegion(hbi);
2271      }
2272    } else if (inMeta && !inHdfs && isDeployed) {
2273      errors.reportError(ERROR_CODE.NOT_IN_HDFS,
2274        "Region " + descriptiveName + " found in META, but not in HDFS, " + "and deployed on "
2275          + Joiner.on(", ").join(hbi.getDeployedOn()));
2276      // We treat HDFS as ground truth. Any information in meta is transient
2277      // and equivalent data can be regenerated. So, lets unassign and remove
2278      // these problems from META.
2279      if (shouldFixAssignments()) {
2280        errors.print("Trying to fix unassigned region...");
2281        undeployRegions(hbi);
2282      }
2283      if (shouldFixMeta()) {
2284        // wait for it to complete
2285        deleteMetaRegion(hbi);
2286      }
2287    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2288      errors.reportError(ERROR_CODE.NOT_DEPLOYED,
2289        "Region " + descriptiveName + " not deployed on any region server.");
2290      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2291    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2292      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2293        "Region " + descriptiveName + " should not be deployed according "
2294          + "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2295      if (shouldFixAssignments()) {
2296        errors.print("Trying to close the region " + descriptiveName);
2297        setShouldRerun();
2298        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2299          hbi.getDeployedOn());
2300      }
2301    } else if (inMeta && inHdfs && isMultiplyDeployed) {
2302      errors.reportError(ERROR_CODE.MULTI_DEPLOYED,
2303        "Region " + descriptiveName + " is listed in hbase:meta on region server "
2304          + hbi.getMetaEntry().regionServer + " but is multiply assigned to region servers "
2305          + Joiner.on(", ").join(hbi.getDeployedOn()));
2306      // If we are trying to fix the errors
2307      if (shouldFixAssignments()) {
2308        errors.print("Trying to fix assignment error...");
2309        setShouldRerun();
2310        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2311          hbi.getDeployedOn());
2312      }
2313    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2314      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META,
2315        "Region " + descriptiveName + " listed in hbase:meta on region server "
2316          + hbi.getMetaEntry().regionServer + " but found on region server "
2317          + hbi.getDeployedOn().get(0));
2318      // If we are trying to fix the errors
2319      if (shouldFixAssignments()) {
2320        errors.print("Trying to fix assignment error...");
2321        setShouldRerun();
2322        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2323          hbi.getDeployedOn());
2324        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2325      }
2326    } else {
2327      errors.reportError(ERROR_CODE.UNKNOWN,
2328        "Region " + descriptiveName + " is in an unforeseen state:" + " inMeta=" + inMeta
2329          + " inHdfs=" + inHdfs + " isDeployed=" + isDeployed + " isMultiplyDeployed="
2330          + isMultiplyDeployed + " deploymentMatchesMeta=" + deploymentMatchesMeta
2331          + " shouldBeDeployed=" + shouldBeDeployed);
2332    }
2333  }
2334
2335  /**
2336   * Checks tables integrity. Goes over all regions and scans the tables. Collects all the pieces
2337   * for each table and checks if there are missing, repeated or overlapping ones.
2338   */
2339  SortedMap<TableName, HbckTableInfo> checkIntegrity() throws IOException {
2340    tablesInfo = new TreeMap<>();
2341    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2342    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2343      // Check only valid, working regions
2344      if (hbi.getMetaEntry() == null) {
2345        // this assumes that consistency check has run loadMetaEntry
2346        Path p = hbi.getHdfsRegionDir();
2347        if (p == null) {
2348          errors.report("No regioninfo in Meta or HDFS. " + hbi);
2349        }
2350
2351        // TODO test.
2352        continue;
2353      }
2354      if (hbi.getMetaEntry().regionServer == null) {
2355        errors.detail("Skipping region because no region server: " + hbi);
2356        continue;
2357      }
2358      if (hbi.getMetaEntry().getRegionInfo().isOffline()) {
2359        errors.detail("Skipping region because it is offline: " + hbi);
2360        continue;
2361      }
2362      if (hbi.containsOnlyHdfsEdits()) {
2363        errors.detail("Skipping region because it only contains edits" + hbi);
2364        continue;
2365      }
2366
2367      // Missing regionDir or over-deployment is checked elsewhere. Include
2368      // these cases in modTInfo, so we can evaluate those regions as part of
2369      // the region chain in META
2370      // if (hbi.foundRegionDir == null) continue;
2371      // if (hbi.deployedOn.size() != 1) continue;
2372      if (hbi.getDeployedOn().isEmpty()) {
2373        continue;
2374      }
2375
2376      // We should be safe here
2377      TableName tableName = hbi.getMetaEntry().getRegionInfo().getTable();
2378      HbckTableInfo modTInfo = tablesInfo.get(tableName);
2379      if (modTInfo == null) {
2380        modTInfo = new HbckTableInfo(tableName, this);
2381      }
2382      for (ServerName server : hbi.getDeployedOn()) {
2383        modTInfo.addServer(server);
2384      }
2385
2386      if (!hbi.isSkipChecks()) {
2387        modTInfo.addRegionInfo(hbi);
2388      }
2389
2390      tablesInfo.put(tableName, modTInfo);
2391    }
2392
2393    loadTableInfosForTablesWithNoRegion();
2394
2395    logParallelMerge();
2396    for (HbckTableInfo tInfo : tablesInfo.values()) {
2397      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2398      if (!tInfo.checkRegionChain(handler)) {
2399        errors.report("Found inconsistency in table " + tInfo.getName());
2400      }
2401    }
2402    return tablesInfo;
2403  }
2404
2405  /**
2406   * Loads table info's for tables that may not have been included, since there are no regions
2407   * reported for the table, but table dir is there in hdfs
2408   */
2409  private void loadTableInfosForTablesWithNoRegion() throws IOException {
2410    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2411    for (TableDescriptor htd : allTables.values()) {
2412      if (checkMetaOnly && !htd.isMetaTable()) {
2413        continue;
2414      }
2415
2416      TableName tableName = htd.getTableName();
2417      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2418        HbckTableInfo tableInfo = new HbckTableInfo(tableName, this);
2419        tableInfo.htds.add(htd);
2420        tablesInfo.put(htd.getTableName(), tableInfo);
2421      }
2422    }
2423  }
2424
2425  /**
2426   * Merge hdfs data by moving from contained HbckRegionInfo into targetRegionDir.
2427   * @return number of file move fixes done to merge regions.
2428   */
2429  public int mergeRegionDirs(Path targetRegionDir, HbckRegionInfo contained) throws IOException {
2430    int fileMoves = 0;
2431    String thread = Thread.currentThread().getName();
2432    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2433    debugLsr(contained.getHdfsRegionDir());
2434
2435    // rename the contained into the container.
2436    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2437    FileStatus[] dirs = null;
2438    try {
2439      dirs = fs.listStatus(contained.getHdfsRegionDir());
2440    } catch (FileNotFoundException fnfe) {
2441      // region we are attempting to merge in is not present! Since this is a merge, there is
2442      // no harm skipping this region if it does not exist.
2443      if (!fs.exists(contained.getHdfsRegionDir())) {
2444        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2445          + " is missing. Assuming already sidelined or moved.");
2446      } else {
2447        sidelineRegionDir(fs, contained);
2448      }
2449      return fileMoves;
2450    }
2451
2452    if (dirs == null) {
2453      if (!fs.exists(contained.getHdfsRegionDir())) {
2454        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2455          + " already sidelined.");
2456      } else {
2457        sidelineRegionDir(fs, contained);
2458      }
2459      return fileMoves;
2460    }
2461
2462    for (FileStatus cf : dirs) {
2463      Path src = cf.getPath();
2464      Path dst = new Path(targetRegionDir, src.getName());
2465
2466      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2467        // do not copy the old .regioninfo file.
2468        continue;
2469      }
2470
2471      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2472        // do not copy the .oldlogs files
2473        continue;
2474      }
2475
2476      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2477      // FileSystem.rename is inconsistent with directories -- if the
2478      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2479      // it moves the src into the dst dir resulting in (foo/a/b). If
2480      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2481      for (FileStatus hfile : fs.listStatus(src)) {
2482        boolean success = fs.rename(hfile.getPath(), dst);
2483        if (success) {
2484          fileMoves++;
2485        }
2486      }
2487      LOG.debug("[" + thread + "] Sideline directory contents:");
2488      debugLsr(targetRegionDir);
2489    }
2490
2491    // if all success.
2492    sidelineRegionDir(fs, contained);
2493    LOG.info("[" + thread + "] Sidelined region dir " + contained.getHdfsRegionDir() + " into "
2494      + getSidelineDir());
2495    debugLsr(contained.getHdfsRegionDir());
2496
2497    return fileMoves;
2498  }
2499
2500  static class WorkItemOverlapMerge implements Callable<Void> {
2501    private TableIntegrityErrorHandler handler;
2502    Collection<HbckRegionInfo> overlapgroup;
2503
2504    WorkItemOverlapMerge(Collection<HbckRegionInfo> overlapgroup,
2505      TableIntegrityErrorHandler handler) {
2506      this.handler = handler;
2507      this.overlapgroup = overlapgroup;
2508    }
2509
2510    @Override
2511    public Void call() throws Exception {
2512      handler.handleOverlapGroup(overlapgroup);
2513      return null;
2514    }
2515  }
2516
2517  /**
2518   * Return a list of user-space table names whose metadata have not been modified in the last few
2519   * milliseconds specified by timelag if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER,
2520   * STARTCODE_QUALIFIER, SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
2521   * milliseconds specified by timelag, then the table is a candidate to be returned.
2522   * @return tables that have not been modified recently
2523   * @throws IOException if an error is encountered
2524   */
2525  TableDescriptor[] getTables(AtomicInteger numSkipped) {
2526    List<TableName> tableNames = new ArrayList<>();
2527    long now = EnvironmentEdgeManager.currentTime();
2528
2529    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2530      HbckRegionInfo.MetaEntry info = hbi.getMetaEntry();
2531
2532      // if the start key is zero, then we have found the first region of a table.
2533      // pick only those tables that were not modified in the last few milliseconds.
2534      if (
2535        info != null && info.getRegionInfo().getStartKey().length == 0
2536          && !info.getRegionInfo().isMetaRegion()
2537      ) {
2538        if (info.modTime + timelag < now) {
2539          tableNames.add(info.getRegionInfo().getTable());
2540        } else {
2541          numSkipped.incrementAndGet(); // one more in-flux table
2542        }
2543      }
2544    }
2545    return getTableDescriptors(tableNames);
2546  }
2547
2548  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
2549    LOG.info("getTableDescriptors == tableNames => " + tableNames);
2550    try (Connection conn = ConnectionFactory.createConnection(getConf());
2551      Admin admin = conn.getAdmin()) {
2552      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
2553      return tds.toArray(new TableDescriptor[tds.size()]);
2554    } catch (IOException e) {
2555      LOG.debug("Exception getting table descriptors", e);
2556    }
2557    return new TableDescriptor[0];
2558  }
2559
2560  /**
2561   * Gets the entry in regionInfo corresponding to the the given encoded region name. If the region
2562   * has not been seen yet, a new entry is added and returned.
2563   */
2564  private synchronized HbckRegionInfo getOrCreateInfo(String name) {
2565    HbckRegionInfo hbi = regionInfoMap.get(name);
2566    if (hbi == null) {
2567      hbi = new HbckRegionInfo(null);
2568      regionInfoMap.put(name, hbi);
2569    }
2570    return hbi;
2571  }
2572
2573  private void checkAndFixReplication() throws ReplicationException, IOException {
2574    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
2575
2576    if (!checker.checkHasDataInQueues()) {
2577      return;
2578    }
2579
2580    checker.checkUnDeletedQueues();
2581
2582    if (checker.hasUnDeletedQueues() && this.fixReplication) {
2583      checker.fixUnDeletedQueues();
2584      setShouldRerun();
2585    }
2586  }
2587
2588  /**
2589   * Check values in regionInfo for hbase:meta Check if zero or more than one regions with
2590   * hbase:meta are found. If there are inconsistencies (i.e. zero or more than one regions pretend
2591   * to be holding the hbase:meta) try to fix that and report an error.
2592   * @throws IOException from HBaseFsckRepair functions
2593   */
2594  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2595    Map<Integer, HbckRegionInfo> metaRegions = new HashMap<>();
2596    for (HbckRegionInfo value : regionInfoMap.values()) {
2597      if (value.getMetaEntry() != null && value.getMetaEntry().getRegionInfo().isMetaRegion()) {
2598        metaRegions.put(value.getReplicaId(), value);
2599      }
2600    }
2601    int metaReplication = admin.getDescriptor(TableName.META_TABLE_NAME).getRegionReplication();
2602    boolean noProblem = true;
2603    // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas
2604    // Check the deployed servers. It should be exactly one server for each replica.
2605    for (int i = 0; i < metaReplication; i++) {
2606      HbckRegionInfo metaHbckRegionInfo = metaRegions.remove(i);
2607      List<ServerName> servers = new ArrayList<>();
2608      if (metaHbckRegionInfo != null) {
2609        servers = metaHbckRegionInfo.getDeployedOn();
2610      }
2611      if (servers.size() != 1) {
2612        noProblem = false;
2613        if (servers.isEmpty()) {
2614          assignMetaReplica(i);
2615        } else if (servers.size() > 1) {
2616          errors.reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId "
2617            + metaHbckRegionInfo.getReplicaId() + " is found on more than one region.");
2618          if (shouldFixAssignments()) {
2619            errors.print("Trying to fix a problem with hbase:meta, replicaId "
2620              + metaHbckRegionInfo.getReplicaId() + "..");
2621            setShouldRerun();
2622            // try fix it (treat is a dupe assignment)
2623            HBaseFsckRepair.fixMultiAssignment(connection,
2624              metaHbckRegionInfo.getMetaEntry().getRegionInfo(), servers);
2625          }
2626        }
2627      }
2628    }
2629    // unassign whatever is remaining in metaRegions. They are excess replicas.
2630    for (Map.Entry<Integer, HbckRegionInfo> entry : metaRegions.entrySet()) {
2631      noProblem = false;
2632      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2633        "hbase:meta replicas are deployed in excess. Configured " + metaReplication + ", deployed "
2634          + metaRegions.size());
2635      if (shouldFixAssignments()) {
2636        errors.print(
2637          "Trying to undeploy excess replica, replicaId: " + entry.getKey() + " of hbase:meta..");
2638        setShouldRerun();
2639        unassignMetaReplica(entry.getValue());
2640      }
2641    }
2642    // if noProblem is false, rerun hbck with hopefully fixed META
2643    // if noProblem is true, no errors, so continue normally
2644    return noProblem;
2645  }
2646
2647  private void unassignMetaReplica(HbckRegionInfo hi)
2648    throws IOException, InterruptedException, KeeperException {
2649    undeployRegions(hi);
2650    ZKUtil.deleteNode(zkw,
2651      zkw.getZNodePaths().getZNodeForReplica(hi.getMetaEntry().getRegionInfo().getReplicaId()));
2652  }
2653
2654  private void assignMetaReplica(int replicaId)
2655    throws IOException, KeeperException, InterruptedException {
2656    errors.reportError(ERROR_CODE.NO_META_REGION,
2657      "hbase:meta, replicaId " + replicaId + " is not found on any region.");
2658    if (shouldFixAssignments()) {
2659      errors.print("Trying to fix a problem with hbase:meta..");
2660      setShouldRerun();
2661      // try to fix it (treat it as unassigned region)
2662      RegionInfo h = RegionReplicaUtil
2663        .getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
2664      HBaseFsckRepair.fixUnassigned(admin, h);
2665      HBaseFsckRepair.waitUntilAssigned(admin, h);
2666    }
2667  }
2668
2669  /**
2670   * Scan hbase:meta, adding all regions found to the regionInfo map.
2671   * @throws IOException if an error is encountered
2672   */
2673  boolean loadMetaEntries() throws IOException {
2674    ClientMetaTableAccessor.Visitor visitor = new ClientMetaTableAccessor.Visitor() {
2675      int countRecord = 1;
2676
2677      // comparator to sort KeyValues with latest modtime
2678      final Comparator<Cell> comp = new Comparator<Cell>() {
2679        @Override
2680        public int compare(Cell k1, Cell k2) {
2681          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
2682        }
2683      };
2684
2685      @Override
2686      public boolean visit(Result result) throws IOException {
2687        try {
2688
2689          // record the latest modification of this META record
2690          long ts = Collections.max(result.listCells(), comp).getTimestamp();
2691          RegionLocations rl = CatalogFamilyFormat.getRegionLocations(result);
2692          if (rl == null) {
2693            emptyRegionInfoQualifiers.add(result);
2694            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2695              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2696            return true;
2697          }
2698          ServerName sn = null;
2699          if (
2700            rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null
2701              || rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion() == null
2702          ) {
2703            emptyRegionInfoQualifiers.add(result);
2704            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2705              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2706            return true;
2707          }
2708          RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion();
2709          if (!(isTableIncluded(hri.getTable()) || hri.isMetaRegion())) {
2710            return true;
2711          }
2712          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
2713          for (HRegionLocation h : rl.getRegionLocations()) {
2714            if (h == null || h.getRegion() == null) {
2715              continue;
2716            }
2717            sn = h.getServerName();
2718            hri = h.getRegion();
2719
2720            HbckRegionInfo.MetaEntry m = null;
2721            if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2722              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, daughters.getFirst(),
2723                daughters.getSecond());
2724            } else {
2725              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, null, null);
2726            }
2727            HbckRegionInfo previous = regionInfoMap.get(hri.getEncodedName());
2728            if (previous == null) {
2729              regionInfoMap.put(hri.getEncodedName(), new HbckRegionInfo(m));
2730            } else if (previous.getMetaEntry() == null) {
2731              previous.setMetaEntry(m);
2732            } else {
2733              throw new IOException("Two entries in hbase:meta are same " + previous);
2734            }
2735          }
2736          List<RegionInfo> mergeParents = CatalogFamilyFormat.getMergeRegions(result.rawCells());
2737          if (mergeParents != null) {
2738            for (RegionInfo mergeRegion : mergeParents) {
2739              if (mergeRegion != null) {
2740                // This region is already being merged
2741                HbckRegionInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
2742                hbInfo.setMerged(true);
2743              }
2744            }
2745          }
2746
2747          // show proof of progress to the user, once for every 100 records.
2748          if (countRecord % 100 == 0) {
2749            errors.progress();
2750          }
2751          countRecord++;
2752          return true;
2753        } catch (RuntimeException e) {
2754          LOG.error("Result=" + result);
2755          throw e;
2756        }
2757      }
2758    };
2759    if (!checkMetaOnly) {
2760      // Scan hbase:meta to pick up user regions
2761      MetaTableAccessor.fullScanRegions(connection, visitor);
2762    }
2763
2764    errors.print("");
2765    return true;
2766  }
2767
2768  /**
2769   * Prints summary of all tables found on the system.
2770   */
2771  private void printTableSummary(SortedMap<TableName, HbckTableInfo> tablesInfo) {
2772    StringBuilder sb = new StringBuilder();
2773    int numOfSkippedRegions;
2774    errors.print("Summary:");
2775    for (HbckTableInfo tInfo : tablesInfo.values()) {
2776      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName()))
2777        ? skippedRegions.get(tInfo.getName()).size()
2778        : 0;
2779
2780      if (errors.tableHasErrors(tInfo)) {
2781        errors.print("Table " + tInfo.getName() + " is inconsistent.");
2782      } else if (numOfSkippedRegions > 0) {
2783        errors.print("Table " + tInfo.getName() + " is okay (with " + numOfSkippedRegions
2784          + " skipped regions).");
2785      } else {
2786        errors.print("Table " + tInfo.getName() + " is okay.");
2787      }
2788      errors.print("    Number of regions: " + tInfo.getNumRegions());
2789      if (numOfSkippedRegions > 0) {
2790        Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
2791        System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
2792        System.out.println("      List of skipped regions:");
2793        for (String sr : skippedRegionStrings) {
2794          System.out.println("        " + sr);
2795        }
2796      }
2797      sb.setLength(0); // clear out existing buffer, if any.
2798      sb.append("    Deployed on: ");
2799      for (ServerName server : tInfo.deployedOn) {
2800        sb.append(" " + server.toString());
2801      }
2802      errors.print(sb.toString());
2803    }
2804  }
2805
2806  static HbckErrorReporter getErrorReporter(final Configuration conf)
2807    throws ClassNotFoundException {
2808    Class<? extends HbckErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter",
2809      PrintingErrorReporter.class, HbckErrorReporter.class);
2810    return ReflectionUtils.newInstance(reporter, conf);
2811  }
2812
2813  static class PrintingErrorReporter implements HbckErrorReporter {
2814    public int errorCount = 0;
2815    private int showProgress;
2816    // How frequently calls to progress() will create output
2817    private static final int progressThreshold = 100;
2818
2819    Set<HbckTableInfo> errorTables = new HashSet<>();
2820
2821    // for use by unit tests to verify which errors were discovered
2822    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
2823
2824    @Override
2825    public void clear() {
2826      errorTables.clear();
2827      errorList.clear();
2828      errorCount = 0;
2829    }
2830
2831    @Override
2832    public synchronized void reportError(ERROR_CODE errorCode, String message) {
2833      if (errorCode == ERROR_CODE.WRONG_USAGE) {
2834        System.err.println(message);
2835        return;
2836      }
2837
2838      errorList.add(errorCode);
2839      if (!summary) {
2840        System.out.println("ERROR: " + message);
2841      }
2842      errorCount++;
2843      showProgress = 0;
2844    }
2845
2846    @Override
2847    public synchronized void reportError(ERROR_CODE errorCode, String message,
2848      HbckTableInfo table) {
2849      errorTables.add(table);
2850      reportError(errorCode, message);
2851    }
2852
2853    @Override
2854    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2855      HbckRegionInfo info) {
2856      errorTables.add(table);
2857      String reference = "(region " + info.getRegionNameAsString() + ")";
2858      reportError(errorCode, reference + " " + message);
2859    }
2860
2861    @Override
2862    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2863      HbckRegionInfo info1, HbckRegionInfo info2) {
2864      errorTables.add(table);
2865      String reference =
2866        "(regions " + info1.getRegionNameAsString() + " and " + info2.getRegionNameAsString() + ")";
2867      reportError(errorCode, reference + " " + message);
2868    }
2869
2870    @Override
2871    public synchronized void reportError(String message) {
2872      reportError(ERROR_CODE.UNKNOWN, message);
2873    }
2874
2875    /**
2876     * Report error information, but do not increment the error count. Intended for cases where the
2877     * actual error would have been reported previously.
2878     */
2879    @Override
2880    public synchronized void report(String message) {
2881      if (!summary) {
2882        System.out.println("ERROR: " + message);
2883      }
2884      showProgress = 0;
2885    }
2886
2887    @Override
2888    public synchronized int summarize() {
2889      System.out.println(Integer.toString(errorCount) + " inconsistencies detected.");
2890      if (errorCount == 0) {
2891        System.out.println("Status: OK");
2892        return 0;
2893      } else {
2894        System.out.println("Status: INCONSISTENT");
2895        return -1;
2896      }
2897    }
2898
2899    @Override
2900    public ArrayList<ERROR_CODE> getErrorList() {
2901      return errorList;
2902    }
2903
2904    @Override
2905    public synchronized void print(String message) {
2906      if (!summary) {
2907        System.out.println(message);
2908      }
2909    }
2910
2911    @Override
2912    public boolean tableHasErrors(HbckTableInfo table) {
2913      return errorTables.contains(table);
2914    }
2915
2916    @Override
2917    public void resetErrors() {
2918      errorCount = 0;
2919    }
2920
2921    @Override
2922    public synchronized void detail(String message) {
2923      if (details) {
2924        System.out.println(message);
2925      }
2926      showProgress = 0;
2927    }
2928
2929    @Override
2930    public synchronized void progress() {
2931      if (showProgress++ == progressThreshold) {
2932        if (!summary) {
2933          System.out.print(".");
2934        }
2935        showProgress = 0;
2936      }
2937    }
2938  }
2939
2940  /**
2941   * Contact a region server and get all information from it
2942   */
2943  static class WorkItemRegion implements Callable<Void> {
2944    private final HBaseFsck hbck;
2945    private final ServerName rsinfo;
2946    private final HbckErrorReporter errors;
2947    private final Connection connection;
2948
2949    WorkItemRegion(HBaseFsck hbck, ServerName info, HbckErrorReporter errors,
2950      Connection connection) {
2951      this.hbck = hbck;
2952      this.rsinfo = info;
2953      this.errors = errors;
2954      this.connection = connection;
2955    }
2956
2957    @Override
2958    public synchronized Void call() throws IOException {
2959      errors.progress();
2960      try {
2961        // list all online regions from this region server
2962        List<RegionInfo> regions = connection.getAdmin().getRegions(rsinfo);
2963        regions = filterRegions(regions);
2964
2965        if (details) {
2966          errors.detail(
2967            "RegionServer: " + rsinfo.getServerName() + " number of regions: " + regions.size());
2968          for (RegionInfo rinfo : regions) {
2969            errors.detail("  " + rinfo.getRegionNameAsString() + " id: " + rinfo.getRegionId()
2970              + " encoded_name: " + rinfo.getEncodedName() + " start: "
2971              + Bytes.toStringBinary(rinfo.getStartKey()) + " end: "
2972              + Bytes.toStringBinary(rinfo.getEndKey()));
2973          }
2974        }
2975
2976        // check to see if the existence of this region matches the region in META
2977        for (RegionInfo r : regions) {
2978          HbckRegionInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
2979          hbi.addServer(r, rsinfo);
2980        }
2981      } catch (IOException e) { // unable to connect to the region server.
2982        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE,
2983          "RegionServer: " + rsinfo.getServerName() + " Unable to fetch region information. " + e);
2984        throw e;
2985      }
2986      return null;
2987    }
2988
2989    private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
2990      List<RegionInfo> ret = Lists.newArrayList();
2991      for (RegionInfo hri : regions) {
2992        if (hri.isMetaRegion() || (!hbck.checkMetaOnly && hbck.isTableIncluded(hri.getTable()))) {
2993          ret.add(hri);
2994        }
2995      }
2996      return ret;
2997    }
2998  }
2999
3000  /**
3001   * Contact hdfs and get all information about specified table directory into regioninfo list.
3002   */
3003  class WorkItemHdfsDir implements Callable<Void> {
3004    private FileStatus tableDir;
3005    private HbckErrorReporter errors;
3006    private FileSystem fs;
3007
3008    WorkItemHdfsDir(FileSystem fs, HbckErrorReporter errors, FileStatus status) {
3009      this.fs = fs;
3010      this.tableDir = status;
3011      this.errors = errors;
3012    }
3013
3014    @Override
3015    public synchronized Void call() throws InterruptedException, ExecutionException {
3016      final Vector<Exception> exceptions = new Vector<>();
3017
3018      try {
3019        final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3020        final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
3021
3022        for (final FileStatus regionDir : regionDirs) {
3023          errors.progress();
3024          final String encodedName = regionDir.getPath().getName();
3025          // ignore directories that aren't hexadecimal
3026          if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
3027            continue;
3028          }
3029
3030          if (!exceptions.isEmpty()) {
3031            break;
3032          }
3033
3034          futures.add(executor.submit(new Runnable() {
3035            @Override
3036            public void run() {
3037              try {
3038                LOG.debug("Loading region info from hdfs:" + regionDir.getPath());
3039
3040                Path regioninfoFile =
3041                  new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
3042                boolean regioninfoFileExists = fs.exists(regioninfoFile);
3043
3044                if (!regioninfoFileExists) {
3045                  // As tables become larger it is more and more likely that by the time you
3046                  // reach a given region that it will be gone due to region splits/merges.
3047                  if (!fs.exists(regionDir.getPath())) {
3048                    LOG.warn("By the time we tried to process this region dir it was already gone: "
3049                      + regionDir.getPath());
3050                    return;
3051                  }
3052                }
3053
3054                HbckRegionInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
3055                HbckRegionInfo.HdfsEntry he = new HbckRegionInfo.HdfsEntry();
3056                synchronized (hbi) {
3057                  if (hbi.getHdfsRegionDir() != null) {
3058                    errors
3059                      .print("Directory " + encodedName + " duplicate??" + hbi.getHdfsRegionDir());
3060                  }
3061
3062                  he.regionDir = regionDir.getPath();
3063                  he.regionDirModTime = regionDir.getModificationTime();
3064                  he.hdfsRegioninfoFilePresent = regioninfoFileExists;
3065                  // we add to orphan list when we attempt to read .regioninfo
3066
3067                  // Set a flag if this region contains only edits
3068                  // This is special case if a region is left after split
3069                  he.hdfsOnlyEdits = true;
3070                  FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3071                  Path ePath = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
3072                  for (FileStatus subDir : subDirs) {
3073                    errors.progress();
3074                    String sdName = subDir.getPath().getName();
3075                    if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3076                      he.hdfsOnlyEdits = false;
3077                      break;
3078                    }
3079                  }
3080                  hbi.setHdfsEntry(he);
3081                }
3082              } catch (Exception e) {
3083                LOG.error("Could not load region dir", e);
3084                exceptions.add(e);
3085              }
3086            }
3087          }));
3088        }
3089
3090        // Ensure all pending tasks are complete (or that we run into an exception)
3091        for (Future<?> f : futures) {
3092          if (!exceptions.isEmpty()) {
3093            break;
3094          }
3095          try {
3096            f.get();
3097          } catch (ExecutionException e) {
3098            LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
3099            // Shouldn't happen, we already logged/caught any exceptions in the Runnable
3100          }
3101        }
3102      } catch (IOException e) {
3103        LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
3104        exceptions.add(e);
3105      } finally {
3106        if (!exceptions.isEmpty()) {
3107          errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3108            + tableDir.getPath().getName() + " Unable to fetch all HDFS region information. ");
3109          // Just throw the first exception as an indication something bad happened
3110          // Don't need to propagate all the exceptions, we already logged them all anyway
3111          throw new ExecutionException("First exception in WorkItemHdfsDir",
3112            exceptions.firstElement());
3113        }
3114      }
3115      return null;
3116    }
3117  }
3118
3119  /**
3120   * Contact hdfs and get all information about specified table directory into regioninfo list.
3121   */
3122  static class WorkItemHdfsRegionInfo implements Callable<Void> {
3123    private HbckRegionInfo hbi;
3124    private HBaseFsck hbck;
3125    private HbckErrorReporter errors;
3126
3127    WorkItemHdfsRegionInfo(HbckRegionInfo hbi, HBaseFsck hbck, HbckErrorReporter errors) {
3128      this.hbi = hbi;
3129      this.hbck = hbck;
3130      this.errors = errors;
3131    }
3132
3133    @Override
3134    public synchronized Void call() throws IOException {
3135      // only load entries that haven't been loaded yet.
3136      if (hbi.getHdfsHRI() == null) {
3137        try {
3138          errors.progress();
3139          hbi.loadHdfsRegioninfo(hbck.getConf());
3140        } catch (IOException ioe) {
3141          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3142            + hbi.getTableName() + " in hdfs dir " + hbi.getHdfsRegionDir()
3143            + "!  It may be an invalid format or version file.  Treating as "
3144            + "an orphaned regiondir.";
3145          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3146          try {
3147            hbck.debugLsr(hbi.getHdfsRegionDir());
3148          } catch (IOException ioe2) {
3149            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3150            throw ioe2;
3151          }
3152          hbck.orphanHdfsDirs.add(hbi);
3153          throw ioe;
3154        }
3155      }
3156      return null;
3157    }
3158  }
3159
3160  /**
3161   * Display the full report from fsck. This displays all live and dead region servers, and all
3162   * known regions.
3163   */
3164  public static void setDisplayFullReport() {
3165    details = true;
3166  }
3167
3168  public static boolean shouldDisplayFullReport() {
3169    return details;
3170  }
3171
3172  /**
3173   * Set exclusive mode.
3174   */
3175  public static void setForceExclusive() {
3176    forceExclusive = true;
3177  }
3178
3179  /**
3180   * Only one instance of hbck can modify HBase at a time.
3181   */
3182  public boolean isExclusive() {
3183    return fixAny || forceExclusive;
3184  }
3185
3186  /**
3187   * Set summary mode. Print only summary of the tables and status (OK or INCONSISTENT)
3188   */
3189  static void setSummary() {
3190    summary = true;
3191  }
3192
3193  /**
3194   * Set hbase:meta check mode. Print only info about hbase:meta table deployment/state
3195   */
3196  void setCheckMetaOnly() {
3197    checkMetaOnly = true;
3198  }
3199
3200  /**
3201   * Set region boundaries check mode.
3202   */
3203  void setRegionBoundariesCheck() {
3204    checkRegionBoundaries = true;
3205  }
3206
3207  /**
3208   * Set replication fix mode.
3209   */
3210  public void setFixReplication(boolean shouldFix) {
3211    fixReplication = shouldFix;
3212    fixAny |= shouldFix;
3213  }
3214
3215  public void setCleanReplicationBarrier(boolean shouldClean) {
3216    cleanReplicationBarrier = shouldClean;
3217  }
3218
3219  /**
3220   * Check if we should rerun fsck again. This checks if we've tried to fix something and we should
3221   * rerun fsck tool again. Display the full report from fsck. This displays all live and dead
3222   * region servers, and all known regions.
3223   */
3224  void setShouldRerun() {
3225    rerun = true;
3226  }
3227
3228  public boolean shouldRerun() {
3229    return rerun;
3230  }
3231
3232  /**
3233   * Fix inconsistencies found by fsck. This should try to fix errors (if any) found by fsck
3234   * utility.
3235   */
3236  public void setFixAssignments(boolean shouldFix) {
3237    fixAssignments = shouldFix;
3238    fixAny |= shouldFix;
3239  }
3240
3241  boolean shouldFixAssignments() {
3242    return fixAssignments;
3243  }
3244
3245  public void setFixMeta(boolean shouldFix) {
3246    fixMeta = shouldFix;
3247    fixAny |= shouldFix;
3248  }
3249
3250  boolean shouldFixMeta() {
3251    return fixMeta;
3252  }
3253
3254  public void setFixEmptyMetaCells(boolean shouldFix) {
3255    fixEmptyMetaCells = shouldFix;
3256    fixAny |= shouldFix;
3257  }
3258
3259  boolean shouldFixEmptyMetaCells() {
3260    return fixEmptyMetaCells;
3261  }
3262
3263  public void setCheckHdfs(boolean checking) {
3264    checkHdfs = checking;
3265  }
3266
3267  boolean shouldCheckHdfs() {
3268    return checkHdfs;
3269  }
3270
3271  public void setFixHdfsHoles(boolean shouldFix) {
3272    fixHdfsHoles = shouldFix;
3273    fixAny |= shouldFix;
3274  }
3275
3276  boolean shouldFixHdfsHoles() {
3277    return fixHdfsHoles;
3278  }
3279
3280  public void setFixTableOrphans(boolean shouldFix) {
3281    fixTableOrphans = shouldFix;
3282    fixAny |= shouldFix;
3283  }
3284
3285  boolean shouldFixTableOrphans() {
3286    return fixTableOrphans;
3287  }
3288
3289  public void setFixHdfsOverlaps(boolean shouldFix) {
3290    fixHdfsOverlaps = shouldFix;
3291    fixAny |= shouldFix;
3292  }
3293
3294  boolean shouldFixHdfsOverlaps() {
3295    return fixHdfsOverlaps;
3296  }
3297
3298  public void setFixHdfsOrphans(boolean shouldFix) {
3299    fixHdfsOrphans = shouldFix;
3300    fixAny |= shouldFix;
3301  }
3302
3303  boolean shouldFixHdfsOrphans() {
3304    return fixHdfsOrphans;
3305  }
3306
3307  public void setFixVersionFile(boolean shouldFix) {
3308    fixVersionFile = shouldFix;
3309    fixAny |= shouldFix;
3310  }
3311
3312  public boolean shouldFixVersionFile() {
3313    return fixVersionFile;
3314  }
3315
3316  public void setSidelineBigOverlaps(boolean sbo) {
3317    this.sidelineBigOverlaps = sbo;
3318  }
3319
3320  public boolean shouldSidelineBigOverlaps() {
3321    return sidelineBigOverlaps;
3322  }
3323
3324  public void setFixSplitParents(boolean shouldFix) {
3325    fixSplitParents = shouldFix;
3326    fixAny |= shouldFix;
3327  }
3328
3329  public void setRemoveParents(boolean shouldFix) {
3330    removeParents = shouldFix;
3331    fixAny |= shouldFix;
3332  }
3333
3334  boolean shouldFixSplitParents() {
3335    return fixSplitParents;
3336  }
3337
3338  boolean shouldRemoveParents() {
3339    return removeParents;
3340  }
3341
3342  public void setFixReferenceFiles(boolean shouldFix) {
3343    fixReferenceFiles = shouldFix;
3344    fixAny |= shouldFix;
3345  }
3346
3347  boolean shouldFixReferenceFiles() {
3348    return fixReferenceFiles;
3349  }
3350
3351  public void setFixHFileLinks(boolean shouldFix) {
3352    fixHFileLinks = shouldFix;
3353    fixAny |= shouldFix;
3354  }
3355
3356  boolean shouldFixHFileLinks() {
3357    return fixHFileLinks;
3358  }
3359
3360  public boolean shouldIgnorePreCheckPermission() {
3361    return !fixAny || ignorePreCheckPermission;
3362  }
3363
3364  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3365    this.ignorePreCheckPermission = ignorePreCheckPermission;
3366  }
3367
3368  /**
3369   * @param mm maximum number of regions to merge into a single region.
3370   */
3371  public void setMaxMerge(int mm) {
3372    this.maxMerge = mm;
3373  }
3374
3375  public int getMaxMerge() {
3376    return maxMerge;
3377  }
3378
3379  public void setMaxOverlapsToSideline(int mo) {
3380    this.maxOverlapsToSideline = mo;
3381  }
3382
3383  public int getMaxOverlapsToSideline() {
3384    return maxOverlapsToSideline;
3385  }
3386
3387  /**
3388   * Only check/fix tables specified by the list, Empty list means all tables are included.
3389   */
3390  boolean isTableIncluded(TableName table) {
3391    return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
3392  }
3393
3394  public void includeTable(TableName table) {
3395    tablesIncluded.add(table);
3396  }
3397
3398  Set<TableName> getIncludedTables() {
3399    return new HashSet<>(tablesIncluded);
3400  }
3401
3402  /**
3403   * We are interested in only those tables that have not changed their state in hbase:meta during
3404   * the last few seconds specified by hbase.admin.fsck.timelag
3405   * @param seconds - the time in seconds
3406   */
3407  public void setTimeLag(long seconds) {
3408    timelag = seconds * 1000; // convert to milliseconds
3409  }
3410
3411  /**
3412   * @param sidelineDir - HDFS path to sideline data
3413   */
3414  public void setSidelineDir(String sidelineDir) {
3415    this.sidelineDir = new Path(sidelineDir);
3416  }
3417
3418  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
3419    throws IOException {
3420    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3421  }
3422
3423  public HFileCorruptionChecker getHFilecorruptionChecker() {
3424    return hfcc;
3425  }
3426
3427  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3428    this.hfcc = hfcc;
3429  }
3430
3431  public void setRetCode(int code) {
3432    this.retcode = code;
3433  }
3434
3435  public int getRetCode() {
3436    return retcode;
3437  }
3438
3439  protected HBaseFsck printUsageAndExit() {
3440    StringWriter sw = new StringWriter(2048);
3441    PrintWriter out = new PrintWriter(sw);
3442    out.println("");
3443    out.println("-----------------------------------------------------------------------");
3444    out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
3445    out.println("In general, all Read-Only options are supported and can be be used");
3446    out.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
3447    out.println("below for details on which options are not supported.");
3448    out.println("-----------------------------------------------------------------------");
3449    out.println("");
3450    out.println("Usage: fsck [opts] {only tables}");
3451    out.println(" where [opts] are:");
3452    out.println("   -help Display help options (this)");
3453    out.println("   -details Display full report of all regions.");
3454    out.println("   -timelag <timeInSeconds>  Process only regions that "
3455      + " have not experienced any metadata updates in the last " + " <timeInSeconds> seconds.");
3456    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds"
3457      + " before checking if the fix worked if run with -fix");
3458    out.println("   -summary Print only summary of the tables and status.");
3459    out.println("   -metaonly Only check the state of the hbase:meta table.");
3460    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3461    out.println(
3462      "   -boundaries Verify that regions boundaries are the same between META and store files.");
3463    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
3464
3465    out.println("");
3466    out.println("  Datafile Repair options: (expert features, use with caution!)");
3467    out.println(
3468      "   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
3469    out.println(
3470      "   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
3471
3472    out.println("");
3473    out.println(" Replication options");
3474    out.println("   -fixReplication   Deletes replication queues for removed peers");
3475
3476    out.println("");
3477    out.println(
3478      "  Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
3479    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
3480    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
3481    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
3482    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
3483      + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3484    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
3485
3486    out.println("");
3487    out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
3488    out.println("");
3489    out.println("  UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
3490    out.println(
3491      "   -fix              Try to fix region assignments.  This is for backwards compatibility");
3492    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
3493    out.println(
3494      "   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
3495    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
3496    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
3497    out.println(
3498      "   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3499    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
3500    out.println(
3501      "   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n="
3502        + DEFAULT_MAX_MERGE + " by default)");
3503    out.println(
3504      "   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
3505    out.println(
3506      "   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n="
3507        + DEFAULT_OVERLAPS_TO_SIDELINE + " by default)");
3508    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
3509    out.println(
3510      "   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
3511    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
3512      + " (empty REGIONINFO_QUALIFIER rows)");
3513
3514    out.println("");
3515    out.println("  UNSUPPORTED Metadata Repair shortcuts");
3516    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles "
3517      + "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles"
3518      + "-fixHFileLinks");
3519    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3520    out.println("");
3521    out.println(" Replication options");
3522    out.println("   -fixReplication   Deletes replication queues for removed peers");
3523    out.println("   -cleanReplicationBarrier [tableName] clean the replication barriers "
3524      + "of a specified table, tableName is required");
3525    out.flush();
3526    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3527
3528    setRetCode(-2);
3529    return this;
3530  }
3531
3532  /**
3533   * Main program
3534   */
3535  public static void main(String[] args) throws Exception {
3536    // create a fsck object
3537    Configuration conf = HBaseConfiguration.create();
3538    Path hbasedir = CommonFSUtils.getRootDir(conf);
3539    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3540    CommonFSUtils.setFsDefault(conf, new Path(defaultFs));
3541    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
3542    System.exit(ret);
3543  }
3544
3545  /**
3546   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
3547   */
3548  static class HBaseFsckTool extends Configured implements Tool {
3549    HBaseFsckTool(Configuration conf) {
3550      super(conf);
3551    }
3552
3553    @Override
3554    public int run(String[] args) throws Exception {
3555      HBaseFsck hbck = new HBaseFsck(getConf());
3556      hbck.exec(hbck.executor, args);
3557      hbck.close();
3558      return hbck.getRetCode();
3559    }
3560  }
3561
3562  public HBaseFsck exec(ExecutorService exec, String[] args)
3563    throws KeeperException, IOException, InterruptedException, ReplicationException {
3564    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3565
3566    boolean checkCorruptHFiles = false;
3567    boolean sidelineCorruptHFiles = false;
3568
3569    // Process command-line args.
3570    for (int i = 0; i < args.length; i++) {
3571      String cmd = args[i];
3572      if (cmd.equals("-help") || cmd.equals("-h")) {
3573        return printUsageAndExit();
3574      } else if (cmd.equals("-details")) {
3575        setDisplayFullReport();
3576      } else if (cmd.equals("-exclusive")) {
3577        setForceExclusive();
3578      } else if (cmd.equals("-timelag")) {
3579        if (i == args.length - 1) {
3580          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3581          return printUsageAndExit();
3582        }
3583        try {
3584          long timelag = Long.parseLong(args[++i]);
3585          setTimeLag(timelag);
3586        } catch (NumberFormatException e) {
3587          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3588          return printUsageAndExit();
3589        }
3590      } else if (cmd.equals("-sleepBeforeRerun")) {
3591        if (i == args.length - 1) {
3592          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sleepBeforeRerun needs a value.");
3593          return printUsageAndExit();
3594        }
3595        try {
3596          sleepBeforeRerun = Long.parseLong(args[++i]);
3597        } catch (NumberFormatException e) {
3598          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3599          return printUsageAndExit();
3600        }
3601      } else if (cmd.equals("-sidelineDir")) {
3602        if (i == args.length - 1) {
3603          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3604          return printUsageAndExit();
3605        }
3606        setSidelineDir(args[++i]);
3607      } else if (cmd.equals("-fix")) {
3608        errors.reportError(ERROR_CODE.WRONG_USAGE,
3609          "This option is deprecated, please use  -fixAssignments instead.");
3610        setFixAssignments(true);
3611      } else if (cmd.equals("-fixAssignments")) {
3612        setFixAssignments(true);
3613      } else if (cmd.equals("-fixMeta")) {
3614        setFixMeta(true);
3615      } else if (cmd.equals("-noHdfsChecking")) {
3616        setCheckHdfs(false);
3617      } else if (cmd.equals("-fixHdfsHoles")) {
3618        setFixHdfsHoles(true);
3619      } else if (cmd.equals("-fixHdfsOrphans")) {
3620        setFixHdfsOrphans(true);
3621      } else if (cmd.equals("-fixTableOrphans")) {
3622        setFixTableOrphans(true);
3623      } else if (cmd.equals("-fixHdfsOverlaps")) {
3624        setFixHdfsOverlaps(true);
3625      } else if (cmd.equals("-fixVersionFile")) {
3626        setFixVersionFile(true);
3627      } else if (cmd.equals("-sidelineBigOverlaps")) {
3628        setSidelineBigOverlaps(true);
3629      } else if (cmd.equals("-fixSplitParents")) {
3630        setFixSplitParents(true);
3631      } else if (cmd.equals("-removeParents")) {
3632        setRemoveParents(true);
3633      } else if (cmd.equals("-ignorePreCheckPermission")) {
3634        setIgnorePreCheckPermission(true);
3635      } else if (cmd.equals("-checkCorruptHFiles")) {
3636        checkCorruptHFiles = true;
3637      } else if (cmd.equals("-sidelineCorruptHFiles")) {
3638        sidelineCorruptHFiles = true;
3639      } else if (cmd.equals("-fixReferenceFiles")) {
3640        setFixReferenceFiles(true);
3641      } else if (cmd.equals("-fixHFileLinks")) {
3642        setFixHFileLinks(true);
3643      } else if (cmd.equals("-fixEmptyMetaCells")) {
3644        setFixEmptyMetaCells(true);
3645      } else if (cmd.equals("-repair")) {
3646        // this attempts to merge overlapping hdfs regions, needs testing
3647        // under load
3648        setFixHdfsHoles(true);
3649        setFixHdfsOrphans(true);
3650        setFixMeta(true);
3651        setFixAssignments(true);
3652        setFixHdfsOverlaps(true);
3653        setFixVersionFile(true);
3654        setSidelineBigOverlaps(true);
3655        setFixSplitParents(false);
3656        setCheckHdfs(true);
3657        setFixReferenceFiles(true);
3658        setFixHFileLinks(true);
3659      } else if (cmd.equals("-repairHoles")) {
3660        // this will make all missing hdfs regions available but may lose data
3661        setFixHdfsHoles(true);
3662        setFixHdfsOrphans(false);
3663        setFixMeta(true);
3664        setFixAssignments(true);
3665        setFixHdfsOverlaps(false);
3666        setSidelineBigOverlaps(false);
3667        setFixSplitParents(false);
3668        setCheckHdfs(true);
3669      } else if (cmd.equals("-maxOverlapsToSideline")) {
3670        if (i == args.length - 1) {
3671          errors.reportError(ERROR_CODE.WRONG_USAGE,
3672            "-maxOverlapsToSideline needs a numeric value argument.");
3673          return printUsageAndExit();
3674        }
3675        try {
3676          int maxOverlapsToSideline = Integer.parseInt(args[++i]);
3677          setMaxOverlapsToSideline(maxOverlapsToSideline);
3678        } catch (NumberFormatException e) {
3679          errors.reportError(ERROR_CODE.WRONG_USAGE,
3680            "-maxOverlapsToSideline needs a numeric value argument.");
3681          return printUsageAndExit();
3682        }
3683      } else if (cmd.equals("-maxMerge")) {
3684        if (i == args.length - 1) {
3685          errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument.");
3686          return printUsageAndExit();
3687        }
3688        try {
3689          int maxMerge = Integer.parseInt(args[++i]);
3690          setMaxMerge(maxMerge);
3691        } catch (NumberFormatException e) {
3692          errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument.");
3693          return printUsageAndExit();
3694        }
3695      } else if (cmd.equals("-summary")) {
3696        setSummary();
3697      } else if (cmd.equals("-metaonly")) {
3698        setCheckMetaOnly();
3699      } else if (cmd.equals("-boundaries")) {
3700        setRegionBoundariesCheck();
3701      } else if (cmd.equals("-fixReplication")) {
3702        setFixReplication(true);
3703      } else if (cmd.equals("-cleanReplicationBarrier")) {
3704        setCleanReplicationBarrier(true);
3705        if (args[++i].startsWith("-")) {
3706          printUsageAndExit();
3707        }
3708        setCleanReplicationBarrierTable(args[i]);
3709      } else if (cmd.startsWith("-")) {
3710        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3711        return printUsageAndExit();
3712      } else {
3713        includeTable(TableName.valueOf(cmd));
3714        errors.print("Allow checking/fixes for table: " + cmd);
3715      }
3716    }
3717
3718    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
3719
3720    // pre-check current user has FS write permission or not
3721    try {
3722      preCheckPermission();
3723    } catch (IOException ioe) {
3724      Runtime.getRuntime().exit(-1);
3725    }
3726
3727    // do the real work of hbck
3728    connect();
3729
3730    // after connecting to server above, we have server version
3731    // check if unsupported option is specified based on server version
3732    if (!isOptionsSupported(args)) {
3733      return printUsageAndExit();
3734    }
3735
3736    try {
3737      // if corrupt file mode is on, first fix them since they may be opened later
3738      if (checkCorruptHFiles || sidelineCorruptHFiles) {
3739        LOG.info("Checking all hfiles for corruption");
3740        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3741        setHFileCorruptionChecker(hfcc); // so we can get result
3742        Collection<TableName> tables = getIncludedTables();
3743        Collection<Path> tableDirs = new ArrayList<>();
3744        Path rootdir = CommonFSUtils.getRootDir(getConf());
3745        if (tables.size() > 0) {
3746          for (TableName t : tables) {
3747            tableDirs.add(CommonFSUtils.getTableDir(rootdir, t));
3748          }
3749        } else {
3750          tableDirs = FSUtils.getTableDirs(CommonFSUtils.getCurrentFileSystem(getConf()), rootdir);
3751        }
3752        hfcc.checkTables(tableDirs);
3753        hfcc.report(errors);
3754      }
3755
3756      // check and fix table integrity, region consistency.
3757      int code = onlineHbck();
3758      setRetCode(code);
3759      // If we have changed the HBase state it is better to run hbck again
3760      // to see if we haven't broken something else in the process.
3761      // We run it only once more because otherwise we can easily fall into
3762      // an infinite loop.
3763      if (shouldRerun()) {
3764        try {
3765          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3766          Thread.sleep(sleepBeforeRerun);
3767        } catch (InterruptedException ie) {
3768          LOG.warn("Interrupted while sleeping");
3769          return this;
3770        }
3771        // Just report
3772        setFixAssignments(false);
3773        setFixMeta(false);
3774        setFixHdfsHoles(false);
3775        setFixHdfsOverlaps(false);
3776        setFixVersionFile(false);
3777        setFixTableOrphans(false);
3778        errors.resetErrors();
3779        code = onlineHbck();
3780        setRetCode(code);
3781      }
3782    } finally {
3783      IOUtils.closeQuietly(this, e -> LOG.warn("", e));
3784    }
3785    return this;
3786  }
3787
3788  private boolean isOptionsSupported(String[] args) {
3789    boolean result = true;
3790    String hbaseServerVersion = status.getHBaseVersion();
3791    if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
3792      // Process command-line args.
3793      for (String arg : args) {
3794        if (unsupportedOptionsInV2.contains(arg)) {
3795          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
3796            "option '" + arg + "' is not " + "supported!");
3797          result = false;
3798          break;
3799        }
3800      }
3801    }
3802    return result;
3803  }
3804
3805  public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
3806    this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
3807  }
3808
3809  public void cleanReplicationBarrier() throws IOException {
3810    if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
3811      return;
3812    }
3813    if (cleanReplicationBarrierTable.isSystemTable()) {
3814      errors.reportError(ERROR_CODE.INVALID_TABLE,
3815        "invalid table: " + cleanReplicationBarrierTable);
3816      return;
3817    }
3818
3819    boolean isGlobalScope = false;
3820    try {
3821      isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
3822    } catch (TableNotFoundException e) {
3823      LOG.info("we may need to clean some erroneous data due to bugs");
3824    }
3825
3826    if (isGlobalScope) {
3827      errors.reportError(ERROR_CODE.INVALID_TABLE,
3828        "table's replication scope is global: " + cleanReplicationBarrierTable);
3829      return;
3830    }
3831    List<byte[]> regionNames = new ArrayList<>();
3832    Scan barrierScan = new Scan();
3833    barrierScan.setCaching(100);
3834    barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
3835    barrierScan
3836      .withStartRow(ClientMetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
3837        ClientMetaTableAccessor.QueryType.REGION))
3838      .withStopRow(ClientMetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
3839        ClientMetaTableAccessor.QueryType.REGION));
3840    Result result;
3841    try (ResultScanner scanner = meta.getScanner(barrierScan)) {
3842      while ((result = scanner.next()) != null) {
3843        regionNames.add(result.getRow());
3844      }
3845    }
3846    if (regionNames.size() <= 0) {
3847      errors.reportError(ERROR_CODE.INVALID_TABLE,
3848        "there is no barriers of this table: " + cleanReplicationBarrierTable);
3849      return;
3850    }
3851    ReplicationQueueStorage queueStorage =
3852      ReplicationStorageFactory.getReplicationQueueStorage(connection, getConf());
3853    List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
3854    if (peerDescriptions != null && peerDescriptions.size() > 0) {
3855      List<String> peers = peerDescriptions.stream()
3856        .filter(
3857          peerConfig -> peerConfig.getPeerConfig().needToReplicate(cleanReplicationBarrierTable))
3858        .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
3859      try {
3860        List<String> batch = new ArrayList<>();
3861        for (String peer : peers) {
3862          for (byte[] regionName : regionNames) {
3863            batch.add(RegionInfo.encodeRegionName(regionName));
3864            if (batch.size() % 100 == 0) {
3865              queueStorage.removeLastSequenceIds(peer, batch);
3866              batch.clear();
3867            }
3868          }
3869          if (batch.size() > 0) {
3870            queueStorage.removeLastSequenceIds(peer, batch);
3871            batch.clear();
3872          }
3873        }
3874      } catch (ReplicationException re) {
3875        throw new IOException(re);
3876      }
3877    }
3878    for (byte[] regionName : regionNames) {
3879      meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
3880    }
3881    setShouldRerun();
3882  }
3883
3884  /**
3885   * ls -r for debugging purposes
3886   */
3887  void debugLsr(Path p) throws IOException {
3888    debugLsr(getConf(), p, errors);
3889  }
3890
3891  /**
3892   * ls -r for debugging purposes
3893   */
3894  public static void debugLsr(Configuration conf, Path p) throws IOException {
3895    debugLsr(conf, p, new PrintingErrorReporter());
3896  }
3897
3898  /**
3899   * ls -r for debugging purposes
3900   */
3901  public static void debugLsr(Configuration conf, Path p, HbckErrorReporter errors)
3902    throws IOException {
3903    if (!LOG.isDebugEnabled() || p == null) {
3904      return;
3905    }
3906    FileSystem fs = p.getFileSystem(conf);
3907
3908    if (!fs.exists(p)) {
3909      // nothing
3910      return;
3911    }
3912    errors.print(p.toString());
3913
3914    if (fs.isFile(p)) {
3915      return;
3916    }
3917
3918    if (fs.getFileStatus(p).isDirectory()) {
3919      FileStatus[] fss = fs.listStatus(p);
3920      for (FileStatus status : fss) {
3921        debugLsr(conf, status.getPath(), errors);
3922      }
3923    }
3924  }
3925}