001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.Closeable;
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.io.InterruptedIOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.net.InetAddress;
027import java.net.URI;
028import java.util.ArrayList;
029import java.util.Collection;
030import java.util.Collections;
031import java.util.Comparator;
032import java.util.EnumSet;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.List;
037import java.util.Locale;
038import java.util.Map;
039import java.util.Map.Entry;
040import java.util.Objects;
041import java.util.Optional;
042import java.util.Set;
043import java.util.SortedMap;
044import java.util.TreeMap;
045import java.util.Vector;
046import java.util.concurrent.Callable;
047import java.util.concurrent.ConcurrentSkipListMap;
048import java.util.concurrent.ExecutionException;
049import java.util.concurrent.ExecutorService;
050import java.util.concurrent.Executors;
051import java.util.concurrent.Future;
052import java.util.concurrent.FutureTask;
053import java.util.concurrent.ScheduledThreadPoolExecutor;
054import java.util.concurrent.TimeUnit;
055import java.util.concurrent.TimeoutException;
056import java.util.concurrent.atomic.AtomicBoolean;
057import java.util.concurrent.atomic.AtomicInteger;
058import java.util.stream.Collectors;
059import org.apache.commons.io.IOUtils;
060import org.apache.commons.lang3.StringUtils;
061import org.apache.hadoop.conf.Configuration;
062import org.apache.hadoop.conf.Configured;
063import org.apache.hadoop.fs.FSDataOutputStream;
064import org.apache.hadoop.fs.FileStatus;
065import org.apache.hadoop.fs.FileSystem;
066import org.apache.hadoop.fs.Path;
067import org.apache.hadoop.fs.permission.FsAction;
068import org.apache.hadoop.fs.permission.FsPermission;
069import org.apache.hadoop.hbase.Abortable;
070import org.apache.hadoop.hbase.Cell;
071import org.apache.hadoop.hbase.CellUtil;
072import org.apache.hadoop.hbase.ClusterMetrics;
073import org.apache.hadoop.hbase.ClusterMetrics.Option;
074import org.apache.hadoop.hbase.HBaseConfiguration;
075import org.apache.hadoop.hbase.HBaseInterfaceAudience;
076import org.apache.hadoop.hbase.HConstants;
077import org.apache.hadoop.hbase.HRegionLocation;
078import org.apache.hadoop.hbase.KeyValue;
079import org.apache.hadoop.hbase.MasterNotRunningException;
080import org.apache.hadoop.hbase.MetaTableAccessor;
081import org.apache.hadoop.hbase.RegionLocations;
082import org.apache.hadoop.hbase.ServerName;
083import org.apache.hadoop.hbase.TableName;
084import org.apache.hadoop.hbase.TableNotFoundException;
085import org.apache.hadoop.hbase.ZooKeeperConnectionException;
086import org.apache.hadoop.hbase.client.Admin;
087import org.apache.hadoop.hbase.client.ClusterConnection;
088import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
089import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
090import org.apache.hadoop.hbase.client.Connection;
091import org.apache.hadoop.hbase.client.ConnectionFactory;
092import org.apache.hadoop.hbase.client.Delete;
093import org.apache.hadoop.hbase.client.Get;
094import org.apache.hadoop.hbase.client.Put;
095import org.apache.hadoop.hbase.client.RegionInfo;
096import org.apache.hadoop.hbase.client.RegionInfoBuilder;
097import org.apache.hadoop.hbase.client.RegionReplicaUtil;
098import org.apache.hadoop.hbase.client.Result;
099import org.apache.hadoop.hbase.client.ResultScanner;
100import org.apache.hadoop.hbase.client.RowMutations;
101import org.apache.hadoop.hbase.client.Scan;
102import org.apache.hadoop.hbase.client.Table;
103import org.apache.hadoop.hbase.client.TableDescriptor;
104import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
105import org.apache.hadoop.hbase.client.TableState;
106import org.apache.hadoop.hbase.io.FileLink;
107import org.apache.hadoop.hbase.io.HFileLink;
108import org.apache.hadoop.hbase.io.hfile.CacheConfig;
109import org.apache.hadoop.hbase.io.hfile.HFile;
110import org.apache.hadoop.hbase.master.RegionState;
111import org.apache.hadoop.hbase.regionserver.HRegion;
112import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
113import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
114import org.apache.hadoop.hbase.replication.ReplicationException;
115import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
116import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
117import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
118import org.apache.hadoop.hbase.security.UserProvider;
119import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
120import org.apache.hadoop.hbase.util.HbckErrorReporter.ERROR_CODE;
121import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
122import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
123import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
124import org.apache.hadoop.hbase.wal.WALSplitUtil;
125import org.apache.hadoop.hbase.zookeeper.ZKUtil;
126import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
127import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
128import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
129import org.apache.hadoop.ipc.RemoteException;
130import org.apache.hadoop.security.AccessControlException;
131import org.apache.hadoop.security.UserGroupInformation;
132import org.apache.hadoop.util.ReflectionUtils;
133import org.apache.hadoop.util.Tool;
134import org.apache.hadoop.util.ToolRunner;
135import org.apache.yetus.audience.InterfaceAudience;
136import org.apache.yetus.audience.InterfaceStability;
137import org.apache.zookeeper.KeeperException;
138import org.slf4j.Logger;
139import org.slf4j.LoggerFactory;
140
141import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
142import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
143import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
144import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
145import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
146import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
147
148import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
149import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
150
151/**
152 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and table integrity
 * problems in a corrupted HBase. This tool was written for hbase-1.x. It does not work with
 * hbase-2.x: it can read state but is not allowed to change state, i.e. effect 'repair'. Even
 * though it can 'read' state, so much has changed between hbase1 and hbase2 that it will often
 * misread state. See hbck2 (HBASE-19121) for an hbck tool for hbase2. This class is deprecated.
157 * <p>
158 * Region consistency checks verify that hbase:meta, region deployment on region servers and the
159 * state of data in HDFS (.regioninfo files) all are in accordance.
160 * <p>
161 * Table integrity checks verify that all possible row keys resolve to exactly one region of a
162 * table. This means there are no individual degenerate or backwards regions; no holes between
163 * regions; and that there are no overlapping regions.
164 * <p>
165 * The general repair strategy works in two phases:
166 * <ol>
167 * <li>Repair Table Integrity on HDFS. (merge or fabricate regions)
168 * <li>Repair Region Consistency with hbase:meta and assignments
169 * </ol>
170 * <p>
171 * For table integrity repairs, the tables' region directories are scanned for .regioninfo files.
172 * Each table's integrity is then verified. If there are any orphan regions (regions with no
173 * .regioninfo files) or holes, new regions are fabricated. Backwards regions are sidelined as well
174 * as empty degenerate (endkey==startkey) regions. If there are any overlapping regions, a new
175 * region is created and all data is merged into the new region.
176 * <p>
177 * Table integrity repairs deal solely with HDFS and could potentially be done offline -- the hbase
178 * region servers or master do not need to be running. This phase can eventually be used to
179 * completely reconstruct the hbase:meta table in an offline fashion.
180 * <p>
181 * Region consistency requires three conditions -- 1) valid .regioninfo file present in an HDFS
182 * region dir, 2) valid row with .regioninfo data in META, and 3) a region is deployed only at the
183 * regionserver that was assigned to with proper state in the master.
184 * <p>
185 * Region consistency repairs require hbase to be online so that hbck can contact the HBase master
186 * and region servers. The hbck#connect() method must first be called successfully. Much of the
187 * region consistency information is transient and less risky to repair.
188 * <p>
189 * If hbck is run from the command line, there are a handful of arguments that can be used to limit
190 * the kinds of repairs hbck will do. See the code in {@link #printUsageAndExit()} for more details.
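 * <p>
 * A minimal programmatic sketch of a read-only check (against an hbase-2.x cluster this class can
 * only read state, not repair it):
 *
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * try (HBaseFsck fsck = new HBaseFsck(conf)) {
 *   fsck.connect();                   // contact the master, hbase:meta and region servers
 *   int exitCode = fsck.onlineHbck(); // run the checks; 0 on success, non-zero on failure
 * }
 * }</pre>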
191 * @deprecated For removal in hbase-4.0.0. Use HBCK2 instead.
192 */
193@Deprecated
194@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
195@InterfaceStability.Evolving
196public class HBaseFsck extends Configured implements Closeable {
197  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
198  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
199  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
200  private static boolean rsSupportsOffline = true;
201  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
202  private static final int DEFAULT_MAX_MERGE = 5;
203
204  /**
   * Here is where hbase-1.x used to default the lock for hbck1: hbck puts this lock in place when
   * it goes to write/make changes.
207   */
208  @InterfaceAudience.Private
209  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
210
211  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
212  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
213  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In Hadoop 2.6 and later, the Namenode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies this operation can take up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds) before timing out.
218  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
219  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
220  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
221  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
222
223  /**********************
224   * Internal resources
225   **********************/
226  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
227  private ClusterMetrics status;
228  private ClusterConnection connection;
229  private Admin admin;
230  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
232  protected ExecutorService executor;
233  private long startMillis = EnvironmentEdgeManager.currentTime();
234  private HFileCorruptionChecker hfcc;
235  private int retcode = 0;
236  private Path HBCK_LOCK_PATH;
237  private FSDataOutputStream hbckOutFd;
  // This flag prevents the hbck lock from being cleaned up twice, once by the
  // ShutdownHook and once by the main code. We clean up only if the connect() is
  // successful
241  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
242
243  // Unsupported options in HBase 2.0+
244  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
245    "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
246    "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
247    "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
248
249  /***********
250   * Options
251   ***********/
252  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this lag
254  private static boolean forceExclusive = false; // only this hbck can modify HBase
255  private boolean fixAssignments = false; // fix assignment errors?
256  private boolean fixMeta = false; // fix meta errors?
257  private boolean checkHdfs = true; // load and check fs consistency?
258  private boolean fixHdfsHoles = false; // fix fs holes?
259  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs orphans (regions missing .regioninfo)
  private boolean fixTableOrphans = false; // fix table orphans (tables missing .tableinfo)
262  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
263  private boolean fixSplitParents = false; // fix lingering split parents
264  private boolean removeParents = false; // remove split parents
265  private boolean fixReferenceFiles = false; // fix lingering reference store file
266  private boolean fixHFileLinks = false; // fix lingering HFileLinks
267  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
268  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
269  private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
  private boolean fixAny = false; // Set to true if any of the fix options is enabled.
271
  // limit checking/fixes to listed tables; if empty, attempt to check/fix all
  // hbase:meta is always checked
274  private Set<TableName> tablesIncluded = new HashSet<>();
275  private TableName cleanReplicationBarrierTable;
276  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
277  // maximum number of overlapping regions to sideline
278  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
279  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
280  private Path sidelineDir = null;
281
282  private boolean rerun = false; // if we tried to fix something, rerun hbck
283  private static boolean summary = false; // if we want to print less output
284  private boolean checkMetaOnly = false;
285  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if true, skip the filesystem permission pre-check
287
288  /*********
289   * State
290   *********/
291  final private HbckErrorReporter errors;
292  int fixes = 0;
293
294  /**
295   * This map contains the state of all hbck items. It maps from encoded region name to
296   * HbckRegionInfo structure. The information contained in HbckRegionInfo is used to detect and
297   * correct consistency (hdfs/meta/deployment) problems.
298   */
299  private TreeMap<String, HbckRegionInfo> regionInfoMap = new TreeMap<>();
300  // Empty regioninfo qualifiers in hbase:meta
301  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
302
303  /**
   * This map from TableName -> TableInfo contains the structures necessary to detect table
   * consistency problems (holes, dupes, overlaps). It is sorted to prevent dupes. If tablesIncluded
   * is empty, this map contains all tables. Otherwise, it contains only the meta table and the
   * tables in tablesIncluded, unless checkMetaOnly is specified, in which case it contains only the
   * meta table.
309   */
310  private SortedMap<TableName, HbckTableInfo> tablesInfo = new ConcurrentSkipListMap<>();
311
312  /**
313   * When initially looking at HDFS, we attempt to find any orphaned data.
314   */
315  private List<HbckRegionInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<>());
316
317  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
318  private Map<TableName, TableState> tableStates = new HashMap<>();
319  private final RetryCounterFactory lockFileRetryCounterFactory;
320  private final RetryCounterFactory createZNodeRetryCounterFactory;
321
322  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
323
324  private ZKWatcher zkw = null;
325  private String hbckEphemeralNodePath = null;
326  private boolean hbckZodeCreated = false;
327
328  /**
329   * Constructor
330   * @param conf Configuration object
331   * @throws MasterNotRunningException    if the master is not running
332   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
333   */
334  public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
335    this(conf, createThreadPool(conf));
336  }
337
338  private static ExecutorService createThreadPool(Configuration conf) {
339    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
340    return new ScheduledThreadPoolExecutor(numThreads,
341      new ThreadFactoryBuilder().setNameFormat("hbasefsck-pool-%d").setDaemon(true)
342        .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
343  }
344
  /**
   * Constructor
   * @param conf Configuration object
   * @param exec executor service to use for parallelizable tasks
   * @throws MasterNotRunningException    if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
349  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
350    ZooKeeperConnectionException, IOException, ClassNotFoundException {
351    super(conf);
352    errors = getErrorReporter(getConf());
353    this.executor = exec;
354    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
355    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
356    zkw = createZooKeeperWatcher();
357  }
358
  /** Returns a retry counter factory configured for retrying lock file creation. */
360  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
361    return new RetryCounterFactory(
362      conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
363      conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
364        DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
365      conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
366        DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
367  }
368
  /** Returns a retry counter factory configured for retrying znode creation. */
370  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
371    return new RetryCounterFactory(
372      conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
373      conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
374        DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
375      conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
376        DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
377  }
378
  /** Returns the tmp dir this tool writes to. */
380  @InterfaceAudience.Private
381  public static Path getTmpDir(Configuration conf) throws IOException {
382    return new Path(CommonFSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
383  }
384
385  private static class FileLockCallable implements Callable<FSDataOutputStream> {
386    RetryCounter retryCounter;
387    private final Configuration conf;
388    private Path hbckLockPath = null;
389
390    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
391      this.retryCounter = retryCounter;
392      this.conf = conf;
393    }
394
    /** Returns the hbck lock path; will be <code>null</code> unless you call {@link #call()}. */
396    Path getHbckLockPath() {
397      return this.hbckLockPath;
398    }
399
400    @Override
401    public FSDataOutputStream call() throws IOException {
402      try {
403        FileSystem fs = CommonFSUtils.getCurrentFileSystem(this.conf);
404        FsPermission defaultPerms =
405          CommonFSUtils.getFilePermissions(fs, this.conf, HConstants.DATA_FILE_UMASK_KEY);
406        Path tmpDir = getTmpDir(conf);
407        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
408        fs.mkdirs(tmpDir);
409        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
410        out.writeBytes(InetAddress.getLocalHost().toString());
411        // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
412        out.writeBytes(" Written by an hbase-2.x Master to block an "
413          + "attempt by an hbase-1.x HBCK tool making modification to state. "
414          + "See 'HBCK must match HBase server version' in the hbase refguide.");
415        out.flush();
416        return out;
417      } catch (RemoteException e) {
418        if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
419          return null;
420        } else {
421          throw e;
422        }
423      }
424    }
425
426    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
427      final Path hbckLockFilePath, final FsPermission defaultPerms) throws IOException {
428      IOException exception = null;
429      do {
430        try {
431          return CommonFSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
432        } catch (IOException ioe) {
433          LOG.info("Failed to create lock file " + hbckLockFilePath.getName() + ", try="
434            + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
435          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), ioe);
436          try {
437            exception = ioe;
438            retryCounter.sleepUntilNextRetry();
439          } catch (InterruptedException ie) {
440            throw (InterruptedIOException) new InterruptedIOException(
441              "Can't create lock file " + hbckLockFilePath.getName()).initCause(ie);
442          }
443        }
444      } while (retryCounter.shouldRetry());
445
446      throw exception;
447    }
448  }
449
  /**
   * This method maintains a lock using a file. If the lock file cannot be created, the stream in
   * the returned pair is null.
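   * <p>
   * An illustrative sketch (assuming {@code conf} is a client Configuration), mirroring what
   * {@link #connect()} does to take the lock:
   *
   * <pre>{@code
   * Pair<Path, FSDataOutputStream> lock =
   *   checkAndMarkRunningHbck(conf, createLockRetryCounterFactory(conf).create());
   * if (lock.getSecond() == null) {
   *   // the lock file could not be created; another hbck instance probably holds it
   * }
   * }</pre>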
   * @return a Pair of the lock file path and the FSDataOutputStream for the newly opened lock
   *         file; the stream is null if the lock could not be created
453   * @throws IOException if IO failure occurs
454   */
455  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
456    RetryCounter retryCounter) throws IOException {
457    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
458    ExecutorService executor = Executors.newFixedThreadPool(1);
459    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
460    executor.execute(futureTask);
461    final int timeoutInSeconds =
462      conf.getInt("hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
463    FSDataOutputStream stream = null;
464    try {
465      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
466    } catch (ExecutionException ee) {
467      LOG.warn("Encountered exception when opening lock file", ee);
468    } catch (InterruptedException ie) {
469      LOG.warn("Interrupted when opening lock file", ie);
470      Thread.currentThread().interrupt();
471    } catch (TimeoutException exception) {
472      // took too long to obtain lock
473      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
474      futureTask.cancel(true);
475    } finally {
476      executor.shutdownNow();
477    }
478    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
479  }
480
481  private void unlockHbck() {
482    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
483      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
484      do {
485        try {
486          Closeables.close(hbckOutFd, true);
487          CommonFSUtils.delete(CommonFSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
488          LOG.info("Finishing hbck");
489          return;
490        } catch (IOException ioe) {
491          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
492            + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
493          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
494          try {
495            retryCounter.sleepUntilNextRetry();
496          } catch (InterruptedException ie) {
497            Thread.currentThread().interrupt();
498            LOG.warn("Interrupted while deleting lock file" + HBCK_LOCK_PATH);
499            return;
500          }
501        }
502      } while (retryCounter.shouldRetry());
503    }
504  }
505
  /**
   * To repair region consistency, one must call connect() first; it establishes the connections to
   * the master and hbase:meta that are needed to read and repair online state.
   */
509  public void connect() throws IOException {
510
511    if (isExclusive()) {
512      // Grab the lock
513      Pair<Path, FSDataOutputStream> pair =
514        checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
515      HBCK_LOCK_PATH = pair.getFirst();
516      this.hbckOutFd = pair.getSecond();
517      if (hbckOutFd == null) {
518        setRetCode(-1);
519        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. "
520          + "[If you are sure no other instance is running, delete the lock file " + HBCK_LOCK_PATH
521          + " and rerun the tool]");
522        throw new IOException("Duplicate hbck - Abort");
523      }
524
525      // Make sure to cleanup the lock
526      hbckLockCleanup.set(true);
527    }
528
    // Add a shutdown hook in case the user tries to kill hbck with a ctrl-c; we want to clean up
    // the lock so that it is available for further calls
532    Runtime.getRuntime().addShutdownHook(new Thread() {
533      @Override
534      public void run() {
535        IOUtils.closeQuietly(HBaseFsck.this, e -> LOG.warn("", e));
536        cleanupHbckZnode();
537        unlockHbck();
538      }
539    });
540
541    LOG.info("Launching hbck");
542
543    connection = (ClusterConnection) ConnectionFactory.createConnection(getConf());
544    admin = connection.getAdmin();
545    meta = connection.getTable(TableName.META_TABLE_NAME);
546    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS, Option.DEAD_SERVERS,
547      Option.MASTER, Option.BACKUP_MASTERS, Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
548  }
549
550  /**
551   * Get deployed regions according to the region servers.
552   */
553  private void loadDeployedRegions() throws IOException, InterruptedException {
554    // From the master, get a list of all known live region servers
555    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
556    errors.print("Number of live region servers: " + regionServers.size());
557    if (details) {
558      for (ServerName rsinfo : regionServers) {
559        errors.print("  " + rsinfo.getServerName());
560      }
561    }
562
563    // From the master, get a list of all dead region servers
564    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
565    errors.print("Number of dead region servers: " + deadRegionServers.size());
566    if (details) {
567      for (ServerName name : deadRegionServers) {
568        errors.print("  " + name);
569      }
570    }
571
572    // Print the current master name and state
573    errors.print("Master: " + status.getMasterName());
574
575    // Print the list of all backup masters
576    Collection<ServerName> backupMasters = status.getBackupMasterNames();
577    errors.print("Number of backup masters: " + backupMasters.size());
578    if (details) {
579      for (ServerName name : backupMasters) {
580        errors.print("  " + name);
581      }
582    }
583
584    errors.print("Average load: " + status.getAverageLoad());
585    errors.print("Number of requests: " + status.getRequestCount());
586    errors.print("Number of regions: " + status.getRegionCount());
587
588    List<RegionState> rits = status.getRegionStatesInTransition();
589    errors.print("Number of regions in transition: " + rits.size());
590    if (details) {
591      for (RegionState state : rits) {
592        errors.print("  " + state.toDescriptiveString());
593      }
594    }
595
596    // Determine what's deployed
597    processRegionServers(regionServers);
598  }
599
600  /**
601   * Clear the current state of hbck.
602   */
603  private void clearState() {
604    // Make sure regionInfo is empty before starting
605    fixes = 0;
606    regionInfoMap.clear();
607    emptyRegionInfoQualifiers.clear();
608    tableStates.clear();
609    errors.clear();
610    tablesInfo.clear();
611    orphanHdfsDirs.clear();
612    skippedRegions.clear();
613  }
614
615  /**
616   * This repair method analyzes hbase data in hdfs and repairs it to satisfy the table integrity
617   * rules. HBase doesn't need to be online for this operation to work.
618   */
619  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
620    // Initial pass to fix orphans.
621    if (
622      shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
623        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())
624    ) {
625      LOG.info("Loading regioninfos HDFS");
626      // if nothing is happening this should always complete in two iterations.
627      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
628      int curIter = 0;
629      do {
        clearState(); // clears hbck state and resets fixes to 0.
631        // repair what's on HDFS
632        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations.
634      } while (fixes > 0 && curIter <= maxIterations);
635
636      // Repairs should be done in the first iteration and verification in the second.
637      // If there are more than 2 passes, something funny has happened.
638      if (curIter > 2) {
639        if (curIter == maxIterations) {
640          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
641            + "Tables integrity may not be fully repaired!");
642        } else {
643          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
644        }
645      }
646    }
647  }
648
649  /**
   * This repair method requires the cluster to be online since it contacts region servers and the
   * masters. It makes each region's state consistent across HDFS, hbase:meta, and its deployment.
   * @return If &gt; 0, the number of errors detected; if &lt; 0, there was an unrecoverable error.
   *         If 0, we have a clean hbase.
654   */
655  public int onlineConsistencyRepair() throws IOException, KeeperException, InterruptedException {
656
657    // get regions according to what is online on each RegionServer
658    loadDeployedRegions();
659    // check whether hbase:meta is deployed and online
660    recordMetaRegion();
661    // Check if hbase:meta is found only once and in the right place
662    if (!checkMetaRegion()) {
663      String errorMsg = "hbase:meta table is not consistent. ";
664      if (shouldFixAssignments()) {
665        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
666      } else {
667        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
668      }
669      errors.reportError(errorMsg + " Exiting...");
670      return -2;
671    }
    // We do not go on to further consistency checks for tables when hbase:meta itself is not
    // consistent (hence the early return above).
    LOG.info("Loading region infos from the hbase:meta table");
674    boolean success = loadMetaEntries();
675    if (!success) return -1;
676
677    // Empty cells in hbase:meta?
678    reportEmptyMetaCells();
679
680    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
681    if (shouldFixEmptyMetaCells()) {
682      fixEmptyMetaCells();
683    }
684
685    // get a list of all tables that have not changed recently.
686    if (!checkMetaOnly) {
687      reportTablesInFlux();
688    }
689
690    // Get disabled tables states
691    loadTableStates();
692
693    // load regiondirs and regioninfos from HDFS
694    if (shouldCheckHdfs()) {
695      LOG.info("Loading region directories from HDFS");
696      loadHdfsRegionDirs();
697      LOG.info("Loading region information from HDFS");
698      loadHdfsRegionInfos();
699    }
700
701    // fix the orphan tables
702    fixOrphanTables();
703
704    LOG.info("Checking and fixing region consistency");
705    // Check and fix consistency
706    checkAndFixConsistency();
707
708    // Check integrity (does not fix)
709    checkIntegrity();
710    return errors.getErrorList().size();
711  }
712
713  /**
   * This method maintains an ephemeral znode that puts the master into maintenance mode. If the
   * creation fails we return false or throw an exception.
   * @return true if creating the znode succeeds; false otherwise
717   * @throws IOException if IO failure occurs
718   */
719  private boolean setMasterInMaintenanceMode() throws IOException {
720    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
721    hbckEphemeralNodePath = ZNodePaths.joinZNode(zkw.getZNodePaths().masterMaintZNode,
722      "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
723    do {
724      try {
725        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
726        if (hbckZodeCreated) {
727          break;
728        }
729      } catch (KeeperException e) {
730        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
731          throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
732        }
733        // fall through and retry
734      }
735
736      LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try="
737        + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
738
739      try {
740        retryCounter.sleepUntilNextRetry();
741      } catch (InterruptedException ie) {
742        throw (InterruptedIOException) new InterruptedIOException(
743          "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
744      }
745    } while (retryCounter.shouldRetry());
746    return hbckZodeCreated;
747  }
748
749  private void cleanupHbckZnode() {
750    try {
751      if (zkw != null && hbckZodeCreated) {
752        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
753        hbckZodeCreated = false;
754      }
755    } catch (KeeperException e) {
756      // Ignore
757      if (!e.code().equals(KeeperException.Code.NONODE)) {
758        LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
759      }
760    }
761  }
762
763  /**
764   * Contacts the master and prints out cluster-wide information
765   * @return 0 on success, non-zero on failure
766   */
767  public int onlineHbck()
768    throws IOException, KeeperException, InterruptedException, ReplicationException {
769    // print hbase server version
770    errors.print("Version: " + status.getHBaseVersion());
771
772    // Clean start
773    clearState();
774    // Do offline check and repair first
775    offlineHdfsIntegrityRepair();
776    offlineReferenceFileRepair();
777    offlineHLinkFileRepair();
778    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
779    // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
780    // is better to set Master into maintenance mode during online hbck.
781    //
782    if (!setMasterInMaintenanceMode()) {
783      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
784        + "error.  Please run HBCK multiple times to reduce the chance of transient error.");
785    }
786
787    onlineConsistencyRepair();
788
789    if (checkRegionBoundaries) {
790      checkRegionBoundaries();
791    }
792
793    checkAndFixReplication();
794
795    cleanReplicationBarrier();
796
797    // Remove the hbck znode
798    cleanupHbckZnode();
799
800    // Remove the hbck lock
801    unlockHbck();
802
803    // Print table summary
804    printTableSummary(tablesInfo);
805    return errors.summarize();
806  }
807
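  /**
   * Extracts the row portion of a serialized KeyValue key: the first {@link Bytes#SIZEOF_SHORT}
   * bytes hold the row length, immediately followed by that many row bytes. For example
   * (illustrative), a key beginning with the short value 3 followed by the bytes 'f', 'o', 'o'
   * yields the row "foo".
   */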
808  public static byte[] keyOnly(byte[] b) {
809    if (b == null) return b;
810    int rowlength = Bytes.toShort(b, 0);
811    byte[] result = new byte[rowlength];
812    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
813    return result;
814  }
815
816  @Override
817  public void close() throws IOException {
818    try {
819      cleanupHbckZnode();
820      unlockHbck();
821    } catch (Exception io) {
822      LOG.warn(io.toString(), io);
823    } finally {
824      if (zkw != null) {
825        zkw.close();
826        zkw = null;
827      }
828      IOUtils.closeQuietly(admin, e -> LOG.warn("", e));
829      IOUtils.closeQuietly(meta, e -> LOG.warn("", e));
830      IOUtils.closeQuietly(connection, e -> LOG.warn("", e));
831    }
832  }
833
834  private static class RegionBoundariesInformation {
835    public byte[] regionName;
836    public byte[] metaFirstKey;
837    public byte[] metaLastKey;
838    public byte[] storesFirstKey;
839    public byte[] storesLastKey;
840
841    @Override
842    public String toString() {
843      return "regionName=" + Bytes.toStringBinary(regionName) + "\nmetaFirstKey="
844        + Bytes.toStringBinary(metaFirstKey) + "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey)
845        + "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) + "\nstoresLastKey="
846        + Bytes.toStringBinary(storesLastKey);
847    }
848  }
849
850  public void checkRegionBoundaries() {
851    try {
852      ByteArrayComparator comparator = new ByteArrayComparator();
853      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
854      final RegionBoundariesInformation currentRegionBoundariesInformation =
855        new RegionBoundariesInformation();
856      Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
857      for (RegionInfo regionInfo : regions) {
858        Path tableDir = CommonFSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
859        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
860        // For each region, get the start and stop key from the META and compare them to the
861        // same information from the Stores.
862        Path path = new Path(tableDir, regionInfo.getEncodedName());
863        FileSystem fs = path.getFileSystem(getConf());
864        FileStatus[] files = fs.listStatus(path);
865        // For all the column families in this region...
866        byte[] storeFirstKey = null;
867        byte[] storeLastKey = null;
868        for (FileStatus file : files) {
869          String fileName = file.getPath().toString();
870          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
871          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
872            FileStatus[] storeFiles = fs.listStatus(file.getPath());
873            // For all the stores in this column family.
874            for (FileStatus storeFile : storeFiles) {
875              HFile.Reader reader =
876                HFile.createReader(fs, storeFile.getPath(), CacheConfig.DISABLED, true, getConf());
877              if (
878                (reader.getFirstKey() != null)
879                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
880                    ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))
881              ) {
882                storeFirstKey = ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey();
883              }
884              if (
885                (reader.getLastKey() != null)
886                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
887                    ((KeyValue.KeyOnlyKeyValue) reader.getLastKey().get()).getKey())) < 0)
888              ) {
889                storeLastKey = ((KeyValue.KeyOnlyKeyValue) reader.getLastKey().get()).getKey();
890              }
891              reader.close();
892            }
893          }
894        }
895        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
896        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
897        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
898        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
899        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
900          currentRegionBoundariesInformation.metaFirstKey = null;
901        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
902          currentRegionBoundariesInformation.metaLastKey = null;
903
904        // For a region to be correct, we need the META start key to be smaller or equal to the
905        // smallest start key from all the stores, and the start key from the next META entry to
906        // be bigger than the last key from all the current stores. First region start key is null;
907        // Last region end key is null; some regions can be empty and not have any store.
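        // Illustrative example: a region with start key 'b' and end key 'f' in META is valid when
        // every row found in its store files is >= 'b' and < 'f'.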
908
909        boolean valid = true;
910        // Checking start key.
911        if (
912          (currentRegionBoundariesInformation.storesFirstKey != null)
913            && (currentRegionBoundariesInformation.metaFirstKey != null)
914        ) {
915          valid = valid && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
916            currentRegionBoundariesInformation.metaFirstKey) >= 0;
917        }
918        // Checking stop key.
919        if (
920          (currentRegionBoundariesInformation.storesLastKey != null)
921            && (currentRegionBoundariesInformation.metaLastKey != null)
922        ) {
923          valid = valid && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
924            currentRegionBoundariesInformation.metaLastKey) < 0;
925        }
926        if (!valid) {
927          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
928            tablesInfo.get(regionInfo.getTable()));
929          LOG.warn("Region's boundaries not aligned between stores and META for:");
930          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
931        }
932      }
933    } catch (IOException e) {
934      LOG.error(e.toString(), e);
935    }
936  }
937
938  /**
939   * Iterates through the list of all orphan/invalid regiondirs.
940   */
941  private void adoptHdfsOrphans(Collection<HbckRegionInfo> orphanHdfsDirs) throws IOException {
942    for (HbckRegionInfo hi : orphanHdfsDirs) {
943      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
944      adoptHdfsOrphan(hi);
945    }
946  }
947
948  /**
   * Orphaned regions are regions without a .regioninfo file in them. We "adopt" these orphans by
   * creating a new region, and moving the column families, recovered edits, and WALs into the new
   * region dir. We determine the region start key and end key by looking at all of the hfiles
   * inside the column families to identify the min and max keys. The resulting region will likely
   * violate table integrity but will be dealt with by merging overlapping regions.
954   */
955  @SuppressWarnings("deprecation")
956  private void adoptHdfsOrphan(HbckRegionInfo hi) throws IOException {
957    Path p = hi.getHdfsRegionDir();
958    FileSystem fs = p.getFileSystem(getConf());
959    FileStatus[] dirs = fs.listStatus(p);
960    if (dirs == null) {
961      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " + p
962        + ". This dir could probably be deleted.");
963      return;
964    }
965
966    TableName tableName = hi.getTableName();
967    HbckTableInfo tableInfo = tablesInfo.get(tableName);
968    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
969    TableDescriptor template = tableInfo.getTableDescriptor();
970
971    // find min and max key values
972    Pair<byte[], byte[]> orphanRegionRange = null;
973    for (FileStatus cf : dirs) {
974      String cfName = cf.getPath().getName();
975      // TODO Figure out what the special dirs are
976      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
977
978      FileStatus[] hfiles = fs.listStatus(cf.getPath());
979      for (FileStatus hfile : hfiles) {
980        byte[] start, end;
981        HFile.Reader hf = null;
982        try {
983          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
984          Optional<Cell> startKv = hf.getFirstKey();
985          start = CellUtil.cloneRow(startKv.get());
986          Optional<Cell> endKv = hf.getLastKey();
987          end = CellUtil.cloneRow(endKv.get());
988        } catch (IOException ioe) {
989          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
990          continue;
991        } catch (NullPointerException ioe) {
992          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
993          continue;
994        } finally {
995          if (hf != null) {
996            hf.close();
997          }
998        }
999
1000        // expand the range to include the range of all hfiles
1001        if (orphanRegionRange == null) {
1002          // first range
1003          orphanRegionRange = new Pair<>(start, end);
1004        } else {
1005          // TODO add test
1006
1007          // expand range only if the hfile is wider.
1008          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1009            orphanRegionRange.setFirst(start);
1010          }
1011          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
1012            orphanRegionRange.setSecond(end);
1013          }
1014        }
1015      }
1016    }
1017    if (orphanRegionRange == null) {
1018      LOG.warn("No data in dir " + p + ", sidelining data");
1019      fixes++;
1020      sidelineRegionDir(fs, hi);
1021      return;
1022    }
1023    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", "
1024      + Bytes.toString(orphanRegionRange.getSecond()) + ")");
1025
1026    // create new region on hdfs. move data into place.
1027    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1028      .setStartKey(orphanRegionRange.getFirst())
1029      .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1])).build();
1030    LOG.info("Creating new region : " + regionInfo);
1031    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1032    Path target = region.getRegionFileSystem().getRegionDir();
1033
1034    // rename all the data to new region
1035    mergeRegionDirs(target, hi);
1036    fixes++;
1037  }
1038
1039  /**
1040   * This method determines if there are table integrity errors in HDFS. If there are errors and the
1041   * appropriate "fix" options are enabled, the method will first correct orphan regions making them
1042   * into legit regiondirs, and then reload to merge potentially overlapping regions.
1043   * @return number of table integrity errors found
1044   */
1045  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1046    // Determine what's on HDFS
1047    LOG.info("Loading HBase regioninfo from HDFS...");
1048    loadHdfsRegionDirs(); // populating regioninfo table.
1049
1050    int errs = errors.getErrorList().size();
1051    // First time just get suggestions.
1052    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1053    checkHdfsIntegrity(false, false);
1054
1055    if (errors.getErrorList().size() == errs) {
1056      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1057      return 0;
1058    }
1059
1060    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1061      adoptHdfsOrphans(orphanHdfsDirs);
1062      // TODO optimize by incrementally adding instead of reloading.
1063    }
1064
1065    // Make sure there are no holes now.
1066    if (shouldFixHdfsHoles()) {
1067      clearState(); // this also resets # fixes.
1068      loadHdfsRegionDirs();
1069      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1070      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1071    }
1072
1073    // Now we fix overlaps
1074    if (shouldFixHdfsOverlaps()) {
1075      // second pass we fix overlaps.
1076      clearState(); // this also resets # fixes.
1077      loadHdfsRegionDirs();
1078      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1079      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1080    }
1081
1082    return errors.getErrorList().size();
1083  }
1084
1085  /**
   * Scan all the store file names to find any lingering reference files, which refer to
   * non-existent files. If the "fix" option is enabled, any lingering reference file found will be
   * sidelined.
   * <p>
   * A lingering reference file prevents a region from opening. It has to be fixed before a cluster
   * can start properly.
1092   */
1093  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1094    clearState();
1095    Configuration conf = getConf();
1096    Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1097    FileSystem fs = hbaseRoot.getFileSystem(conf);
1098    LOG.info("Computing mapping of all store files");
1099    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1100      new FSUtils.ReferenceFileFilter(fs), executor, errors);
1101    errors.print("");
1102    LOG.info("Validating mapping using HDFS state");
1103    for (Path path : allFiles.values()) {
1104      Path referredToFile = StoreFileInfo.getReferredToFile(path);
1105      if (fs.exists(referredToFile)) continue; // good, expected
1106
1107      // Found a lingering reference file
1108      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1109        "Found lingering reference file " + path);
1110      if (!shouldFixReferenceFiles()) continue;
1111
1112      // Now, trying to fix it since requested
1113      boolean success = false;
1114      String pathStr = path.toString();
1115
1116      // A reference file path should be like
1117      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1118      // Up 5 directories to get the root folder.
1119      // So the file will be sidelined to a similar folder structure.
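      // For an illustrative path /hbase/data/ns1/t1/abc123/f1/ref.region, the loop below walks
      // 'index' back to the separator just before "data", so the file is sidelined to
      // <sideline dir>/data/ns1/t1/abc123/f1/ref.region.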
1120      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1121      for (int i = 0; index > 0 && i < 5; i++) {
1122        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1123      }
1124      if (index > 0) {
1125        Path rootDir = getSidelineDir();
1126        Path dst = new Path(rootDir, pathStr.substring(index + 1));
1127        fs.mkdirs(dst.getParent());
1128        LOG.info("Trying to sideline reference file " + path + " to " + dst);
1129        setShouldRerun();
1130
1131        success = fs.rename(path, dst);
1132        debugLsr(dst);
1133
1134      }
1135      if (!success) {
1136        LOG.error("Failed to sideline reference file " + path);
1137      }
1138    }
1139  }
1140
1141  /**
   * Scan all the store file names to find any lingering HFileLink files, which refer to
   * non-existent files. If the "fix" option is enabled, any lingering HFileLink file found will be
   * sidelined.
1145   */
1146  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1147    Configuration conf = getConf();
1148    Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1149    FileSystem fs = hbaseRoot.getFileSystem(conf);
1150    LOG.info("Computing mapping of all link files");
1151    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1152      new FSUtils.HFileLinkFilter(), executor, errors);
1153    errors.print("");
1154
1155    LOG.info("Validating mapping using HDFS state");
1156    for (Path path : allFiles.values()) {
1157      // building HFileLink object to gather locations
1158      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1159      if (actualLink.exists(fs)) continue; // good, expected
1160
1161      // Found a lingering HFileLink
1162      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1163      if (!shouldFixHFileLinks()) continue;
1164
1165      // Now, trying to fix it since requested
1166      setShouldRerun();
1167
1168      // An HFileLink path should be like
1169      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same
1171      // folder structure.
1172      boolean success = sidelineFile(fs, hbaseRoot, path);
1173
1174      if (!success) {
1175        LOG.error("Failed to sideline HFileLink file " + path);
1176      }
1177
1178      // An HFileLink backreference path should be like
1179      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same
1181      // folder structure.
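      // As an illustrative example, a link named t2=abc123-hfile1 under family f1 refers to hfile1
      // of region abc123 of table t2 (default namespace), so its backreference dir would be
      // ${hbase.rootdir}/archive/data/default/t2/abc123/f1/.links-hfile1.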
1182      Path backRefPath = FileLink.getBackReferencesDir(
1183        HFileArchiveUtil.getStoreArchivePath(conf,
1184          HFileLink.getReferencedTableName(path.getName().toString()),
1185          HFileLink.getReferencedRegionName(path.getName().toString()), path.getParent().getName()),
1186        HFileLink.getReferencedHFileName(path.getName().toString()));
1187      success = sidelineFile(fs, hbaseRoot, backRefPath);
1188
1189      if (!success) {
1190        LOG.error("Failed to sideline HFileLink backreference file " + path);
1191      }
1192    }
1193  }
1194
1195  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1196    URI uri = hbaseRoot.toUri().relativize(path.toUri());
1197    if (uri.isAbsolute()) return false;
1198    String relativePath = uri.getPath();
1199    Path rootDir = getSidelineDir();
1200    Path dst = new Path(rootDir, relativePath);
1201    boolean pathCreated = fs.mkdirs(dst.getParent());
1202    if (!pathCreated) {
1203      LOG.error("Failed to create path: " + dst.getParent());
1204      return false;
1205    }
1206    LOG.info("Trying to sideline file " + path + " to " + dst);
1207    return fs.rename(path, dst);
1208  }
1209
1210  /**
1211   * TODO -- need to add tests for this.
1212   */
1213  private void reportEmptyMetaCells() {
1214    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: "
1215      + emptyRegionInfoQualifiers.size());
1216    if (details) {
1217      for (Result r : emptyRegionInfoQualifiers) {
1218        errors.print("  " + r);
1219      }
1220    }
1221  }
1222
1223  /**
1224   * TODO -- need to add tests for this.
1225   */
1226  private void reportTablesInFlux() {
1227    AtomicInteger numSkipped = new AtomicInteger(0);
1228    TableDescriptor[] allTables = getTables(numSkipped);
1229    errors.print("Number of Tables: " + allTables.length);
1230    if (details) {
1231      if (numSkipped.get() > 0) {
1232        errors.detail("Number of Tables in flux: " + numSkipped.get());
1233      }
1234      for (TableDescriptor td : allTables) {
1235        errors.detail("  Table: " + td.getTableName() + "\t" + (td.isReadOnly() ? "ro" : "rw")
1236          + "\t" + (td.isMetaRegion() ? "META" : "    ") + "\t" + " families: "
1237          + td.getColumnFamilyCount());
1238      }
1239    }
1240  }
1241
1242  public HbckErrorReporter getErrors() {
1243    return errors;
1244  }
1245
1246  /**
1247   * Populate hbi's from regionInfos loaded from file system.
1248   */
1249  private SortedMap<TableName, HbckTableInfo> loadHdfsRegionInfos()
1250    throws IOException, InterruptedException {
1251    tablesInfo.clear(); // regenerating the data
1252    // generate region split structure
1253    Collection<HbckRegionInfo> hbckRegionInfos = regionInfoMap.values();
1254
1255    // Parallelized read of .regioninfo files.
1256    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckRegionInfos.size());
1257    List<Future<Void>> hbiFutures;
1258
1259    for (HbckRegionInfo hbi : hbckRegionInfos) {
1260      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1261      hbis.add(work);
1262    }
1263
1264    // Submit and wait for completion
1265    hbiFutures = executor.invokeAll(hbis);
1266
1267    for (int i = 0; i < hbiFutures.size(); i++) {
1268      WorkItemHdfsRegionInfo work = hbis.get(i);
1269      Future<Void> f = hbiFutures.get(i);
1270      try {
1271        f.get();
1272      } catch (ExecutionException e) {
1273        LOG.warn("Failed to read .regioninfo file for region " + work.hbi.getRegionNameAsString(),
1274          e.getCause());
1275      }
1276    }
1277
1278    Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
1279    FileSystem fs = hbaseRoot.getFileSystem(getConf());
1280    // serialized table info gathering.
1281    for (HbckRegionInfo hbi : hbckRegionInfos) {
1282
1283      if (hbi.getHdfsHRI() == null) {
1284        // was an orphan
1285        continue;
1286      }
1287
1288      // get table name from hdfs, populate various HBaseFsck tables.
1289      TableName tableName = hbi.getTableName();
1290      if (tableName == null) {
1291        // There was an entry in hbase:meta not in the HDFS?
1292        LOG.warn("tableName was null for: " + hbi);
1293        continue;
1294      }
1295
1296      HbckTableInfo modTInfo = tablesInfo.get(tableName);
1297      if (modTInfo == null) {
1298        // only executed once per table.
1299        modTInfo = new HbckTableInfo(tableName, this);
1300        tablesInfo.put(tableName, modTInfo);
1301        try {
1302          TableDescriptor htd =
1303            FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1304          modTInfo.htds.add(htd);
1305        } catch (IOException ioe) {
1306          if (!orphanTableDirs.containsKey(tableName)) {
1307            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1308            // should only report once for each table
1309            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1310              "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1311            Set<String> columns = new HashSet<>();
1312            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1313          }
1314        }
1315      }
1316      if (!hbi.isSkipChecks()) {
1317        modTInfo.addRegionInfo(hbi);
1318      }
1319    }
1320
1321    loadTableInfosForTablesWithNoRegion();
1322    errors.print("");
1323
1324    return tablesInfo;
1325  }
1326
1327  /**
1328   * Collect the column family names from the family directories under the given region dir.
1329   * @return the set of column families found for the region
1330   */
1331  private Set<String> getColumnFamilyList(Set<String> columns, HbckRegionInfo hbi)
1332    throws IOException {
1333    Path regionDir = hbi.getHdfsRegionDir();
1334    FileSystem fs = regionDir.getFileSystem(getConf());
1335    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1336    for (FileStatus subdir : subDirs) {
1337      String columnfamily = subdir.getPath().getName();
1338      columns.add(columnfamily);
1339    }
1340    return columns;
1341  }
1342
1343  /**
1344   * Fabricate a .tableinfo file with the following contents:<br>
1345   * 1. the correct table name<br>
1346   * 2. the correct column family list<br>
1347   * 3. the default properties for both {@link TableDescriptor} and
1348   * {@link ColumnFamilyDescriptor}<br>
1349   * @return true if a descriptor was created, false if no column families were supplied
1350   */
1351  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1352    Set<String> columns) throws IOException {
1353    if (columns == null || columns.isEmpty()) return false;
1354    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
1355    for (String columnFamily : columns) {
1356      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnFamily));
1357    }
1358    fstd.createTableDescriptor(builder.build(), true);
1359    return true;
1360  }
1361
1362  /**
1363   * Fix the empty REGIONINFO_QUALIFIER rows in hbase:meta by deleting them.
1364   * @throws IOException if a remote or network exception occurs
1365   */
1366  public void fixEmptyMetaCells() throws IOException {
1367    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1368      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1369      for (Result region : emptyRegionInfoQualifiers) {
1370        deleteMetaRegion(region.getRow());
1371        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1372      }
1373      emptyRegionInfoQualifiers.clear();
1374    }
1375  }
1376
1377  /**
1378   * Fix orphan tables by creating a .tableinfo file under each table dir:<br>
1379   * 1. if a TableDescriptor is cached, recover the .tableinfo from it<br>
1380   * 2. else create a default .tableinfo file with the following items<br>
1381   * &nbsp;2.1 the correct table name<br>
1382   * &nbsp;2.2 the correct column family list<br>
1383   * &nbsp;2.3 the default properties for both {@link TableDescriptor} and
1384   * {@link ColumnFamilyDescriptor}<br>
1385   * @throws IOException if a remote or network exception occurs
1386   */
1387  public void fixOrphanTables() throws IOException {
1388    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1389
1390      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1391      tmpList.addAll(orphanTableDirs.keySet());
1392      TableDescriptor[] htds = getTableDescriptors(tmpList);
1393      Iterator<Entry<TableName, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
1394      int j = 0;
1395      int numFailedCase = 0;
1396      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1397      while (iter.hasNext()) {
1398        Entry<TableName, Set<String>> entry = iter.next();
1399        TableName tableName = entry.getKey();
1400        LOG.info("Trying to fix orphan table error: " + tableName);
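        // Walk the fetched descriptors in step with the orphan list; a positional match means a
        // cached descriptor can simply be rewritten to HDFS. Once the cached descriptors are
        // exhausted, default descriptors are fabricated below.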
1401        if (j < htds.length) {
1402          if (tableName.equals(htds[j].getTableName())) {
1403            TableDescriptor htd = htds[j];
1404            LOG.info("fixing orphan table: " + tableName + " from cache");
1405            fstd.createTableDescriptor(htd, true);
1406            j++;
1407            iter.remove();
1408          }
1409        } else {
1410          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1411            LOG.warn("Fixing orphan table: " + tableName + " with a default .tableinfo file");
1412            LOG.warn(
1413              "Strongly recommend modifying the TableDescriptor if necessary for: " + tableName);
1414            iter.remove();
1415          } else {
1416            LOG.error("Unable to create default .tableinfo for " + tableName
1417              + " because its column family information is missing");
1418            numFailedCase++;
1419          }
1420        }
1421        fixes++;
1422      }
1423
1424      if (orphanTableDirs.isEmpty()) {
1425        // all orphanTableDirs are luckily recovered
1426        // re-run doFsck after recovering the .tableinfo file
1427        setShouldRerun();
1428        LOG.warn(
1429          "Strongly recommend re-running hbck manually after all orphanTableDirs have been fixed");
1430      } else if (numFailedCase > 0) {
1431        LOG.error("Failed to fix " + numFailedCase + " OrphanTables with default .tableinfo files");
1432      }
1433
1434    }
1435    // cleanup the list
1436    orphanTableDirs.clear();
1437
1438  }
1439
1440  /**
1441   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1442   */
1443  private void logParallelMerge() {
1444    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1445      LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to"
1446        + " false to run serially.");
1447    } else {
1448      LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to"
1449        + " true to run in parallel.");
1450    }
1451  }
1452
1453  private SortedMap<TableName, HbckTableInfo> checkHdfsIntegrity(boolean fixHoles,
1454    boolean fixOverlaps) throws IOException {
1455    LOG.info("Checking HBase region split map from HDFS data...");
1456    logParallelMerge();
1457    for (HbckTableInfo tInfo : tablesInfo.values()) {
1458      TableIntegrityErrorHandler handler;
1459      if (fixHoles || fixOverlaps) {
1460        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(), fixHoles, fixOverlaps);
1461      } else {
1462        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1463      }
1464      if (!tInfo.checkRegionChain(handler)) {
1465        // should dump info as well.
1466        errors.report("Found inconsistency in table " + tInfo.getName());
1467      }
1468    }
1469    return tablesInfo;
1470  }
1471
1472  Path getSidelineDir() throws IOException {
1473    if (sidelineDir == null) {
1474      Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1475      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
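      // Key the per-run sideline dir on the root dir name plus this run's start time so separate
      // hbck runs get distinct locations.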
1476      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-" + startMillis);
1477    }
1478    return sidelineDir;
1479  }
1480
1481  /**
1482   * Sideline a region dir (instead of deleting it)
1483   */
1484  Path sidelineRegionDir(FileSystem fs, HbckRegionInfo hi) throws IOException {
1485    return sidelineRegionDir(fs, null, hi);
1486  }
1487
1488  /**
1489   * Sideline a region dir (instead of deleting it)
1490   * @param parentDir if specified, the region will be sidelined to folder like
1491   *                  {@literal .../parentDir/<table name>/<region name>}. The purpose is to group
1492   *                  together similar regions sidelined, for example, those regions should be bulk
1493   *                  loaded back later on. If NULL, it is ignored.
1494   */
1495  Path sidelineRegionDir(FileSystem fs, String parentDir, HbckRegionInfo hi) throws IOException {
1496    TableName tableName = hi.getTableName();
1497    Path regionDir = hi.getHdfsRegionDir();
1498
1499    if (!fs.exists(regionDir)) {
1500      LOG.warn("No previous " + regionDir + " exists. Continuing.");
1501      return null;
1502    }
1503
1504    Path rootDir = getSidelineDir();
1505    if (parentDir != null) {
1506      rootDir = new Path(rootDir, parentDir);
1507    }
1508    Path sidelineTableDir = CommonFSUtils.getTableDir(rootDir, tableName);
1509    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1510    fs.mkdirs(sidelineRegionDir);
1511    boolean success = false;
1512    FileStatus[] cfs = fs.listStatus(regionDir);
1513    if (cfs == null) {
1514      LOG.info("Region dir is empty: " + regionDir);
1515    } else {
1516      for (FileStatus cf : cfs) {
1517        Path src = cf.getPath();
1518        Path dst = new Path(sidelineRegionDir, src.getName());
1519        if (fs.isFile(src)) {
1520          // simple file
1521          success = fs.rename(src, dst);
1522          if (!success) {
1523            String msg = "Unable to rename file " + src + " to " + dst;
1524            LOG.error(msg);
1525            throw new IOException(msg);
1526          }
1527          continue;
1528        }
1529
1530        // is a directory.
1531        fs.mkdirs(dst);
1532
1533        LOG.info("Sidelining files from " + src + " into " + dst);
1534        // FileSystem.rename is inconsistent with directories -- if the
1535        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1536        // it moves the src into the dst dir resulting in (foo/a/b). If
1537        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1538        FileStatus[] hfiles = fs.listStatus(src);
1539        if (hfiles != null && hfiles.length > 0) {
1540          for (FileStatus hfile : hfiles) {
1541            success = fs.rename(hfile.getPath(), dst);
1542            if (!success) {
1543              String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1544              LOG.error(msg);
1545              throw new IOException(msg);
1546            }
1547          }
1548        }
1549        LOG.debug("Sideline directory contents:");
1550        debugLsr(sidelineRegionDir);
1551      }
1552    }
1553
1554    LOG.info("Removing old region dir: " + regionDir);
1555    success = fs.delete(regionDir, true);
1556    if (!success) {
1557      String msg = "Unable to delete dir " + regionDir;
1558      LOG.error(msg);
1559      throw new IOException(msg);
1560    }
1561    return sidelineRegionDir;
1562  }
1563
1564  /**
1565   * Load the table states from hbase:meta into the local map.
1566   */
1567  private void loadTableStates() throws IOException {
1568    tableStates = MetaTableAccessor.getTableStates(connection);
1569    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
1570    // has no entry in the table states. HBCK doesn't work right with hbase2, but just do this
1571    // in the meantime.
1572    this.tableStates.put(TableName.META_TABLE_NAME,
1573      new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1574  }
1575
1576  /**
1577   * Check if the specified region's table is disabled.
1578   * @param tableName table to check status of
1579   */
1580  boolean isTableDisabled(TableName tableName) {
1581    return tableStates.containsKey(tableName)
1582      && tableStates.get(tableName).inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1583  }
1584
1585  /**
1586   * Scan HDFS for all regions, recording their information into regionInfoMap
1587   */
1588  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1589    Path rootDir = CommonFSUtils.getRootDir(getConf());
1590    FileSystem fs = rootDir.getFileSystem(getConf());
1591
1592    // list all tables from HDFS
1593    List<FileStatus> tableDirs = Lists.newArrayList();
1594
1595    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1596
1597    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1598    for (Path path : paths) {
1599      TableName tableName = CommonFSUtils.getTableName(path);
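      // Always include hbase:meta; other tables are included only when not in meta-only mode and
      // they pass the table include filter.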
1600      if (
1601        (!checkMetaOnly && isTableIncluded(tableName))
1602          || tableName.equals(TableName.META_TABLE_NAME)
1603      ) {
1604        tableDirs.add(fs.getFileStatus(path));
1605      }
1606    }
1607
1608    // verify that version file exists
1609    if (!foundVersionFile) {
1610      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1611        "Version file does not exist in root dir " + rootDir);
1612      if (shouldFixVersionFile()) {
1613        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME + " file.");
1614        setShouldRerun();
1615        FSUtils.setVersion(fs, rootDir,
1616          getConf().getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000),
1617          getConf().getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1618            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1619      }
1620    }
1621
1622    // Avoid multithreading at table-level because already multithreaded internally at
1623    // region-level. Additionally multithreading at table-level can lead to deadlock
1624    // if there are many tables in the cluster. Since there are a limited # of threads
1625    // in the executor's thread pool and if we multithread at the table-level by putting
1626    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1627    // executor tied up solely in waiting for the tables' region-level calls to complete.
1628    // If there are enough tables then there will be no actual threads in the pool left
1629    // for the region-level callables to be serviced.
1630    for (FileStatus tableDir : tableDirs) {
1631      LOG.debug("Loading region dirs from " + tableDir.getPath());
1632      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1633      try {
1634        item.call();
1635      } catch (ExecutionException e) {
1636        LOG.warn("Could not completely load table dir " + tableDir.getPath(), e.getCause());
1637      }
1638    }
1639    errors.print("");
1640  }
1641
1642  /**
1643   * Record the location of the hbase:meta region as found in ZooKeeper.
1644   */
1645  private boolean recordMetaRegion() throws IOException {
1646    RegionLocations rl =
1647      connection.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, false);
1648    if (rl == null) {
1649      errors.reportError(ERROR_CODE.NULL_META_REGION, "META region was not found in ZooKeeper");
1650      return false;
1651    }
1652    for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1653      // Check if Meta region is valid and existing
1654      if (metaLocation == null) {
1655        errors.reportError(ERROR_CODE.NULL_META_REGION, "META region location is null");
1656        return false;
1657      }
1658      if (metaLocation.getRegionInfo() == null) {
1659        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null");
1660        return false;
1661      }
1662      if (metaLocation.getHostname() == null) {
1663        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location hostName is null");
1664        return false;
1665      }
1666      ServerName sn = metaLocation.getServerName();
1667      HbckRegionInfo.MetaEntry m = new HbckRegionInfo.MetaEntry(metaLocation.getRegion(), sn,
1668        EnvironmentEdgeManager.currentTime());
1669      HbckRegionInfo hbckRegionInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName());
1670      if (hbckRegionInfo == null) {
1671        regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckRegionInfo(m));
1672      } else {
1673        hbckRegionInfo.setMetaEntry(m);
1674      }
1675    }
1676    return true;
1677  }
1678
1679  private ZKWatcher createZooKeeperWatcher() throws IOException {
1680    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1681      @Override
1682      public void abort(String why, Throwable e) {
1683        LOG.error(why, e);
1684        System.exit(1);
1685      }
1686
1687      @Override
1688      public boolean isAborted() {
1689        return false;
1690      }
1691
1692    });
1693  }
1694
1695  /**
1696   * Contacts each regionserver and fetches metadata about regions.
1697   * @param regionServerList - the list of region servers to connect to
1698   * @throws IOException if a remote or network exception occurs
1699   */
1700  void processRegionServers(Collection<ServerName> regionServerList)
1701    throws IOException, InterruptedException {
1702
1703    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
1704    List<Future<Void>> workFutures;
1705
1706    // loop to contact each region server in parallel
1707    for (ServerName rsinfo : regionServerList) {
1708      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1709    }
1710
1711    workFutures = executor.invokeAll(workItems);
1712
1713    for (int i = 0; i < workFutures.size(); i++) {
1714      WorkItemRegion item = workItems.get(i);
1715      Future<Void> f = workFutures.get(i);
1716      try {
1717        f.get();
1718      } catch (ExecutionException e) {
1719        LOG.warn("Could not process regionserver {}", item.rsinfo.getAddress(), e.getCause());
1720      }
1721    }
1722  }
1723
1724  /**
1725   * Check consistency of all regions that have been found in previous phases.
1726   */
1727  private void checkAndFixConsistency() throws IOException, KeeperException, InterruptedException {
1728    // Divide the checks in two phases. One for default/primary replicas and another
1729    // for the non-primary ones. Keeps code cleaner this way.
1730
1731    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
1732    for (java.util.Map.Entry<String, HbckRegionInfo> e : regionInfoMap.entrySet()) {
1733      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1734        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1735      }
1736    }
1737    checkRegionConsistencyConcurrently(workItems);
1738
1739    boolean prevHdfsCheck = shouldCheckHdfs();
1740    setCheckHdfs(false); // replicas don't have any hdfs data
1741    // Run a pass over the replicas and fix any assignment issues that exist on the currently
1742    // deployed/undeployed replicas.
1743    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
1744    for (java.util.Map.Entry<String, HbckRegionInfo> e : regionInfoMap.entrySet()) {
1745      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
1746        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1747      }
1748    }
1749    checkRegionConsistencyConcurrently(replicaWorkItems);
1750    setCheckHdfs(prevHdfsCheck);
1751
1752    // If some regions were skipped during the checkRegionConsistencyConcurrently() phase, we
1753    // might not get an accurate state of HBase by continuing. The config here allows users to
1754    // tune the tolerated number of skipped regions.
1755    // TODO: evaluate the consequence to continue the hbck operation without config.
1756    int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1757    int numOfSkippedRegions = skippedRegions.size();
1758    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1759      throw new IOException(
1760        numOfSkippedRegions + " region(s) could not be checked or repaired.  See logs for detail.");
1761    }
1762
1763    if (shouldCheckHdfs()) {
1764      checkAndFixTableStates();
1765    }
1766  }
1767
1768  /**
1769   * Check consistency of all regions using multiple threads concurrently.
1770   */
1771  private void
1772    checkRegionConsistencyConcurrently(final List<CheckRegionConsistencyWorkItem> workItems)
1773      throws IOException, KeeperException, InterruptedException {
1774    if (workItems.isEmpty()) {
1775      return; // nothing to check
1776    }
1777
1778    List<Future<Void>> workFutures = executor.invokeAll(workItems);
1779    for (Future<Void> f : workFutures) {
1780      try {
1781        f.get();
1782      } catch (ExecutionException e1) {
1783        LOG.warn("Could not check region consistency ", e1.getCause());
1784        if (e1.getCause() instanceof IOException) {
1785          throw (IOException) e1.getCause();
1786        } else if (e1.getCause() instanceof KeeperException) {
1787          throw (KeeperException) e1.getCause();
1788        } else if (e1.getCause() instanceof InterruptedException) {
1789          throw (InterruptedException) e1.getCause();
1790        } else {
1791          throw new IOException(e1.getCause());
1792        }
1793      }
1794    }
1795  }
1796
1797  class CheckRegionConsistencyWorkItem implements Callable<Void> {
1798    private final String key;
1799    private final HbckRegionInfo hbi;
1800
1801    CheckRegionConsistencyWorkItem(String key, HbckRegionInfo hbi) {
1802      this.key = key;
1803      this.hbi = hbi;
1804    }
1805
1806    @Override
1807    public synchronized Void call() throws Exception {
1808      try {
1809        checkRegionConsistency(key, hbi);
1810      } catch (Exception e) {
1811        // If the region is a non-META region, skip it and emit a warning/error message; if it is
1812        // the META region, we should not continue.
1813        LOG.warn(
1814          "Unable to complete check or repair the region '" + hbi.getRegionNameAsString() + "'.",
1815          e);
1816        if (hbi.getHdfsHRI().isMetaRegion()) {
1817          throw e;
1818        }
1819        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1820        addSkippedRegion(hbi);
1821      }
1822      return null;
1823    }
1824  }
1825
1826  private void addSkippedRegion(final HbckRegionInfo hbi) {
1827    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1828    if (skippedRegionNames == null) {
1829      skippedRegionNames = new HashSet<>();
1830    }
1831    skippedRegionNames.add(hbi.getRegionNameAsString());
1832    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1833  }
1834
1835  /**
1836   * Check and fix table states; assumes full info is available (tablesInfo, empty tables loaded).
1837   */
1838  private void checkAndFixTableStates() throws IOException {
1839    // first check dangling states
1840    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1841      TableName tableName = entry.getKey();
1842      TableState tableState = entry.getValue();
1843      HbckTableInfo tableInfo = tablesInfo.get(tableName);
1844      if (isTableIncluded(tableName) && !tableName.isSystemTable() && tableInfo == null) {
1845        if (fixMeta) {
1846          MetaTableAccessor.deleteTableState(connection, tableName);
1847          TableState state = MetaTableAccessor.getTableState(connection, tableName);
1848          if (state != null) {
1849            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1850              tableName + " unable to delete dangling table state " + tableState);
1851          }
1852        } else if (!checkMetaOnly) {
1853          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
1854          // true, tableInfo will be null as tablesInfo is not populated for all tables from hdfs
1855          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1856            tableName + " has dangling table state " + tableState);
1857        }
1858      }
1859    }
1860    // check that all tables have states
1861    for (TableName tableName : tablesInfo.keySet()) {
1862      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1863        if (fixMeta) {
1864          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1865          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1866          if (newState == null) {
1867            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1868              "Unable to change state for table " + tableName + " in meta ");
1869          }
1870        } else {
1871          errors.reportError(ERROR_CODE.NO_TABLE_STATE, tableName + " has no state in meta ");
1872        }
1873      }
1874    }
1875  }
1876
1877  private void preCheckPermission() throws IOException {
1878    if (shouldIgnorePreCheckPermission()) {
1879      return;
1880    }
1881
1882    Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1883    FileSystem fs = hbaseDir.getFileSystem(getConf());
1884    UserProvider userProvider = UserProvider.instantiate(getConf());
1885    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1886    FileStatus[] files = fs.listStatus(hbaseDir);
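    // Verify the current user has write access to every top-level entry under the HBase root,
    // since repairs need to write there.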
1887    for (FileStatus file : files) {
1888      try {
1889        fs.access(file.getPath(), FsAction.WRITE);
1890      } catch (AccessControlException ace) {
1891        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1892        errors.reportError(ERROR_CODE.WRONG_USAGE,
1893          "Current user " + ugi.getUserName() + " does not have write perms to " + file.getPath()
1894            + ". Please rerun hbck as hdfs user " + file.getOwner());
1895        throw ace;
1896      }
1897    }
1898  }
1899
1900  /**
1901   * Deletes region from meta table
1902   */
1903  private void deleteMetaRegion(HbckRegionInfo hi) throws IOException {
1904    deleteMetaRegion(hi.getMetaEntry().getRegionName());
1905  }
1906
1907  /**
1908   * Deletes region from meta table
1909   */
1910  private void deleteMetaRegion(byte[] metaKey) throws IOException {
1911    Delete d = new Delete(metaKey);
1912    meta.delete(d);
1913    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META");
1914  }
1915
1916  /**
1917   * Reset the split parent region info in meta table
1918   */
1919  private void resetSplitParent(HbckRegionInfo hi) throws IOException {
1920    RowMutations mutations = new RowMutations(hi.getMetaEntry().getRegionName());
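    // Clear the daughter (splitA/splitB) pointers and re-write the region as online and unsplit
    // in a single atomic row mutation.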
1921    Delete d = new Delete(hi.getMetaEntry().getRegionName());
1922    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1923    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1924    mutations.add(d);
1925
1926    RegionInfo hri =
1927      RegionInfoBuilder.newBuilder(hi.getMetaEntry()).setOffline(false).setSplit(false).build();
1928    Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
1929    mutations.add(p);
1930
1931    meta.mutateRow(mutations);
1932    LOG.info("Reset split parent " + hi.getMetaEntry().getRegionNameAsString() + " in META");
1933  }
1934
1935  /**
1936   * This is a backwards-compatibility wrapper for permanently offlining a region that should not
1937   * be alive. If the region server does not support the "offline" method, it will use the closest
1938   * unassign method instead. This will basically work until one attempts to disable or delete the
1939   * affected table. The problem has to do with in-memory-only master state, so restarting the
1940   * HMaster or failing over to another master should fix this.
1941   */
1942  void offline(byte[] regionName) throws IOException {
1943    String regionString = Bytes.toStringBinary(regionName);
1944    if (!rsSupportsOffline) {
1945      LOG.warn("Using unassign for region " + regionString
1946        + " instead of the offline method; you should restart the HMaster after these repairs");
1947      admin.unassign(regionName, true);
1948      return;
1949    }
1950
1951    // first time we assume the rs's supports #offline.
1952    try {
1953      LOG.info("Offlining region " + regionString);
1954      admin.offline(regionName);
1955    } catch (IOException ioe) {
1956      String notFoundMsg =
1957        "java.lang.NoSuchMethodException: " + "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1958      if (ioe.getMessage().contains(notFoundMsg)) {
1959        LOG.warn(
1960          "Using unassign for region " + regionString + " instead of the offline method; you"
1961            + " should restart the HMaster after these repairs");
1962        rsSupportsOffline = false; // in the future just use unassign
1963        admin.unassign(regionName, true);
1964        return;
1965      }
1966      throw ioe;
1967    }
1968  }
1969
1970  /**
1971   * Attempts to undeploy a region from a region server based on information in META. Any
1972   * operation that modifies the file system should make sure that its corresponding region is not
1973   * deployed, to prevent data races. A separate call is required to update the master in-memory
1974   * region state kept in the AssignmentManager. Because disable uses this state instead of that
1975   * found in META, we can't seem to cleanly disable/delete tables that have been hbck fixed. When
1976   * used on a version of HBase that does not have the offline ipc call exposed on the master
1977   * (&lt;0.90.5, &lt;0.92.0) a master restart or failover may be required.
1978   */
1979  void closeRegion(HbckRegionInfo hi) throws IOException, InterruptedException {
1980    if (hi.getMetaEntry() == null && hi.getHdfsEntry() == null) {
1981      undeployRegions(hi);
1982      return;
1983    }
1984
1985    // get assignment info and hregioninfo from meta.
1986    Get get = new Get(hi.getRegionName());
1987    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1988    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1989    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1990    // also get the locations of the replicas to close if the primary region is being closed
1991    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1992      int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
1993      for (int i = 0; i < numReplicas; i++) {
1994        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
1995        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
1996      }
1997    }
1998    Result r = meta.get(get);
1999    RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2000    if (rl == null) {
2001      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2002        + " since meta does not have handle to reach it");
2003      return;
2004    }
2005    for (HRegionLocation h : rl.getRegionLocations()) {
2006      ServerName serverName = h.getServerName();
2007      if (serverName == null) {
2008        errors.reportError("Unable to close region " + hi.getRegionNameAsString()
2009          + " because meta does not " + "have handle to reach it.");
2010        continue;
2011      }
2012      RegionInfo hri = h.getRegionInfo();
2013      if (hri == null) {
2014        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2015          + " because hbase:meta had invalid or missing " + HConstants.CATALOG_FAMILY_STR + ":"
2016          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER) + " qualifier value.");
2017        continue;
2018      }
2019      // close the region -- close files and remove assignment
2020      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2021    }
2022  }
2023
2024  private void undeployRegions(HbckRegionInfo hi) throws IOException, InterruptedException {
2025    undeployRegionsForHbi(hi);
2026    // undeploy replicas of the region (but only if the method is invoked for the primary)
2027    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2028      return;
2029    }
2030    int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
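    // Walk replica ids 1..numReplicas-1; each secondary's RegionInfo is derived from the
    // primary's, then undeployed and marked so later consistency checks skip it.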
2031    for (int i = 1; i < numReplicas; i++) {
2032      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2033      RegionInfo hri =
2034        RegionReplicaUtil.getRegionInfoForReplica(hi.getPrimaryHRIForDeployedReplica(), i);
2035      HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2036      if (h != null) {
2037        undeployRegionsForHbi(h);
2038        // set skip checks; we undeployed it, and we don't want to evaluate this anymore
2039        // in consistency checks
2040        h.setSkipChecks(true);
2041      }
2042    }
2043  }
2044
2045  private void undeployRegionsForHbi(HbckRegionInfo hi) throws IOException, InterruptedException {
2046    for (HbckRegionInfo.OnlineEntry rse : hi.getOnlineEntries()) {
2047      LOG.debug("Undeploy region " + rse.getRegionInfo() + " from " + rse.getServerName());
2048      try {
2049        HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.getServerName(),
2050          rse.getRegionInfo());
2051        offline(rse.getRegionInfo().getRegionName());
2052      } catch (IOException ioe) {
2053        LOG.warn("Got exception when attempting to offline region "
2054          + Bytes.toString(rse.getRegionInfo().getRegionName()), ioe);
2055      }
2056    }
2057  }
2058
2059  private void tryAssignmentRepair(HbckRegionInfo hbi, String msg)
2060    throws IOException, KeeperException, InterruptedException {
2061    // If we are trying to fix the errors
2062    if (shouldFixAssignments()) {
2063      errors.print(msg);
2064      undeployRegions(hbi);
2065      setShouldRerun();
2066      RegionInfo hri = hbi.getHdfsHRI();
2067      if (hri == null) {
2068        hri = hbi.getMetaEntry();
2069      }
2070      HBaseFsckRepair.fixUnassigned(admin, hri);
2071      HBaseFsckRepair.waitUntilAssigned(admin, hri);
2072
2073      // also assign replicas if needed (do it only when this call operates on a primary replica)
2074      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2075      int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2076      for (int i = 1; i < replicationCount; i++) {
2077        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2078        HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2079        if (h != null) {
2080          undeployRegions(h);
2081          // set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2082          // in consistency checks
2083          h.setSkipChecks(true);
2084        }
2085        HBaseFsckRepair.fixUnassigned(admin, hri);
2086        HBaseFsckRepair.waitUntilAssigned(admin, hri);
2087      }
2088
2089    }
2090  }
2091
2092  /**
2093   * Check a single region for consistency and correct deployment.
2094   */
2095  private void checkRegionConsistency(final String key, final HbckRegionInfo hbi)
2096    throws IOException, KeeperException, InterruptedException {
2097
2098    if (hbi.isSkipChecks()) return;
2099    String descriptiveName = hbi.toString();
2100    boolean inMeta = hbi.getMetaEntry() != null;
2101    // In case not checking HDFS, assume the region is on HDFS
2102    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2103    boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null;
2104    boolean isDeployed = !hbi.getDeployedOn().isEmpty();
2105    boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1;
2106    boolean deploymentMatchesMeta = hasMetaAssignment && isDeployed && !isMultiplyDeployed
2107      && hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0));
2108    boolean splitParent = inMeta && hbi.getMetaEntry().isSplit() && hbi.getMetaEntry().isOffline();
2109    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.getMetaEntry().getTable());
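    // A region whose HDFS dir was modified within the configured time lag is treated as in flux
    // and skipped below rather than flagged as inconsistent.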
2110    boolean recentlyModified =
2111      inHdfs && hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2112
2113    // ========== First the healthy cases =============
2114    if (hbi.containsOnlyHdfsEdits()) {
2115      return;
2116    }
2117    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2118      return;
2119    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2120      LOG.info("Region " + descriptiveName + " is in META, and in a disabled "
2121        + "table that is not deployed");
2122      return;
2123    } else if (recentlyModified) {
2124      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2125      return;
2126    }
2127    // ========== Cases where the region is not in hbase:meta =============
2128    else if (!inMeta && !inHdfs && !isDeployed) {
2129      // We shouldn't have record of this region at all then!
2130      assert false : "Entry for region with no data";
2131    } else if (!inMeta && !inHdfs && isDeployed) {
2132      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS,
2133        "Region " + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but "
2134          + "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2135      if (shouldFixAssignments()) {
2136        undeployRegions(hbi);
2137      }
2138
2139    } else if (!inMeta && inHdfs && !isDeployed) {
2140      if (hbi.isMerged()) {
2141        // This region has already been merged, the remaining hdfs file will be
2142        // cleaned by CatalogJanitor later
2143        hbi.setSkipChecks(true);
2144        LOG.info("Region " + descriptiveName
2145          + " was merged recently, its file(s) will be cleaned by CatalogJanitor later");
2146        return;
2147      }
2148      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " + descriptiveName
2149        + " on HDFS, but not listed in hbase:meta " + "or deployed on any region server");
2150      // restore region consistency of an adopted orphan
2151      if (shouldFixMeta()) {
2152        if (!hbi.isHdfsRegioninfoPresent()) {
2153          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2154            + " in table integrity repair phase if -fixHdfsOrphans was" + " used.");
2155          return;
2156        }
2157
2158        RegionInfo hri = hbi.getHdfsHRI();
2159        HbckTableInfo tableInfo = tablesInfo.get(hri.getTable());
2160
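        // Before patching meta, check whether an existing region already covers this orphan's key
        // range and the orphan's reference files point back to it; if so, the dir is debris from a
        // failed split and is deleted instead of being re-added.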
2161        for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) {
2162          if (
2163            Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2164              && (region.getEndKey().length == 0
2165                || Bytes.compareTo(region.getEndKey(), hri.getEndKey()) >= 0)
2166              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0
2167          ) {
2168            if (region.isSplit() || region.isOffline()) continue;
2169            Path regionDir = hbi.getHdfsRegionDir();
2170            FileSystem fs = regionDir.getFileSystem(getConf());
2171            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2172            for (Path familyDir : familyDirs) {
2173              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2174              for (Path referenceFilePath : referenceFilePaths) {
2175                Path parentRegionDir =
2176                  StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2177                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2178                  LOG.warn(hri + " start and stop keys are within the range of " + region
2179                    + ". It might not have been cleaned up from hdfs when the split of region "
2180                    + region + " failed. Hence deleting it from hdfs.");
2181                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, regionDir.getParent(),
2182                    hri);
2183                  return;
2184                }
2185              }
2186            }
2187          }
2188        }
2189        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2190        int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2191        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2192          admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
2193          numReplicas);
2194
2195        tryAssignmentRepair(hbi, "Trying to reassign region...");
2196      }
2197
2198    } else if (!inMeta && inHdfs && isDeployed) {
2199      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2200        + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2201      debugLsr(hbi.getHdfsRegionDir());
2202      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2203        // for replicas, this means that we should undeploy the region (we would have
2204        // gone over the primaries and fixed meta holes in first phase under
2205        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2206        // this stage unless unwanted replica)
2207        if (shouldFixAssignments()) {
2208          undeployRegionsForHbi(hbi);
2209        }
2210      }
2211      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2212        if (!hbi.isHdfsRegioninfoPresent()) {
2213          LOG.error("This should have been repaired in table integrity repair phase");
2214          return;
2215        }
2216
2217        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2218        int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2219        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2220          admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet(),
2221          numReplicas);
2222        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2223      }
2224
2225      // ========== Cases where the region is in hbase:meta =============
2226    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2227      // check whether this is an actual error, or just transient state where parent
2228      // is not cleaned
2229      if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) {
2230        // check that split daughters are there
2231        HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName());
2232        HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName());
2233        if (infoA != null && infoB != null) {
2234          // we already processed or will process daughters. Move on, nothing to see here.
2235          hbi.setSkipChecks(true);
2236          return;
2237        }
2238      }
2239
2240      // For Replica region, we need to do a similar check. If replica is not split successfully,
2241      // error is going to be reported against primary daughter region.
2242      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2243        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2244          + "and not deployed on any region server. This may be transient.");
2245        hbi.setSkipChecks(true);
2246        return;
2247      }
2248
2249      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT,
2250        "Region " + descriptiveName + " is a split parent in META, in HDFS, "
2251          + "and not deployed on any region server. This could be transient, "
2252          + "consider to run the catalog janitor first!");
2253      if (shouldFixSplitParents()) {
2254        setShouldRerun();
2255        resetSplitParent(hbi);
2256      }
2257    } else if (inMeta && !inHdfs && !isDeployed) {
2258      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " + descriptiveName
2259        + " found in META, but not in HDFS " + "or deployed on any region server.");
2260      if (shouldFixMeta()) {
2261        deleteMetaRegion(hbi);
2262      }
2263    } else if (inMeta && !inHdfs && isDeployed) {
2264      errors.reportError(ERROR_CODE.NOT_IN_HDFS,
2265        "Region " + descriptiveName + " found in META, but not in HDFS, " + "and deployed on "
2266          + Joiner.on(", ").join(hbi.getDeployedOn()));
2267      // We treat HDFS as ground truth. Any information in meta is transient
2268      // and equivalent data can be regenerated. So, let's unassign and remove
2269      // these problems from META.
2270      if (shouldFixAssignments()) {
2271        errors.print("Trying to fix unassigned region...");
2272        undeployRegions(hbi);
2273      }
2274      if (shouldFixMeta()) {
2275        // wait for it to complete
2276        deleteMetaRegion(hbi);
2277      }
2278    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2279      errors.reportError(ERROR_CODE.NOT_DEPLOYED,
2280        "Region " + descriptiveName + " not deployed on any region server.");
2281      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2282    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2283      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2284        "Region " + descriptiveName + " should not be deployed according "
2285          + "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2286      if (shouldFixAssignments()) {
2287        errors.print("Trying to close the region " + descriptiveName);
2288        setShouldRerun();
2289        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2290      }
2291    } else if (inMeta && inHdfs && isMultiplyDeployed) {
2292      errors.reportError(ERROR_CODE.MULTI_DEPLOYED,
2293        "Region " + descriptiveName + " is listed in hbase:meta on region server "
2294          + hbi.getMetaEntry().regionServer + " but is multiply assigned to region servers "
2295          + Joiner.on(", ").join(hbi.getDeployedOn()));
2296      // If we are trying to fix the errors
2297      if (shouldFixAssignments()) {
2298        errors.print("Trying to fix assignment error...");
2299        setShouldRerun();
2300        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2301      }
2302    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2303      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META,
2304        "Region " + descriptiveName + " listed in hbase:meta on region server "
2305          + hbi.getMetaEntry().regionServer + " but found on region server "
2306          + hbi.getDeployedOn().get(0));
2307      // If we are trying to fix the errors
2308      if (shouldFixAssignments()) {
2309        errors.print("Trying to fix assignment error...");
2310        setShouldRerun();
2311        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2312        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2313      }
2314    } else {
2315      errors.reportError(ERROR_CODE.UNKNOWN,
2316        "Region " + descriptiveName + " is in an unforeseen state:" + " inMeta=" + inMeta
2317          + " inHdfs=" + inHdfs + " isDeployed=" + isDeployed + " isMultiplyDeployed="
2318          + isMultiplyDeployed + " deploymentMatchesMeta=" + deploymentMatchesMeta
2319          + " shouldBeDeployed=" + shouldBeDeployed);
2320    }
2321  }
2322
2323  /**
2324   * Checks table integrity. Goes over all regions and scans the tables. Collects all the pieces
2325   * for each table and checks if there are missing, repeated or overlapping ones.
2326   */
2327  SortedMap<TableName, HbckTableInfo> checkIntegrity() throws IOException {
2328    tablesInfo = new TreeMap<>();
2329    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2330    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2331      // Check only valid, working regions
2332      if (hbi.getMetaEntry() == null) {
2333        // this assumes that consistency check has run loadMetaEntry
2334        Path p = hbi.getHdfsRegionDir();
2335        if (p == null) {
2336          errors.report("No regioninfo in Meta or HDFS. " + hbi);
2337        }
2338
2339        // TODO test.
2340        continue;
2341      }
2342      if (hbi.getMetaEntry().regionServer == null) {
2343        errors.detail("Skipping region because no region server: " + hbi);
2344        continue;
2345      }
2346      if (hbi.getMetaEntry().isOffline()) {
2347        errors.detail("Skipping region because it is offline: " + hbi);
2348        continue;
2349      }
2350      if (hbi.containsOnlyHdfsEdits()) {
2351        errors.detail("Skipping region because it only contains edits: " + hbi);
2352        continue;
2353      }
2354
2355      // Missing regionDir or over-deployment is checked elsewhere. Include
2356      // these cases in modTInfo, so we can evaluate those regions as part of
2357      // the region chain in META
2358      // if (hbi.foundRegionDir == null) continue;
2359      // if (hbi.deployedOn.size() != 1) continue;
2360      if (hbi.getDeployedOn().isEmpty()) {
2361        continue;
2362      }
2363
2364      // We should be safe here
2365      TableName tableName = hbi.getMetaEntry().getTable();
2366      HbckTableInfo modTInfo = tablesInfo.get(tableName);
2367      if (modTInfo == null) {
2368        modTInfo = new HbckTableInfo(tableName, this);
2369      }
2370      for (ServerName server : hbi.getDeployedOn()) {
2371        modTInfo.addServer(server);
2372      }
2373
2374      if (!hbi.isSkipChecks()) {
2375        modTInfo.addRegionInfo(hbi);
2376      }
2377
2378      tablesInfo.put(tableName, modTInfo);
2379    }
2380
2381    loadTableInfosForTablesWithNoRegion();
2382
2383    logParallelMerge();
2384    for (HbckTableInfo tInfo : tablesInfo.values()) {
2385      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2386      if (!tInfo.checkRegionChain(handler)) {
2387        errors.report("Found inconsistency in table " + tInfo.getName());
2388      }
2389    }
2390    return tablesInfo;
2391  }
2392
2393  /**
2394   * Loads table infos for tables that may not have been included, since there are no regions
2395   * reported for the table, but the table dir is present in hdfs.
2396   */
2397  private void loadTableInfosForTablesWithNoRegion() throws IOException {
2398    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2399    for (TableDescriptor htd : allTables.values()) {
2400      if (checkMetaOnly && !htd.isMetaTable()) {
2401        continue;
2402      }
2403
2404      TableName tableName = htd.getTableName();
2405      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2406        HbckTableInfo tableInfo = new HbckTableInfo(tableName, this);
2407        tableInfo.htds.add(htd);
2408        tablesInfo.put(htd.getTableName(), tableInfo);
2409      }
2410    }
2411  }
2412
2413  /**
2414   * Merge hdfs data by moving from contained HbckRegionInfo into targetRegionDir.
2415   * @return number of file move fixes done to merge regions.
2416   */
2417  public int mergeRegionDirs(Path targetRegionDir, HbckRegionInfo contained) throws IOException {
2418    int fileMoves = 0;
2419    String thread = Thread.currentThread().getName();
2420    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2421    debugLsr(contained.getHdfsRegionDir());
2422
2423    // rename the contained into the container.
2424    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2425    FileStatus[] dirs = null;
2426    try {
2427      dirs = fs.listStatus(contained.getHdfsRegionDir());
2428    } catch (FileNotFoundException fnfe) {
2429      // region we are attempting to merge in is not present! Since this is a merge, there is
2430      // no harm skipping this region if it does not exist.
2431      if (!fs.exists(contained.getHdfsRegionDir())) {
2432        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2433          + " is missing. Assuming already sidelined or moved.");
2434      } else {
2435        sidelineRegionDir(fs, contained);
2436      }
2437      return fileMoves;
2438    }
2439
2440    if (dirs == null) {
2441      if (!fs.exists(contained.getHdfsRegionDir())) {
2442        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2443          + " already sidelined.");
2444      } else {
2445        sidelineRegionDir(fs, contained);
2446      }
2447      return fileMoves;
2448    }
2449
2450    for (FileStatus cf : dirs) {
2451      Path src = cf.getPath();
2452      Path dst = new Path(targetRegionDir, src.getName());
2453
2454      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2455        // do not copy the old .regioninfo file.
2456        continue;
2457      }
2458
2459      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2460        // do not copy the .oldlogs files
2461        continue;
2462      }
2463
2464      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2465      // FileSystem.rename is inconsistent with directories -- if the
2466      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2467      // it moves the src into the dst dir resulting in (foo/a/b). If
2468      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2469      for (FileStatus hfile : fs.listStatus(src)) {
2470        boolean success = fs.rename(hfile.getPath(), dst);
2471        if (success) {
2472          fileMoves++;
2473        }
2474      }
2475      LOG.debug("[" + thread + "] Sideline directory contents:");
2476      debugLsr(targetRegionDir);
2477    }
2478
2479    // if all renames succeeded, sideline the now-empty contained region dir.
2480    sidelineRegionDir(fs, contained);
2481    LOG.info("[" + thread + "] Sidelined region dir " + contained.getHdfsRegionDir() + " into "
2482      + getSidelineDir());
2483    debugLsr(contained.getHdfsRegionDir());
2484
2485    return fileMoves;
2486  }
2487
2488  static class WorkItemOverlapMerge implements Callable<Void> {
2489    private TableIntegrityErrorHandler handler;
2490    Collection<HbckRegionInfo> overlapgroup;
2491
2492    WorkItemOverlapMerge(Collection<HbckRegionInfo> overlapgroup,
2493      TableIntegrityErrorHandler handler) {
2494      this.handler = handler;
2495      this.overlapgroup = overlapgroup;
2496    }
2497
2498    @Override
2499    public Void call() throws Exception {
2500      handler.handleOverlapGroup(overlapgroup);
2501      return null;
2502    }
2503  }
2504
2505  /**
2506   * Return a list of user-space table names whose metadata have not been modified in the last
2507   * few milliseconds specified by timelag. If none of the REGIONINFO_QUALIFIER,
2508   * SERVER_QUALIFIER, STARTCODE_QUALIFIER, SPLITA_QUALIFIER or SPLITB_QUALIFIER columns have
2509   * changed in that window, the table is a candidate to be returned; tables still in flux are
2510   * counted in numSkipped instead.
2511   * @return tables that have not been modified recently
2512   */
2513  TableDescriptor[] getTables(AtomicInteger numSkipped) {
2514    List<TableName> tableNames = new ArrayList<>();
2515    long now = EnvironmentEdgeManager.currentTime();
2516
2517    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2518      HbckRegionInfo.MetaEntry info = hbi.getMetaEntry();
2519
2520      // if the start key is zero, then we have found the first region of a table.
2521      // pick only those tables that were not modified in the last few milliseconds.
2522      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2523        if (info.modTime + timelag < now) {
2524          tableNames.add(info.getTable());
2525        } else {
2526          numSkipped.incrementAndGet(); // one more in-flux table
2527        }
2528      }
2529    }
2530    return getTableDescriptors(tableNames);
2531  }
2532
2533  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
2534    LOG.info("getTableDescriptors == tableNames => " + tableNames);
2535    try (Connection conn = ConnectionFactory.createConnection(getConf());
2536      Admin admin = conn.getAdmin()) {
2537      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
2538      return tds.toArray(new TableDescriptor[tds.size()]);
2539    } catch (IOException e) {
2540      LOG.debug("Exception getting table descriptors", e);
2541    }
2542    return new TableDescriptor[0];
2543  }
2544
2545  /**
2546   * Gets the entry in regionInfo corresponding to the given encoded region name. If the region
2547   * has not been seen yet, a new entry is added and returned.
2548   */
2549  private synchronized HbckRegionInfo getOrCreateInfo(String name) {
2550    HbckRegionInfo hbi = regionInfoMap.get(name);
2551    if (hbi == null) {
2552      hbi = new HbckRegionInfo(null);
2553      regionInfoMap.put(name, hbi);
2554    }
2555    return hbi;
2556  }
2557
2558  private void checkAndFixReplication() throws ReplicationException {
2559    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
2560    checker.checkUnDeletedQueues();
2561
2562    if (checker.hasUnDeletedQueues() && this.fixReplication) {
2563      checker.fixUnDeletedQueues();
2564      setShouldRerun();
2565    }
2566  }
2567
2568  /**
2569   * Check values in regionInfo for hbase:meta. Check whether zero regions or more than one
2570   * region claim to be holding hbase:meta. If such an inconsistency is found, try to fix it and
2571   * report an error.
2572   * @throws IOException from HBaseFsckRepair functions
2573   */
2574  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2575    Map<Integer, HbckRegionInfo> metaRegions = new HashMap<>();
2576    for (HbckRegionInfo value : regionInfoMap.values()) {
2577      if (value.getMetaEntry() != null && value.getMetaEntry().isMetaRegion()) {
2578        metaRegions.put(value.getReplicaId(), value);
2579      }
2580    }
2581    int metaReplication =
2582      admin.getTableDescriptor(TableName.META_TABLE_NAME).getRegionReplication();
2583    boolean noProblem = true;
2584    // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas
2585    // Check the deployed servers. It should be exactly one server for each replica.
2586    for (int i = 0; i < metaReplication; i++) {
2587      HbckRegionInfo metaHbckRegionInfo = metaRegions.remove(i);
2588      List<ServerName> servers = new ArrayList<>();
2589      if (metaHbckRegionInfo != null) {
2590        servers = metaHbckRegionInfo.getDeployedOn();
2591      }
2592      if (servers.size() != 1) {
2593        noProblem = false;
2594        if (servers.isEmpty()) {
2595          assignMetaReplica(i);
2596        } else if (servers.size() > 1) {
2597          errors.reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId "
2598            + metaHbckRegionInfo.getReplicaId() + " is found on more than one server.");
2599          if (shouldFixAssignments()) {
2600            errors.print("Trying to fix a problem with hbase:meta, replicaId "
2601              + metaHbckRegionInfo.getReplicaId() + "..");
2602            setShouldRerun();
2603            // try fix it (treat is a dupe assignment)
2604            HBaseFsckRepair.fixMultiAssignment(connection, metaHbckRegionInfo.getMetaEntry(),
2605              servers);
2606          }
2607        }
2608      }
2609    }
2610    // unassign whatever is remaining in metaRegions. They are excess replicas.
2611    for (Map.Entry<Integer, HbckRegionInfo> entry : metaRegions.entrySet()) {
2612      noProblem = false;
2613      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2614        "hbase:meta replicas are deployed in excess. Configured " + metaReplication + ", deployed "
2615          + metaRegions.size());
2616      if (shouldFixAssignments()) {
2617        errors.print(
2618          "Trying to undeploy excess replica, replicaId: " + entry.getKey() + " of hbase:meta..");
2619        setShouldRerun();
2620        unassignMetaReplica(entry.getValue());
2621      }
2622    }
2623    // if noProblem is false, rerun hbck with hopefully fixed META
2624    // if noProblem is true, no errors, so continue normally
2625    return noProblem;
2626  }
2627
2628  private void unassignMetaReplica(HbckRegionInfo hi)
2629    throws IOException, InterruptedException, KeeperException {
2630    undeployRegions(hi);
2631    ZKUtil.deleteNode(zkw,
2632      zkw.getZNodePaths().getZNodeForReplica(hi.getMetaEntry().getReplicaId()));
2633  }
2634
2635  private void assignMetaReplica(int replicaId)
2636    throws IOException, KeeperException, InterruptedException {
2637    errors.reportError(ERROR_CODE.NO_META_REGION,
2638      "hbase:meta, replicaId " + replicaId + " is not found on any region.");
2639    if (shouldFixAssignments()) {
2640      errors.print("Trying to fix a problem with hbase:meta..");
2641      setShouldRerun();
2642      // try to fix it (treat it as unassigned region)
2643      RegionInfo h = RegionReplicaUtil
2644        .getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
2645      HBaseFsckRepair.fixUnassigned(admin, h);
2646      HBaseFsckRepair.waitUntilAssigned(admin, h);
2647    }
2648  }
2649
2650  /**
2651   * Scan hbase:meta, adding all regions found to the regionInfo map.
2652   * @throws IOException if an error is encountered
2653   */
2654  boolean loadMetaEntries() throws IOException {
2655    MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
2656      int countRecord = 1;
2657
2658      // comparator used to find the Cell with the latest timestamp (modtime)
2659      final Comparator<Cell> comp = new Comparator<Cell>() {
2660        @Override
2661        public int compare(Cell k1, Cell k2) {
2662          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
2663        }
2664      };
2665
2666      @Override
2667      public boolean visit(Result result) throws IOException {
2668        try {
2669
2670          // record the latest modification of this META record
2671          long ts = Collections.max(result.listCells(), comp).getTimestamp();
2672          RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
2673          if (rl == null) {
2674            emptyRegionInfoQualifiers.add(result);
2675            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2676              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2677            return true;
2678          }
2679          ServerName sn = null;
2680          if (
2681            rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null
2682              || rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null
2683          ) {
2684            emptyRegionInfoQualifiers.add(result);
2685            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2686              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2687            return true;
2688          }
2689          RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
2690          if (!(isTableIncluded(hri.getTable()) || hri.isMetaRegion())) {
2691            return true;
2692          }
2693          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
2694          for (HRegionLocation h : rl.getRegionLocations()) {
2695            if (h == null || h.getRegionInfo() == null) {
2696              continue;
2697            }
2698            sn = h.getServerName();
2699            hri = h.getRegionInfo();
2700
2701            HbckRegionInfo.MetaEntry m = null;
2702            if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2703              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, daughters.getFirst(),
2704                daughters.getSecond());
2705            } else {
2706              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, null, null);
2707            }
2708            HbckRegionInfo previous = regionInfoMap.get(hri.getEncodedName());
2709            if (previous == null) {
2710              regionInfoMap.put(hri.getEncodedName(), new HbckRegionInfo(m));
2711            } else if (previous.getMetaEntry() == null) {
2712              previous.setMetaEntry(m);
2713            } else {
2714              throw new IOException("Two entries in hbase:meta are the same " + previous);
2715            }
2716          }
2717          List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(result.rawCells());
2718          if (mergeParents != null) {
2719            for (RegionInfo mergeRegion : mergeParents) {
2720              if (mergeRegion != null) {
2721                // This region is already being merged
2722                HbckRegionInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
2723                hbInfo.setMerged(true);
2724              }
2725            }
2726          }
2727
2728          // show proof of progress to the user, once for every 100 records.
2729          if (countRecord % 100 == 0) {
2730            errors.progress();
2731          }
2732          countRecord++;
2733          return true;
2734        } catch (RuntimeException e) {
2735          LOG.error("Result=" + result);
2736          throw e;
2737        }
2738      }
2739    };
2740    if (!checkMetaOnly) {
2741      // Scan hbase:meta to pick up user regions
2742      MetaTableAccessor.fullScanRegions(connection, visitor);
2743    }
2744
2745    errors.print("");
2746    return true;
2747  }
2748
2749  /**
2750   * Prints summary of all tables found on the system.
2751   */
2752  private void printTableSummary(SortedMap<TableName, HbckTableInfo> tablesInfo) {
2753    StringBuilder sb = new StringBuilder();
2754    int numOfSkippedRegions;
2755    errors.print("Summary:");
2756    for (HbckTableInfo tInfo : tablesInfo.values()) {
2757      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName()))
2758        ? skippedRegions.get(tInfo.getName()).size()
2759        : 0;
2760
2761      if (errors.tableHasErrors(tInfo)) {
2762        errors.print("Table " + tInfo.getName() + " is inconsistent.");
2763      } else if (numOfSkippedRegions > 0) {
2764        errors.print("Table " + tInfo.getName() + " is okay (with " + numOfSkippedRegions
2765          + " skipped regions).");
2766      } else {
2767        errors.print("Table " + tInfo.getName() + " is okay.");
2768      }
2769      errors.print("    Number of regions: " + tInfo.getNumRegions());
2770      if (numOfSkippedRegions > 0) {
2771        Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
2772        System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
2773        System.out.println("      List of skipped regions:");
2774        for (String sr : skippedRegionStrings) {
2775          System.out.println("        " + sr);
2776        }
2777      }
2778      sb.setLength(0); // clear out existing buffer, if any.
2779      sb.append("    Deployed on: ");
2780      for (ServerName server : tInfo.deployedOn) {
2781        sb.append(" " + server.toString());
2782      }
2783      errors.print(sb.toString());
2784    }
2785  }
2786
2787  static HbckErrorReporter getErrorReporter(final Configuration conf)
2788    throws ClassNotFoundException {
2789    Class<? extends HbckErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter",
2790      PrintingErrorReporter.class, HbckErrorReporter.class);
2791    return ReflectionUtils.newInstance(reporter, conf);
2792  }
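
  /*
   * A hedged configuration sketch: a different reporter implementation can be plugged in through
   * the "hbasefsck.errorreporter" key read above. MyQuietReporter is hypothetical; any class
   * implementing HbckErrorReporter that ReflectionUtils.newInstance can construct should work:
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   conf.setClass("hbasefsck.errorreporter", MyQuietReporter.class, HbckErrorReporter.class);
   *   HbckErrorReporter reporter = HBaseFsck.getErrorReporter(conf);
   */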
2793
2794  static class PrintingErrorReporter implements HbckErrorReporter {
2795    public int errorCount = 0;
2796    private int showProgress;
2797    // How frequently calls to progress() will create output
2798    private static final int progressThreshold = 100;
2799
2800    Set<HbckTableInfo> errorTables = new HashSet<>();
2801
2802    // for use by unit tests to verify which errors were discovered
2803    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
2804
2805    @Override
2806    public void clear() {
2807      errorTables.clear();
2808      errorList.clear();
2809      errorCount = 0;
2810    }
2811
2812    @Override
2813    public synchronized void reportError(ERROR_CODE errorCode, String message) {
2814      if (errorCode == ERROR_CODE.WRONG_USAGE) {
2815        System.err.println(message);
2816        return;
2817      }
2818
2819      errorList.add(errorCode);
2820      if (!summary) {
2821        System.out.println("ERROR: " + message);
2822      }
2823      errorCount++;
2824      showProgress = 0;
2825    }
2826
2827    @Override
2828    public synchronized void reportError(ERROR_CODE errorCode, String message,
2829      HbckTableInfo table) {
2830      errorTables.add(table);
2831      reportError(errorCode, message);
2832    }
2833
2834    @Override
2835    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2836      HbckRegionInfo info) {
2837      errorTables.add(table);
2838      String reference = "(region " + info.getRegionNameAsString() + ")";
2839      reportError(errorCode, reference + " " + message);
2840    }
2841
2842    @Override
2843    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2844      HbckRegionInfo info1, HbckRegionInfo info2) {
2845      errorTables.add(table);
2846      String reference =
2847        "(regions " + info1.getRegionNameAsString() + " and " + info2.getRegionNameAsString() + ")";
2848      reportError(errorCode, reference + " " + message);
2849    }
2850
2851    @Override
2852    public synchronized void reportError(String message) {
2853      reportError(ERROR_CODE.UNKNOWN, message);
2854    }
2855
2856    /**
2857     * Report error information, but do not increment the error count. Intended for cases where the
2858     * actual error would have been reported previously.
2859     */
2860    @Override
2861    public synchronized void report(String message) {
2862      if (!summary) {
2863        System.out.println("ERROR: " + message);
2864      }
2865      showProgress = 0;
2866    }
2867
2868    @Override
2869    public synchronized int summarize() {
2870      System.out.println(Integer.toString(errorCount) + " inconsistencies detected.");
2871      if (errorCount == 0) {
2872        System.out.println("Status: OK");
2873        return 0;
2874      } else {
2875        System.out.println("Status: INCONSISTENT");
2876        return -1;
2877      }
2878    }
2879
2880    @Override
2881    public ArrayList<ERROR_CODE> getErrorList() {
2882      return errorList;
2883    }
2884
2885    @Override
2886    public synchronized void print(String message) {
2887      if (!summary) {
2888        System.out.println(message);
2889      }
2890    }
2891
2892    @Override
2893    public boolean tableHasErrors(HbckTableInfo table) {
2894      return errorTables.contains(table);
2895    }
2896
2897    @Override
2898    public void resetErrors() {
2899      errorCount = 0;
2900    }
2901
2902    @Override
2903    public synchronized void detail(String message) {
2904      if (details) {
2905        System.out.println(message);
2906      }
2907      showProgress = 0;
2908    }
2909
2910    @Override
2911    public synchronized void progress() {
2912      if (showProgress++ == progressThreshold) {
2913        if (!summary) {
2914          System.out.print(".");
2915        }
2916        showProgress = 0;
2917      }
2918    }
2919  }
2920
2921  /**
2922   * Contact a region server and get all information from it
2923   */
2924  static class WorkItemRegion implements Callable<Void> {
2925    private final HBaseFsck hbck;
2926    private final ServerName rsinfo;
2927    private final HbckErrorReporter errors;
2928    private final ClusterConnection connection;
2929
2930    WorkItemRegion(HBaseFsck hbck, ServerName info, HbckErrorReporter errors,
2931      ClusterConnection connection) {
2932      this.hbck = hbck;
2933      this.rsinfo = info;
2934      this.errors = errors;
2935      this.connection = connection;
2936    }
2937
2938    @Override
2939    public synchronized Void call() throws IOException {
2940      errors.progress();
2941      try {
2942        BlockingInterface server = connection.getAdmin(rsinfo);
2943
2944        // list all online regions from this region server
2945        List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
2946        regions = filterRegions(regions);
2947
2948        if (details) {
2949          errors.detail(
2950            "RegionServer: " + rsinfo.getServerName() + " number of regions: " + regions.size());
2951          for (RegionInfo rinfo : regions) {
2952            errors.detail("  " + rinfo.getRegionNameAsString() + " id: " + rinfo.getRegionId()
2953              + " encoded_name: " + rinfo.getEncodedName() + " start: "
2954              + Bytes.toStringBinary(rinfo.getStartKey()) + " end: "
2955              + Bytes.toStringBinary(rinfo.getEndKey()));
2956          }
2957        }
2958
2959        // check to see if the existence of this region matches the region in META
2960
2961        for (RegionInfo r : regions) {
2962          HbckRegionInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
2963          hbi.addServer(r, rsinfo);
2964        }
2965      } catch (IOException e) { // unable to connect to the region server.
2966        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE,
2967          "RegionServer: " + rsinfo.getServerName() + " Unable to fetch region information. " + e);
2968        throw e;
2969      }
2970      return null;
2971    }
2972
2973    private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
2974      List<RegionInfo> ret = Lists.newArrayList();
2975      for (RegionInfo hri : regions) {
2976        if (hri.isMetaRegion() || (!hbck.checkMetaOnly && hbck.isTableIncluded(hri.getTable()))) {
2977          ret.add(hri);
2978        }
2979      }
2980      return ret;
2981    }
2982  }
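
  /*
   * A minimal sketch, assuming an ExecutorService `executor`, a ClusterConnection `connection`,
   * an HbckErrorReporter `errors` and a collection of live region server names are available
   * (as they are in the surrounding class), of how these work items might be fanned out:
   *
   *   List<Future<Void>> futures = new ArrayList<>();
   *   for (ServerName rs : liveRegionServers) {
   *     futures.add(executor.submit(new WorkItemRegion(hbck, rs, errors, connection)));
   *   }
   *   for (Future<Void> f : futures) {
   *     f.get(); // a region server that could not be contacted surfaces here as an exception
   *   }
   */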
2983
2984  /**
2985   * Contact hdfs and gather information about each region directory under the specified table directory.
2986   */
2987  class WorkItemHdfsDir implements Callable<Void> {
2988    private FileStatus tableDir;
2989    private HbckErrorReporter errors;
2990    private FileSystem fs;
2991
2992    WorkItemHdfsDir(FileSystem fs, HbckErrorReporter errors, FileStatus status) {
2993      this.fs = fs;
2994      this.tableDir = status;
2995      this.errors = errors;
2996    }
2997
2998    @Override
2999    public synchronized Void call() throws InterruptedException, ExecutionException {
3000      final Vector<Exception> exceptions = new Vector<>();
3001
3002      try {
3003        final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3004        final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
3005
3006        for (final FileStatus regionDir : regionDirs) {
3007          errors.progress();
3008          final String encodedName = regionDir.getPath().getName();
3009          // ignore directories that aren't hexadecimal
3010          if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
3011            continue;
3012          }
3013
3014          if (!exceptions.isEmpty()) {
3015            break;
3016          }
3017
3018          futures.add(executor.submit(new Runnable() {
3019            @Override
3020            public void run() {
3021              try {
3022                LOG.debug("Loading region info from hdfs:" + regionDir.getPath());
3023
3024                Path regioninfoFile =
3025                  new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
3026                boolean regioninfoFileExists = fs.exists(regioninfoFile);
3027
3028                if (!regioninfoFileExists) {
3029                  // As tables become larger it is more and more likely that by the time you
3030                  // reach a given region it will already be gone due to region splits/merges.
3031                  if (!fs.exists(regionDir.getPath())) {
3032                    LOG.warn("By the time we tried to process this region dir it was already gone: "
3033                      + regionDir.getPath());
3034                    return;
3035                  }
3036                }
3037
3038                HbckRegionInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
3039                HbckRegionInfo.HdfsEntry he = new HbckRegionInfo.HdfsEntry();
3040                synchronized (hbi) {
3041                  if (hbi.getHdfsRegionDir() != null) {
3042                    errors
3043                      .print("Directory " + encodedName + " duplicate??" + hbi.getHdfsRegionDir());
3044                  }
3045
3046                  he.regionDir = regionDir.getPath();
3047                  he.regionDirModTime = regionDir.getModificationTime();
3048                  he.hdfsRegioninfoFilePresent = regioninfoFileExists;
3049                  // we add to orphan list when we attempt to read .regioninfo
3050
3051                  // Set a flag if this region contains only edits
3052                  // This is special case if a region is left after split
3053                  he.hdfsOnlyEdits = true;
3054                  FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3055                  Path ePath = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
3056                  for (FileStatus subDir : subDirs) {
3057                    errors.progress();
3058                    String sdName = subDir.getPath().getName();
3059                    if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3060                      he.hdfsOnlyEdits = false;
3061                      break;
3062                    }
3063                  }
3064                  hbi.setHdfsEntry(he);
3065                }
3066              } catch (Exception e) {
3067                LOG.error("Could not load region dir", e);
3068                exceptions.add(e);
3069              }
3070            }
3071          }));
3072        }
3073
3074        // Ensure all pending tasks are complete (or that we run into an exception)
3075        for (Future<?> f : futures) {
3076          if (!exceptions.isEmpty()) {
3077            break;
3078          }
3079          try {
3080            f.get();
3081          } catch (ExecutionException e) {
3082            LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
3083            // Shouldn't happen, we already logged/caught any exceptions in the Runnable
3084          }
3086        }
3087      } catch (IOException e) {
3088        LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
3089        exceptions.add(e);
3090      } finally {
3091        if (!exceptions.isEmpty()) {
3092          errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3093            + tableDir.getPath().getName() + " Unable to fetch all HDFS region information. ");
3094          // Just throw the first exception as an indication something bad happened
3095          // Don't need to propagate all the exceptions, we already logged them all anyway
3096          throw new ExecutionException("First exception in WorkItemHdfsDir",
3097            exceptions.firstElement());
3098        }
3099      }
3100      return null;
3101    }
3102  }
3103
3104  /**
3105   * Contact hdfs and load the .regioninfo file for the specified region into its HbckRegionInfo.
3106   */
3107  static class WorkItemHdfsRegionInfo implements Callable<Void> {
3108    private HbckRegionInfo hbi;
3109    private HBaseFsck hbck;
3110    private HbckErrorReporter errors;
3111
3112    WorkItemHdfsRegionInfo(HbckRegionInfo hbi, HBaseFsck hbck, HbckErrorReporter errors) {
3113      this.hbi = hbi;
3114      this.hbck = hbck;
3115      this.errors = errors;
3116    }
3117
3118    @Override
3119    public synchronized Void call() throws IOException {
3120      // only load entries that haven't been loaded yet.
3121      if (hbi.getHdfsHRI() == null) {
3122        try {
3123          errors.progress();
3124          hbi.loadHdfsRegioninfo(hbck.getConf());
3125        } catch (IOException ioe) {
3126          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3127            + hbi.getTableName() + " in hdfs dir " + hbi.getHdfsRegionDir()
3128            + "!  It may be an invalid format or version file.  Treating as "
3129            + "an orphaned regiondir.";
3130          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3131          try {
3132            hbck.debugLsr(hbi.getHdfsRegionDir());
3133          } catch (IOException ioe2) {
3134            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3135            throw ioe2;
3136          }
3137          hbck.orphanHdfsDirs.add(hbi);
3138          throw ioe;
3139        }
3140      }
3141      return null;
3142    }
3143  }
3144
3145  /**
3146   * Display the full report from fsck. This displays all live and dead region servers, and all
3147   * known regions.
3148   */
3149  public static void setDisplayFullReport() {
3150    details = true;
3151  }
3152
3153  public static boolean shouldDisplayFullReport() {
3154    return details;
3155  }
3156
3157  /**
3158   * Set exclusive mode.
3159   */
3160  public static void setForceExclusive() {
3161    forceExclusive = true;
3162  }
3163
3164  /**
3165   * Only one instance of hbck can modify HBase at a time.
3166   */
3167  public boolean isExclusive() {
3168    return fixAny || forceExclusive;
3169  }
3170
3171  /**
3172   * Set summary mode. Print only summary of the tables and status (OK or INCONSISTENT)
3173   */
3174  static void setSummary() {
3175    summary = true;
3176  }
3177
3178  /**
3179   * Set hbase:meta check mode. Print only info about hbase:meta table deployment/state
3180   */
3181  void setCheckMetaOnly() {
3182    checkMetaOnly = true;
3183  }
3184
3185  /**
3186   * Set region boundaries check mode.
3187   */
3188  void setRegionBoundariesCheck() {
3189    checkRegionBoundaries = true;
3190  }
3191
3192  /**
3193   * Set replication fix mode.
3194   */
3195  public void setFixReplication(boolean shouldFix) {
3196    fixReplication = shouldFix;
3197    fixAny |= shouldFix;
3198  }
3199
3200  public void setCleanReplicationBarrier(boolean shouldClean) {
3201    cleanReplicationBarrier = shouldClean;
3202  }
3203
3204  /**
3205   * Mark that fsck should be rerun. This is set whenever we have tried to fix something, so
3206   * that the fsck tool can be run once more to verify that the fix actually worked and did not
3207   * break anything else.
3208   */
3209  void setShouldRerun() {
3210    rerun = true;
3211  }
3212
3213  public boolean shouldRerun() {
3214    return rerun;
3215  }
3216
3217  /**
3218   * Fix inconsistencies found by fsck. This should try to fix errors (if any) found by fsck
3219   * utility.
3220   */
3221  public void setFixAssignments(boolean shouldFix) {
3222    fixAssignments = shouldFix;
3223    fixAny |= shouldFix;
3224  }
3225
3226  boolean shouldFixAssignments() {
3227    return fixAssignments;
3228  }
3229
3230  public void setFixMeta(boolean shouldFix) {
3231    fixMeta = shouldFix;
3232    fixAny |= shouldFix;
3233  }
3234
3235  boolean shouldFixMeta() {
3236    return fixMeta;
3237  }
3238
3239  public void setFixEmptyMetaCells(boolean shouldFix) {
3240    fixEmptyMetaCells = shouldFix;
3241    fixAny |= shouldFix;
3242  }
3243
3244  boolean shouldFixEmptyMetaCells() {
3245    return fixEmptyMetaCells;
3246  }
3247
3248  public void setCheckHdfs(boolean checking) {
3249    checkHdfs = checking;
3250  }
3251
3252  boolean shouldCheckHdfs() {
3253    return checkHdfs;
3254  }
3255
3256  public void setFixHdfsHoles(boolean shouldFix) {
3257    fixHdfsHoles = shouldFix;
3258    fixAny |= shouldFix;
3259  }
3260
3261  boolean shouldFixHdfsHoles() {
3262    return fixHdfsHoles;
3263  }
3264
3265  public void setFixTableOrphans(boolean shouldFix) {
3266    fixTableOrphans = shouldFix;
3267    fixAny |= shouldFix;
3268  }
3269
3270  boolean shouldFixTableOrphans() {
3271    return fixTableOrphans;
3272  }
3273
3274  public void setFixHdfsOverlaps(boolean shouldFix) {
3275    fixHdfsOverlaps = shouldFix;
3276    fixAny |= shouldFix;
3277  }
3278
3279  boolean shouldFixHdfsOverlaps() {
3280    return fixHdfsOverlaps;
3281  }
3282
3283  public void setFixHdfsOrphans(boolean shouldFix) {
3284    fixHdfsOrphans = shouldFix;
3285    fixAny |= shouldFix;
3286  }
3287
3288  boolean shouldFixHdfsOrphans() {
3289    return fixHdfsOrphans;
3290  }
3291
3292  public void setFixVersionFile(boolean shouldFix) {
3293    fixVersionFile = shouldFix;
3294    fixAny |= shouldFix;
3295  }
3296
3297  public boolean shouldFixVersionFile() {
3298    return fixVersionFile;
3299  }
3300
3301  public void setSidelineBigOverlaps(boolean sbo) {
3302    this.sidelineBigOverlaps = sbo;
3303  }
3304
3305  public boolean shouldSidelineBigOverlaps() {
3306    return sidelineBigOverlaps;
3307  }
3308
3309  public void setFixSplitParents(boolean shouldFix) {
3310    fixSplitParents = shouldFix;
3311    fixAny |= shouldFix;
3312  }
3313
3314  public void setRemoveParents(boolean shouldFix) {
3315    removeParents = shouldFix;
3316    fixAny |= shouldFix;
3317  }
3318
3319  boolean shouldFixSplitParents() {
3320    return fixSplitParents;
3321  }
3322
3323  boolean shouldRemoveParents() {
3324    return removeParents;
3325  }
3326
3327  public void setFixReferenceFiles(boolean shouldFix) {
3328    fixReferenceFiles = shouldFix;
3329    fixAny |= shouldFix;
3330  }
3331
3332  boolean shouldFixReferenceFiles() {
3333    return fixReferenceFiles;
3334  }
3335
3336  public void setFixHFileLinks(boolean shouldFix) {
3337    fixHFileLinks = shouldFix;
3338    fixAny |= shouldFix;
3339  }
3340
3341  boolean shouldFixHFileLinks() {
3342    return fixHFileLinks;
3343  }
3344
3345  public boolean shouldIgnorePreCheckPermission() {
3346    return !fixAny || ignorePreCheckPermission;
3347  }
3348
3349  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3350    this.ignorePreCheckPermission = ignorePreCheckPermission;
3351  }
3352
3353  /**
3354   * @param mm maximum number of regions to merge into a single region.
3355   */
3356  public void setMaxMerge(int mm) {
3357    this.maxMerge = mm;
3358  }
3359
3360  public int getMaxMerge() {
3361    return maxMerge;
3362  }
3363
3364  public void setMaxOverlapsToSideline(int mo) {
3365    this.maxOverlapsToSideline = mo;
3366  }
3367
3368  public int getMaxOverlapsToSideline() {
3369    return maxOverlapsToSideline;
3370  }
3371
3372  /**
3373   * Only check/fix tables specified by the list. An empty list means all tables are included.
3374   */
3375  boolean isTableIncluded(TableName table) {
3376    return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
3377  }
3378
3379  public void includeTable(TableName table) {
3380    tablesIncluded.add(table);
3381  }
3382
3383  Set<TableName> getIncludedTables() {
3384    return new HashSet<>(tablesIncluded);
3385  }
3386
3387  /**
3388   * We are interested in only those tables that have not changed their state in hbase:meta during
3389   * the last few seconds specified by hbase.admin.fsck.timelag.
3390   * @param seconds - the time in seconds
3391   */
3392  public void setTimeLag(long seconds) {
3393    timelag = seconds * 1000; // convert to milliseconds
3394  }
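
  // For example, a hypothetical invocation `hbck -timelag 60` ends up calling setTimeLag(60),
  // so only hbase:meta entries last modified more than 60000 ms ago are considered by getTables().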
3395
3396  /**
3397   * @param sidelineDir - HDFS path to sideline data
3398   */
3399  public void setSidelineDir(String sidelineDir) {
3400    this.sidelineDir = new Path(sidelineDir);
3401  }
3402
3403  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
3404    throws IOException {
3405    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3406  }
3407
3408  public HFileCorruptionChecker getHFilecorruptionChecker() {
3409    return hfcc;
3410  }
3411
3412  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3413    this.hfcc = hfcc;
3414  }
3415
3416  public void setRetCode(int code) {
3417    this.retcode = code;
3418  }
3419
3420  public int getRetCode() {
3421    return retcode;
3422  }
3423
3424  protected HBaseFsck printUsageAndExit() {
3425    StringWriter sw = new StringWriter(2048);
3426    PrintWriter out = new PrintWriter(sw);
3427    out.println("");
3428    out.println("-----------------------------------------------------------------------");
3429    out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
3430    out.println("In general, all Read-Only options are supported and can be be used");
3431    out.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
3432    out.println("below for details on which options are not supported.");
3433    out.println("-----------------------------------------------------------------------");
3434    out.println("");
3435    out.println("Usage: fsck [opts] {only tables}");
3436    out.println(" where [opts] are:");
3437    out.println("   -help Display help options (this)");
3438    out.println("   -details Display full report of all regions.");
3439    out.println("   -timelag <timeInSeconds>  Process only regions that "
3440      + " have not experienced any metadata updates in the last " + " <timeInSeconds> seconds.");
3441    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds"
3442      + " before checking if the fix worked if run with -fix");
3443    out.println("   -summary Print only summary of the tables and status.");
3444    out.println("   -metaonly Only check the state of the hbase:meta table.");
3445    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3446    out.println(
3447      "   -boundaries Verify that regions boundaries are the same between META and store files.");
3448    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
3449
3450    out.println("");
3451    out.println("  Datafile Repair options: (expert features, use with caution!)");
3452    out.println(
3453      "   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
3454    out.println(
3455      "   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
3456
3457    out.println("");
3458    out.println(" Replication options");
3459    out.println("   -fixReplication   Deletes replication queues for removed peers");
3460
3461    out.println("");
3462    out.println(
3463      "  Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
3464    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
3465    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
3466    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
3467    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
3468      + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3469    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
3470
3471    out.println("");
3472    out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
3473    out.println("");
3474    out.println("  UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
3475    out.println(
3476      "   -fix              Try to fix region assignments.  This is for backwards compatibility");
3477    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
3478    out.println(
3479      "   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
3480    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
3481    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
3482    out.println(
3483      "   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3484    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
3485    out.println(
3486      "   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n="
3487        + DEFAULT_MAX_MERGE + " by default)");
3488    out.println(
3489      "   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
3490    out.println(
3491      "   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n="
3492        + DEFAULT_OVERLAPS_TO_SIDELINE + " by default)");
3493    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
3494    out.println(
3495      "   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
3496    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
3497      + " (empty REGIONINFO_QUALIFIER rows)");
3498
3499    out.println("");
3500    out.println("  UNSUPPORTED Metadata Repair shortcuts");
3501    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles "
3502      + "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles"
3503      + "-fixHFileLinks");
3504    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3505    out.println("");
3506    out.println(" Replication options");
3507    out.println("   -fixReplication   Deletes replication queues for removed peers");
3508    out.println("   -cleanReplicationBarrier [tableName] clean the replication barriers "
3509      + "of a specified table, tableName is required");
3510    out.flush();
3511    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3512
3513    setRetCode(-2);
3514    return this;
3515  }
3516
3517  /**
3518   * Main program.
3519   */
3520  public static void main(String[] args) throws Exception {
3521    // create a fsck object
3522    Configuration conf = HBaseConfiguration.create();
3523    Path hbasedir = CommonFSUtils.getRootDir(conf);
3524    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3525    CommonFSUtils.setFsDefault(conf, new Path(defaultFs));
3526    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
3527    System.exit(ret);
3528  }
3529
3530  /**
3531   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
3532   */
3533  static class HBaseFsckTool extends Configured implements Tool {
3534    HBaseFsckTool(Configuration conf) {
3535      super(conf);
3536    }
3537
3538    @Override
3539    public int run(String[] args) throws Exception {
3540      HBaseFsck hbck = new HBaseFsck(getConf());
3541      hbck.exec(hbck.executor, args);
3542      hbck.close();
3543      return hbck.getRetCode();
3544    }
3545  }
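
  /*
   * A hedged usage sketch: besides main() above, the tool can be driven programmatically through
   * ToolRunner, which also applies any -D<key>=<value> overrides from the argument list:
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   int exitCode = ToolRunner.run(conf, new HBaseFsckTool(conf), new String[] { "-details" });
   */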
3546
3547  public HBaseFsck exec(ExecutorService exec, String[] args)
3548    throws KeeperException, IOException, InterruptedException, ReplicationException {
3549    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3550
3551    boolean checkCorruptHFiles = false;
3552    boolean sidelineCorruptHFiles = false;
3553
3554    // Process command-line args.
3555    for (int i = 0; i < args.length; i++) {
3556      String cmd = args[i];
3557      if (cmd.equals("-help") || cmd.equals("-h")) {
3558        return printUsageAndExit();
3559      } else if (cmd.equals("-details")) {
3560        setDisplayFullReport();
3561      } else if (cmd.equals("-exclusive")) {
3562        setForceExclusive();
3563      } else if (cmd.equals("-timelag")) {
3564        if (i == args.length - 1) {
3565          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3566          return printUsageAndExit();
3567        }
3568        try {
3569          long timelag = Long.parseLong(args[++i]);
3570          setTimeLag(timelag);
3571        } catch (NumberFormatException e) {
3572          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3573          return printUsageAndExit();
3574        }
3575      } else if (cmd.equals("-sleepBeforeRerun")) {
3576        if (i == args.length - 1) {
3577          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sleepBeforeRerun needs a value.");
3578          return printUsageAndExit();
3579        }
3580        try {
3581          sleepBeforeRerun = Long.parseLong(args[++i]);
3582        } catch (NumberFormatException e) {
3583          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3584          return printUsageAndExit();
3585        }
3586      } else if (cmd.equals("-sidelineDir")) {
3587        if (i == args.length - 1) {
3588          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3589          return printUsageAndExit();
3590        }
3591        setSidelineDir(args[++i]);
3592      } else if (cmd.equals("-fix")) {
3593        errors.reportError(ERROR_CODE.WRONG_USAGE,
3594          "This option is deprecated, please use  -fixAssignments instead.");
3595        setFixAssignments(true);
3596      } else if (cmd.equals("-fixAssignments")) {
3597        setFixAssignments(true);
3598      } else if (cmd.equals("-fixMeta")) {
3599        setFixMeta(true);
3600      } else if (cmd.equals("-noHdfsChecking")) {
3601        setCheckHdfs(false);
3602      } else if (cmd.equals("-fixHdfsHoles")) {
3603        setFixHdfsHoles(true);
3604      } else if (cmd.equals("-fixHdfsOrphans")) {
3605        setFixHdfsOrphans(true);
3606      } else if (cmd.equals("-fixTableOrphans")) {
3607        setFixTableOrphans(true);
3608      } else if (cmd.equals("-fixHdfsOverlaps")) {
3609        setFixHdfsOverlaps(true);
3610      } else if (cmd.equals("-fixVersionFile")) {
3611        setFixVersionFile(true);
3612      } else if (cmd.equals("-sidelineBigOverlaps")) {
3613        setSidelineBigOverlaps(true);
3614      } else if (cmd.equals("-fixSplitParents")) {
3615        setFixSplitParents(true);
3616      } else if (cmd.equals("-removeParents")) {
3617        setRemoveParents(true);
3618      } else if (cmd.equals("-ignorePreCheckPermission")) {
3619        setIgnorePreCheckPermission(true);
3620      } else if (cmd.equals("-checkCorruptHFiles")) {
3621        checkCorruptHFiles = true;
3622      } else if (cmd.equals("-sidelineCorruptHFiles")) {
3623        sidelineCorruptHFiles = true;
3624      } else if (cmd.equals("-fixReferenceFiles")) {
3625        setFixReferenceFiles(true);
3626      } else if (cmd.equals("-fixHFileLinks")) {
3627        setFixHFileLinks(true);
3628      } else if (cmd.equals("-fixEmptyMetaCells")) {
3629        setFixEmptyMetaCells(true);
3630      } else if (cmd.equals("-repair")) {
3631        // this attempts to merge overlapping hdfs regions, needs testing
3632        // under load
3633        setFixHdfsHoles(true);
3634        setFixHdfsOrphans(true);
3635        setFixMeta(true);
3636        setFixAssignments(true);
3637        setFixHdfsOverlaps(true);
3638        setFixVersionFile(true);
3639        setSidelineBigOverlaps(true);
3640        setFixSplitParents(false);
3641        setCheckHdfs(true);
3642        setFixReferenceFiles(true);
3643        setFixHFileLinks(true);
3644      } else if (cmd.equals("-repairHoles")) {
3645        // this will make all missing hdfs regions available but may lose data
3646        setFixHdfsHoles(true);
3647        setFixHdfsOrphans(false);
3648        setFixMeta(true);
3649        setFixAssignments(true);
3650        setFixHdfsOverlaps(false);
3651        setSidelineBigOverlaps(false);
3652        setFixSplitParents(false);
3653        setCheckHdfs(true);
3654      } else if (cmd.equals("-maxOverlapsToSideline")) {
3655        if (i == args.length - 1) {
3656          errors.reportError(ERROR_CODE.WRONG_USAGE,
3657            "-maxOverlapsToSideline needs a numeric value argument.");
3658          return printUsageAndExit();
3659        }
3660        try {
3661          int maxOverlapsToSideline = Integer.parseInt(args[++i]);
3662          setMaxOverlapsToSideline(maxOverlapsToSideline);
3663        } catch (NumberFormatException e) {
3664          errors.reportError(ERROR_CODE.WRONG_USAGE,
3665            "-maxOverlapsToSideline needs a numeric value argument.");
3666          return printUsageAndExit();
3667        }
3668      } else if (cmd.equals("-maxMerge")) {
3669        if (i == args.length - 1) {
3670          errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument.");
3671          return printUsageAndExit();
3672        }
3673        try {
3674          int maxMerge = Integer.parseInt(args[++i]);
3675          setMaxMerge(maxMerge);
3676        } catch (NumberFormatException e) {
3677          errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument.");
3678          return printUsageAndExit();
3679        }
3680      } else if (cmd.equals("-summary")) {
3681        setSummary();
3682      } else if (cmd.equals("-metaonly")) {
3683        setCheckMetaOnly();
3684      } else if (cmd.equals("-boundaries")) {
3685        setRegionBoundariesCheck();
3686      } else if (cmd.equals("-fixReplication")) {
3687        setFixReplication(true);
3688      } else if (cmd.equals("-cleanReplicationBarrier")) {
3689        setCleanReplicationBarrier(true);
3690        if (args[++i].startsWith("-")) {
3691          printUsageAndExit();
3692        }
3693        setCleanReplicationBarrierTable(args[i]);
3694      } else if (cmd.startsWith("-")) {
3695        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3696        return printUsageAndExit();
3697      } else {
3698        includeTable(TableName.valueOf(cmd));
3699        errors.print("Allow checking/fixes for table: " + cmd);
3700      }
3701    }
3702
3703    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
3704
3705    // pre-check current user has FS write permission or not
3706    try {
3707      preCheckPermission();
3708    } catch (IOException ioe) {
3709      Runtime.getRuntime().exit(-1);
3710    }
3711
3712    // do the real work of hbck
3713    connect();
3714
3715    // after connecting to server above, we have server version
3716    // check if unsupported option is specified based on server version
3717    if (!isOptionsSupported(args)) {
3718      return printUsageAndExit();
3719    }
3720
3721    try {
3722      // if corrupt file mode is on, first fix them since they may be opened later
3723      if (checkCorruptHFiles || sidelineCorruptHFiles) {
3724        LOG.info("Checking all hfiles for corruption");
3725        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3726        setHFileCorruptionChecker(hfcc); // so we can get result
3727        Collection<TableName> tables = getIncludedTables();
3728        Collection<Path> tableDirs = new ArrayList<>();
3729        Path rootdir = CommonFSUtils.getRootDir(getConf());
3730        if (tables.size() > 0) {
3731          for (TableName t : tables) {
3732            tableDirs.add(CommonFSUtils.getTableDir(rootdir, t));
3733          }
3734        } else {
3735          tableDirs = FSUtils.getTableDirs(CommonFSUtils.getCurrentFileSystem(getConf()), rootdir);
3736        }
3737        hfcc.checkTables(tableDirs);
3738        hfcc.report(errors);
3739      }
3740
3741      // check and fix table integrity, region consistency.
3742      int code = onlineHbck();
3743      setRetCode(code);
3744      // If we have changed the HBase state it is better to run hbck again
3745      // to see if we haven't broken something else in the process.
3746      // We run it only once more because otherwise we can easily fall into
3747      // an infinite loop.
3748      if (shouldRerun()) {
3749        try {
3750          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3751          Thread.sleep(sleepBeforeRerun);
3752        } catch (InterruptedException ie) {
3753          LOG.warn("Interrupted while sleeping");
3754          return this;
3755        }
3756        // Just report
3757        setFixAssignments(false);
3758        setFixMeta(false);
3759        setFixHdfsHoles(false);
3760        setFixHdfsOverlaps(false);
3761        setFixVersionFile(false);
3762        setFixTableOrphans(false);
3763        errors.resetErrors();
3764        code = onlineHbck();
3765        setRetCode(code);
3766      }
3767    } finally {
3768      IOUtils.closeQuietly(this, e -> LOG.warn("", e));
3769    }
3770    return this;
3771  }
3772
3773  private boolean isOptionsSupported(String[] args) {
3774    boolean result = true;
3775    String hbaseServerVersion = status.getHBaseVersion();
3776    if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
3777      // Process command-line args.
3778      for (String arg : args) {
3779        if (unsupportedOptionsInV2.contains(arg)) {
3780          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
3781            "option '" + arg + "' is not " + "supported!");
3782          result = false;
3783          break;
3784        }
3785      }
3786    }
3787    return result;
3788  }
3789
3790  public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
3791    this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
3792  }
3793
3794  public void cleanReplicationBarrier() throws IOException {
3795    if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
3796      return;
3797    }
3798    if (cleanReplicationBarrierTable.isSystemTable()) {
3799      errors.reportError(ERROR_CODE.INVALID_TABLE,
3800        "invalid table: " + cleanReplicationBarrierTable);
3801      return;
3802    }
3803
3804    boolean isGlobalScope = false;
3805    try {
3806      isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
3807    } catch (TableNotFoundException e) {
3808      LOG.info("we may need to clean some erroneous data due to bugs");
3809    }
3810
3811    if (isGlobalScope) {
3812      errors.reportError(ERROR_CODE.INVALID_TABLE,
3813        "table's replication scope is global: " + cleanReplicationBarrierTable);
3814      return;
3815    }
3816    List<byte[]> regionNames = new ArrayList<>();
3817    Scan barrierScan = new Scan();
3818    barrierScan.setCaching(100);
3819    barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
3820    barrierScan
3821      .withStartRow(MetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
3822        MetaTableAccessor.QueryType.REGION))
3823      .withStopRow(MetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
3824        MetaTableAccessor.QueryType.REGION));
3825    Result result;
3826    try (ResultScanner scanner = meta.getScanner(barrierScan)) {
3827      while ((result = scanner.next()) != null) {
3828        regionNames.add(result.getRow());
3829      }
3830    }
3831    if (regionNames.isEmpty()) {
3832      errors.reportError(ERROR_CODE.INVALID_TABLE,
3833        "there are no replication barriers for table: " + cleanReplicationBarrierTable);
3834      return;
3835    }
3836    ReplicationQueueStorage queueStorage =
3837      ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
3838    List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
3839    if (peerDescriptions != null && peerDescriptions.size() > 0) {
3840      List<String> peers = peerDescriptions.stream()
3841        .filter(
3842          peerConfig -> peerConfig.getPeerConfig().needToReplicate(cleanReplicationBarrierTable))
3843        .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
3844      try {
3845        List<String> batch = new ArrayList<>();
3846        for (String peer : peers) {
3847          for (byte[] regionName : regionNames) {
3848            batch.add(RegionInfo.encodeRegionName(regionName));
3849            if (batch.size() % 100 == 0) {
3850              queueStorage.removeLastSequenceIds(peer, batch);
3851              batch.clear();
3852            }
3853          }
3854          if (batch.size() > 0) {
3855            queueStorage.removeLastSequenceIds(peer, batch);
3856            batch.clear();
3857          }
3858        }
3859      } catch (ReplicationException re) {
3860        throw new IOException(re);
3861      }
3862    }
3863    for (byte[] regionName : regionNames) {
3864      meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
3865    }
3866    setShouldRerun();
3867  }
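
  /*
   * A hedged sketch of how this cleanup is typically triggered, mirroring the
   * -cleanReplicationBarrier handling in exec() above; the table name is hypothetical:
   *
   *   hbck.setCleanReplicationBarrier(true);
   *   hbck.setCleanReplicationBarrierTable("ns:dropped_table");
   *   hbck.cleanReplicationBarrier(); // deletes rep_barrier cells and queued last sequence ids
   */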
3868
3869  /**
3870   * ls -r for debugging purposes
3871   */
3872  void debugLsr(Path p) throws IOException {
3873    debugLsr(getConf(), p, errors);
3874  }
3875
3876  /**
3877   * ls -r for debugging purposes
3878   */
3879  public static void debugLsr(Configuration conf, Path p) throws IOException {
3880    debugLsr(conf, p, new PrintingErrorReporter());
3881  }
3882
3883  /**
3884   * ls -r for debugging purposes
3885   */
3886  public static void debugLsr(Configuration conf, Path p, HbckErrorReporter errors)
3887    throws IOException {
3888    if (!LOG.isDebugEnabled() || p == null) {
3889      return;
3890    }
3891    FileSystem fs = p.getFileSystem(conf);
3892
3893    if (!fs.exists(p)) {
3894      // nothing
3895      return;
3896    }
3897    errors.print(p.toString());
3898
3899    if (fs.isFile(p)) {
3900      return;
3901    }
3902
3903    if (fs.getFileStatus(p).isDirectory()) {
3904      FileStatus[] fss = fs.listStatus(p);
3905      for (FileStatus status : fss) {
3906        debugLsr(conf, status.getPath(), errors);
3907      }
3908    }
3909  }
3910}