001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.Closeable;
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.io.InterruptedIOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.net.InetAddress;
027import java.net.URI;
028import java.util.ArrayList;
029import java.util.Collection;
030import java.util.Collections;
031import java.util.Comparator;
032import java.util.EnumSet;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.List;
037import java.util.Locale;
038import java.util.Map;
039import java.util.Map.Entry;
040import java.util.Objects;
041import java.util.Optional;
042import java.util.Set;
043import java.util.SortedMap;
044import java.util.TreeMap;
045import java.util.Vector;
046import java.util.concurrent.Callable;
047import java.util.concurrent.ConcurrentSkipListMap;
048import java.util.concurrent.ExecutionException;
049import java.util.concurrent.ExecutorService;
050import java.util.concurrent.Executors;
051import java.util.concurrent.Future;
052import java.util.concurrent.FutureTask;
053import java.util.concurrent.ScheduledThreadPoolExecutor;
054import java.util.concurrent.TimeUnit;
055import java.util.concurrent.TimeoutException;
056import java.util.concurrent.atomic.AtomicBoolean;
057import java.util.concurrent.atomic.AtomicInteger;
058import java.util.stream.Collectors;
059
060import org.apache.commons.io.IOUtils;
061import org.apache.commons.lang3.StringUtils;
062import org.apache.hadoop.conf.Configuration;
063import org.apache.hadoop.conf.Configured;
064import org.apache.hadoop.fs.FSDataOutputStream;
065import org.apache.hadoop.fs.FileStatus;
066import org.apache.hadoop.fs.FileSystem;
067import org.apache.hadoop.fs.Path;
068import org.apache.hadoop.fs.permission.FsAction;
069import org.apache.hadoop.fs.permission.FsPermission;
070import org.apache.hadoop.hbase.Abortable;
071import org.apache.hadoop.hbase.Cell;
072import org.apache.hadoop.hbase.CellUtil;
073import org.apache.hadoop.hbase.ClusterMetrics;
074import org.apache.hadoop.hbase.ClusterMetrics.Option;
075import org.apache.hadoop.hbase.HBaseConfiguration;
076import org.apache.hadoop.hbase.HBaseInterfaceAudience;
077import org.apache.hadoop.hbase.HConstants;
078import org.apache.hadoop.hbase.HRegionLocation;
079import org.apache.hadoop.hbase.KeyValue;
080import org.apache.hadoop.hbase.MasterNotRunningException;
081import org.apache.hadoop.hbase.MetaTableAccessor;
082import org.apache.hadoop.hbase.RegionLocations;
083import org.apache.hadoop.hbase.ServerName;
084import org.apache.hadoop.hbase.TableName;
085import org.apache.hadoop.hbase.TableNotFoundException;
086import org.apache.hadoop.hbase.ZooKeeperConnectionException;
087import org.apache.hadoop.hbase.client.Admin;
088import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
089import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
090import org.apache.hadoop.hbase.client.Connection;
091import org.apache.hadoop.hbase.client.ConnectionFactory;
092import org.apache.hadoop.hbase.client.Delete;
093import org.apache.hadoop.hbase.client.Get;
094import org.apache.hadoop.hbase.client.Put;
095import org.apache.hadoop.hbase.client.RegionInfo;
096import org.apache.hadoop.hbase.client.RegionInfoBuilder;
097import org.apache.hadoop.hbase.client.RegionLocator;
098import org.apache.hadoop.hbase.client.RegionReplicaUtil;
099import org.apache.hadoop.hbase.client.Result;
100import org.apache.hadoop.hbase.client.ResultScanner;
101import org.apache.hadoop.hbase.client.RowMutations;
102import org.apache.hadoop.hbase.client.Scan;
103import org.apache.hadoop.hbase.client.Table;
104import org.apache.hadoop.hbase.client.TableDescriptor;
105import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
106import org.apache.hadoop.hbase.client.TableState;
107import org.apache.hadoop.hbase.io.FileLink;
108import org.apache.hadoop.hbase.io.HFileLink;
109import org.apache.hadoop.hbase.io.hfile.CacheConfig;
110import org.apache.hadoop.hbase.io.hfile.HFile;
111import org.apache.hadoop.hbase.master.RegionState;
112import org.apache.hadoop.hbase.regionserver.HRegion;
113import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
114import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
115import org.apache.hadoop.hbase.replication.ReplicationException;
116import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
117import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
118import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
119import org.apache.hadoop.hbase.security.UserProvider;
120import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
121import org.apache.hadoop.hbase.util.HbckErrorReporter.ERROR_CODE;
122import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
123import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
124import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
125import org.apache.hadoop.hbase.wal.WALSplitUtil;
126import org.apache.hadoop.hbase.zookeeper.ZKUtil;
127import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
128import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
129import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
130import org.apache.hadoop.ipc.RemoteException;
131import org.apache.hadoop.security.AccessControlException;
132import org.apache.hadoop.security.UserGroupInformation;
133import org.apache.hadoop.util.ReflectionUtils;
134import org.apache.hadoop.util.Tool;
135import org.apache.hadoop.util.ToolRunner;
136import org.apache.yetus.audience.InterfaceAudience;
137import org.apache.yetus.audience.InterfaceStability;
138import org.apache.zookeeper.KeeperException;
139import org.slf4j.Logger;
140import org.slf4j.LoggerFactory;
141
142import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
143import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
144import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
145import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
146import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
147
148/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
 * work with hbase-2.x; it can read state but is not allowed to change state, i.e. it cannot
 * effect 'repair'. Even though it can 'read' state, given how much has changed in how hbase1 and
 * hbase2 operate, it will often misread. See hbck2 (HBASE-19121) for an hbck tool for hbase2.
 * This class is deprecated.
155 *
156 * <p>
157 * Region consistency checks verify that hbase:meta, region deployment on region
158 * servers and the state of data in HDFS (.regioninfo files) all are in
159 * accordance.
160 * <p>
161 * Table integrity checks verify that all possible row keys resolve to exactly
162 * one region of a table.  This means there are no individual degenerate
163 * or backwards regions; no holes between regions; and that there are no
164 * overlapping regions.
165 * <p>
166 * The general repair strategy works in two phases:
167 * <ol>
168 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
169 * <li> Repair Region Consistency with hbase:meta and assignments
170 * </ol>
171 * <p>
 * For table integrity repairs, the tables' region directories are scanned
 * for .regioninfo files.  Each table's integrity is then verified.  If there
 * are any orphan regions (regions with no .regioninfo files) or holes, new
 * regions are fabricated.  Backwards regions and empty degenerate
 * (endkey==startkey) regions are sidelined.  If there are any overlapping regions,
 * a new region is created and all data is merged into the new region.
178 * <p>
179 * Table integrity repairs deal solely with HDFS and could potentially be done
180 * offline -- the hbase region servers or master do not need to be running.
181 * This phase can eventually be used to completely reconstruct the hbase:meta table in
182 * an offline fashion.
183 * <p>
 * Region consistency requires three conditions -- 1) a valid .regioninfo file
 * present in an HDFS region dir, 2) a valid row with .regioninfo data in META,
 * and 3) a region deployed only at the regionserver it was assigned to,
 * with proper state in the master.
188 * <p>
189 * Region consistency repairs require hbase to be online so that hbck can
190 * contact the HBase master and region servers.  The hbck#connect() method must
191 * first be called successfully.  Much of the region consistency information
192 * is transient and less risky to repair.
193 * <p>
194 * If hbck is run from the command line, there are a handful of arguments that
195 * can be used to limit the kinds of repairs hbck will do.  See the code in
196 * {@link #printUsageAndExit()} for more details.
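 * <p>
 * As a rough sketch of programmatic use (the usual entry point is the command line via
 * {@code hbase hbck}), a report-only run could look like the following; exception handling is
 * elided and no fix options are set, since fix options are rejected against hbase-2.x:
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * try (HBaseFsck fsck = new HBaseFsck(conf)) {
 *   fsck.connect();                  // takes the hbck lock (when exclusive) and opens connections
 *   int retCode = fsck.onlineHbck(); // report-only run; summarizes any inconsistencies found
 * }
 * }</pre>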
197 * @deprecated For removal in hbase-4.0.0. Use HBCK2 instead.
198 */
199@Deprecated
200@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
201@InterfaceStability.Evolving
202public class HBaseFsck extends Configured implements Closeable {
203  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
204  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
205  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
206  private static boolean rsSupportsOffline = true;
207  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
208  private static final int DEFAULT_MAX_MERGE = 5;
209
210  /**
   * This is where hbase-1.x used to default the location of the hbck1 lock file.
   * hbck1 puts a lock file in place when it goes to write/make changes.
213   */
214  @VisibleForTesting
215  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
216  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
217  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
218  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In HADOOP-2.6 and later, the Namenode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies a timeout on this operation of up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
223  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
224  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
225  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
226  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
227
228  /**********************
229   * Internal resources
230   **********************/
231  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
232  private ClusterMetrics status;
233  private Connection connection;
234  private Admin admin;
235  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
237  protected ExecutorService executor;
238  private long startMillis = EnvironmentEdgeManager.currentTime();
239  private HFileCorruptionChecker hfcc;
240  private int retcode = 0;
241  private Path HBCK_LOCK_PATH;
242  private FSDataOutputStream hbckOutFd;
  // This flag is to prevent cleanup of the hbck lock resources twice, between the
  // ShutdownHook and the main code. We clean up only if connect() was
  // successful.
246  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
247
248  // Unsupported options in HBase 2.0+
249  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
250      "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
251      "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
252      "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
253
254  /***********
255   * Options
256   ***********/
257  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this lag
259  private static boolean forceExclusive = false; // only this hbck can modify HBase
260  private boolean fixAssignments = false; // fix assignment errors?
261  private boolean fixMeta = false; // fix meta errors?
262  private boolean checkHdfs = true; // load and check fs consistency?
263  private boolean fixHdfsHoles = false; // fix fs holes?
264  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix fs orphans (missing .tableinfo)
267  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
268  private boolean fixSplitParents = false; // fix lingering split parents
269  private boolean removeParents = false; // remove split parents
270  private boolean fixReferenceFiles = false; // fix lingering reference store file
271  private boolean fixHFileLinks = false; // fix lingering HFileLinks
272  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
273  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
274  private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
275  private boolean fixAny = false; // Set to true if any of the fix is required.
276
277  // limit checking/fixes to listed tables, if empty attempt to check/fix all
  // hbase:meta is always checked
279  private Set<TableName> tablesIncluded = new HashSet<>();
280  private TableName cleanReplicationBarrierTable;
281  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
282  // maximum number of overlapping regions to sideline
283  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
284  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
285  private Path sidelineDir = null;
286
287  private boolean rerun = false; // if we tried to fix something, rerun hbck
288  private static boolean summary = false; // if we want to print less output
289  private boolean checkMetaOnly = false;
290  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if true, skip the filesystem permission pre-check
292
293  /*********
294   * State
295   *********/
296  final private HbckErrorReporter errors;
297  int fixes = 0;
298
299  /**
300   * This map contains the state of all hbck items.  It maps from encoded region
301   * name to HbckRegionInfo structure.  The information contained in HbckRegionInfo is used
302   * to detect and correct consistency (hdfs/meta/deployment) problems.
303   */
304  private TreeMap<String, HbckRegionInfo> regionInfoMap = new TreeMap<>();
305  // Empty regioninfo qualifiers in hbase:meta
306  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
307
308  /**
   * This map from TableName -> HbckTableInfo contains the structures necessary to
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
   * to prevent dupes.
   *
   * If tablesIncluded is empty, this map contains all tables.
   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case it contains only
   * the meta table.
317   */
318  private SortedMap<TableName, HbckTableInfo> tablesInfo = new ConcurrentSkipListMap<>();
319
320  /**
321   * When initially looking at HDFS, we attempt to find any orphaned data.
322   */
323  private List<HbckRegionInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<>());
324
325  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
326  private Map<TableName, TableState> tableStates = new HashMap<>();
327  private final RetryCounterFactory lockFileRetryCounterFactory;
328  private final RetryCounterFactory createZNodeRetryCounterFactory;
329
330  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
331
332  private ZKWatcher zkw = null;
333  private String hbckEphemeralNodePath = null;
334  private boolean hbckZodeCreated = false;
335
336  /**
337   * Constructor
338   *
339   * @param conf Configuration object
   * @throws IOException if an IO failure occurs, e.g. the master is not running or ZooKeeper
   *     cannot be reached
   * @throws ClassNotFoundException if a class needed at construction cannot be loaded
342   */
343  public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
344    this(conf, createThreadPool(conf));
345  }
346
347  private static ExecutorService createThreadPool(Configuration conf) {
348    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
349    return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
350  }
351
352  /**
353   * Constructor
354   *
   * @param conf Configuration object
   * @param exec executor service used to run parallelizable tasks
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
361   */
362  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
363      ZooKeeperConnectionException, IOException, ClassNotFoundException {
364    super(conf);
365    errors = getErrorReporter(getConf());
366    this.executor = exec;
367    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
368    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
369    zkw = createZooKeeperWatcher();
370  }
371
372  /**
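   * Builds the retry counter factory used when creating the hbck lock file.
   * <p>
   * A sketch of how the retries could be tuned through configuration (the keys match the ones
   * read below; the values are illustrative only):
   * <pre>{@code
   * Configuration conf = HBaseConfiguration.create();
   * conf.setInt("hbase.hbck.lockfile.attempts", 10);
   * conf.setInt("hbase.hbck.lockfile.attempt.sleep.interval", 500); // milliseconds
   * RetryCounterFactory factory = HBaseFsck.createLockRetryCounterFactory(conf);
   * }</pre>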
373   * @return A retry counter factory configured for retrying lock file creation.
374   */
375  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
376    return new RetryCounterFactory(
377        conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
378        conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
379            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
380        conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
381            DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
382  }
383
384  /**
385   * @return A retry counter factory configured for retrying znode creation.
386   */
387  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
388    return new RetryCounterFactory(
389        conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
390        conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
391            DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
392        conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
393            DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
394  }
395
396  /**
   * @return The tmp dir this tool writes to.
398   */
399  @VisibleForTesting
400  public static Path getTmpDir(Configuration conf) throws IOException {
401    return new Path(CommonFSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
402  }
403
404  private static class FileLockCallable implements Callable<FSDataOutputStream> {
405    RetryCounter retryCounter;
406    private final Configuration conf;
407    private Path hbckLockPath = null;
408
409    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
410      this.retryCounter = retryCounter;
411      this.conf = conf;
412    }
413
414    /**
415     * @return Will be <code>null</code> unless you call {@link #call()}
416     */
417    Path getHbckLockPath() {
418      return this.hbckLockPath;
419    }
420
421    @Override
422    public FSDataOutputStream call() throws IOException {
423      try {
424        FileSystem fs = CommonFSUtils.getCurrentFileSystem(this.conf);
425        FsPermission defaultPerms =
426          CommonFSUtils.getFilePermissions(fs, this.conf, HConstants.DATA_FILE_UMASK_KEY);
427        Path tmpDir = getTmpDir(conf);
428        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
429        fs.mkdirs(tmpDir);
430        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
431        out.writeBytes(InetAddress.getLocalHost().toString());
        // Add a note to the file explaining why hbase2 writes out an hbck1 lock file.
        out.writeBytes(" Written by an hbase-2.x Master to block an " +
            "attempt by an hbase-1.x HBCK tool to make modifications to state. " +
435            "See 'HBCK must match HBase server version' in the hbase refguide.");
436        out.flush();
437        return out;
438      } catch(RemoteException e) {
439        if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
440          return null;
441        } else {
442          throw e;
443        }
444      }
445    }
446
447    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
448        final Path hbckLockFilePath, final FsPermission defaultPerms)
449        throws IOException {
450      IOException exception = null;
451      do {
452        try {
453          return CommonFSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
454        } catch (IOException ioe) {
455          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
456              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
457              + retryCounter.getMaxAttempts());
458          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
459              ioe);
460          try {
461            exception = ioe;
462            retryCounter.sleepUntilNextRetry();
463          } catch (InterruptedException ie) {
464            throw (InterruptedIOException) new InterruptedIOException(
465                "Can't create lock file " + hbckLockFilePath.getName())
466            .initCause(ie);
467          }
468        }
469      } while (retryCounter.shouldRetry());
470
471      throw exception;
472    }
473  }
474
475  /**
   * This method maintains a lock using a file. If the lock file cannot be created, the stream in
   * the returned pair is null.
477   *
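   * <p>
   * A sketch of typical use, mirroring what {@link #connect()} does; variable names are
   * illustrative only:
   * <pre>{@code
   * RetryCounter counter = HBaseFsck.createLockRetryCounterFactory(conf).create();
   * Pair<Path, FSDataOutputStream> lock = HBaseFsck.checkAndMarkRunningHbck(conf, counter);
   * if (lock.getSecond() == null) {
   *   // another hbck instance already holds the lock file
   * }
   * }</pre>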
   * @return A pair of the lock file path and an FSDataOutputStream for the newly opened lock
   *     file; the stream is null if the lock could not be obtained
479   * @throws IOException if IO failure occurs
480   */
481  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
482      RetryCounter retryCounter) throws IOException {
483    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
484    ExecutorService executor = Executors.newFixedThreadPool(1);
485    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
486    executor.execute(futureTask);
487    final int timeoutInSeconds = conf.getInt(
488      "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
489    FSDataOutputStream stream = null;
490    try {
491      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
492    } catch (ExecutionException ee) {
493      LOG.warn("Encountered exception when opening lock file", ee);
494    } catch (InterruptedException ie) {
495      LOG.warn("Interrupted when opening lock file", ie);
496      Thread.currentThread().interrupt();
497    } catch (TimeoutException exception) {
498      // took too long to obtain lock
499      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
500      futureTask.cancel(true);
501    } finally {
502      executor.shutdownNow();
503    }
504    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
505  }
506
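  /**
   * Best-effort removal of the hbck lock file created by
   * {@link #checkAndMarkRunningHbck(Configuration, RetryCounter)}, retried a few times. Only
   * done if this instance actually took the lock.
   */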
507  private void unlockHbck() {
508    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
509      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
510      do {
511        try {
512          IOUtils.closeQuietly(hbckOutFd);
513          CommonFSUtils.delete(CommonFSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
514          LOG.info("Finishing hbck");
515          return;
516        } catch (IOException ioe) {
517          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
518              + (retryCounter.getAttemptTimes() + 1) + " of "
519              + retryCounter.getMaxAttempts());
520          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
521          try {
522            retryCounter.sleepUntilNextRetry();
523          } catch (InterruptedException ie) {
524            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while deleting lock file " +
526                HBCK_LOCK_PATH);
527            return;
528          }
529        }
530      } while (retryCounter.shouldRetry());
531    }
532  }
533
534  /**
535   * To repair region consistency, one must call connect() in order to repair
536   * online state.
537   */
538  public void connect() throws IOException {
539
540    if (isExclusive()) {
541      // Grab the lock
542      Pair<Path, FSDataOutputStream> pair =
543          checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
544      HBCK_LOCK_PATH = pair.getFirst();
545      this.hbckOutFd = pair.getSecond();
546      if (hbckOutFd == null) {
547        setRetCode(-1);
548        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
549            "[If you are sure no other instance is running, delete the lock file " +
550            HBCK_LOCK_PATH + " and rerun the tool]");
551        throw new IOException("Duplicate hbck - Abort");
552      }
553
554      // Make sure to cleanup the lock
555      hbckLockCleanup.set(true);
556    }
557
558
    // Add a shutdown hook in case the user tries to kill hbck with a ctrl-c;
    // we want to clean up the lock so that it is available for further calls.
562    Runtime.getRuntime().addShutdownHook(new Thread() {
563      @Override
564      public void run() {
565        IOUtils.closeQuietly(HBaseFsck.this);
566        cleanupHbckZnode();
567        unlockHbck();
568      }
569    });
570
571    LOG.info("Launching hbck");
572
573    connection = ConnectionFactory.createConnection(getConf());
574    admin = connection.getAdmin();
575    meta = connection.getTable(TableName.META_TABLE_NAME);
576    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
577      Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
578      Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
579  }
580
581  /**
582   * Get deployed regions according to the region servers.
583   */
584  private void loadDeployedRegions() throws IOException, InterruptedException {
585    // From the master, get a list of all known live region servers
586    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
587    errors.print("Number of live region servers: " + regionServers.size());
588    if (details) {
589      for (ServerName rsinfo: regionServers) {
590        errors.print("  " + rsinfo.getServerName());
591      }
592    }
593
594    // From the master, get a list of all dead region servers
595    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
596    errors.print("Number of dead region servers: " + deadRegionServers.size());
597    if (details) {
598      for (ServerName name: deadRegionServers) {
599        errors.print("  " + name);
600      }
601    }
602
603    // Print the current master name and state
604    errors.print("Master: " + status.getMasterName());
605
606    // Print the list of all backup masters
607    Collection<ServerName> backupMasters = status.getBackupMasterNames();
608    errors.print("Number of backup masters: " + backupMasters.size());
609    if (details) {
610      for (ServerName name: backupMasters) {
611        errors.print("  " + name);
612      }
613    }
614
615    errors.print("Average load: " + status.getAverageLoad());
616    errors.print("Number of requests: " + status.getRequestCount());
617    errors.print("Number of regions: " + status.getRegionCount());
618
619    List<RegionState> rits = status.getRegionStatesInTransition();
620    errors.print("Number of regions in transition: " + rits.size());
621    if (details) {
622      for (RegionState state: rits) {
623        errors.print("  " + state.toDescriptiveString());
624      }
625    }
626
627    // Determine what's deployed
628    processRegionServers(regionServers);
629  }
630
631  /**
632   * Clear the current state of hbck.
633   */
634  private void clearState() {
635    // Make sure regionInfo is empty before starting
636    fixes = 0;
637    regionInfoMap.clear();
638    emptyRegionInfoQualifiers.clear();
639    tableStates.clear();
640    errors.clear();
641    tablesInfo.clear();
642    orphanHdfsDirs.clear();
643    skippedRegions.clear();
644  }
645
646  /**
647   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
648   * the table integrity rules.  HBase doesn't need to be online for this
649   * operation to work.
650   */
651  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
652    // Initial pass to fix orphans.
653    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
654        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
656      // if nothing is happening this should always complete in two iterations.
657      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
658      int curIter = 0;
659      do {
        clearState(); // clears hbck state and resets fixes to 0
661        // repair what's on HDFS
662        restoreHdfsIntegrity();
663        curIter++;// limit the number of iterations.
664      } while (fixes > 0 && curIter <= maxIterations);
665
666      // Repairs should be done in the first iteration and verification in the second.
667      // If there are more than 2 passes, something funny has happened.
668      if (curIter > 2) {
669        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
672        } else {
673          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
674        }
675      }
676    }
677  }
678
679  /**
680   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters.  It makes each region's state in HDFS, in
   * hbase:meta, and its deployment consistent.
683   *
   * @return If &gt; 0, number of errors detected; if &lt; 0 there was an unrecoverable
685   *     error.  If 0, we have a clean hbase.
686   */
687  public int onlineConsistencyRepair() throws IOException, KeeperException,
688    InterruptedException {
689
690    // get regions according to what is online on each RegionServer
691    loadDeployedRegions();
692    // check whether hbase:meta is deployed and online
693    recordMetaRegion();
694    // Check if hbase:meta is found only once and in the right place
695    if (!checkMetaRegion()) {
696      String errorMsg = "hbase:meta table is not consistent. ";
697      if (shouldFixAssignments()) {
698        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
699      } else {
700        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
701      }
702      errors.reportError(errorMsg + " Exiting...");
703      return -2;
704    }
    // Do not continue with further consistency checks for tables when hbase:meta itself is not
    // consistent.
    LOG.info("Loading regioninfos from the hbase:meta table");
707    boolean success = loadMetaEntries();
708    if (!success) return -1;
709
710    // Empty cells in hbase:meta?
711    reportEmptyMetaCells();
712
713    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
714    if (shouldFixEmptyMetaCells()) {
715      fixEmptyMetaCells();
716    }
717
718    // get a list of all tables that have not changed recently.
719    if (!checkMetaOnly) {
720      reportTablesInFlux();
721    }
722
723    // Get disabled tables states
724    loadTableStates();
725
726    // load regiondirs and regioninfos from HDFS
727    if (shouldCheckHdfs()) {
728      LOG.info("Loading region directories from HDFS");
729      loadHdfsRegionDirs();
730      LOG.info("Loading region information from HDFS");
731      loadHdfsRegionInfos();
732    }
733
734    // fix the orphan tables
735    fixOrphanTables();
736
737    LOG.info("Checking and fixing region consistency");
738    // Check and fix consistency
739    checkAndFixConsistency();
740
741    // Check integrity (does not fix)
742    checkIntegrity();
743    return errors.getErrorList().size();
744  }
745
746  /**
   * This method maintains an ephemeral znode. If the creation fails we return false or throw
   * an exception.
749   *
750   * @return true if creating znode succeeds; false otherwise
751   * @throws IOException if IO failure occurs
752   */
753  private boolean setMasterInMaintenanceMode() throws IOException {
754    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
755    hbckEphemeralNodePath = ZNodePaths.joinZNode(
756      zkw.getZNodePaths().masterMaintZNode,
757      "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
758    do {
759      try {
760        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
761        if (hbckZodeCreated) {
762          break;
763        }
764      } catch (KeeperException e) {
765        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
766           throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
767        }
768        // fall through and retry
769      }
770
      LOG.warn("Failed to create znode " + hbckEphemeralNodePath + ", try=" +
772          (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
773
774      try {
775        retryCounter.sleepUntilNextRetry();
776      } catch (InterruptedException ie) {
777        throw (InterruptedIOException) new InterruptedIOException(
778              "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
779      }
780    } while (retryCounter.shouldRetry());
781    return hbckZodeCreated;
782  }
783
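  /**
   * Best-effort removal of the ephemeral maintenance-mode znode created by
   * {@link #setMasterInMaintenanceMode()}; a missing znode is ignored.
   */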
784  private void cleanupHbckZnode() {
785    try {
786      if (zkw != null && hbckZodeCreated) {
787        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
788        hbckZodeCreated = false;
789      }
790    } catch (KeeperException e) {
791      // Ignore
792      if (!e.code().equals(KeeperException.Code.NONODE)) {
793        LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
794      }
795    }
796  }
797
798  /**
   * Contacts the master, prints out cluster-wide information, and runs the configured
   * checks and repairs.
800   * @return 0 on success, non-zero on failure
801   */
802  public int onlineHbck()
803      throws IOException, KeeperException, InterruptedException, ReplicationException {
804    // print hbase server version
805    errors.print("Version: " + status.getHBaseVersion());
806
807    // Clean start
808    clearState();
809    // Do offline check and repair first
810    offlineHdfsIntegrityRepair();
811    offlineReferenceFileRepair();
812    offlineHLinkFileRepair();
813    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
814    // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
815    // is better to set Master into maintenance mode during online hbck.
816    //
817    if (!setMasterInMaintenanceMode()) {
      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
        + "errors.  Please run HBCK multiple times to reduce the chance of transient errors.");
820    }
821
822    onlineConsistencyRepair();
823
824    if (checkRegionBoundaries) {
825      checkRegionBoundaries();
826    }
827
828    checkAndFixReplication();
829
830    cleanReplicationBarrier();
831
832    // Remove the hbck znode
833    cleanupHbckZnode();
834
835    // Remove the hbck lock
836    unlockHbck();
837
838    // Print table summary
839    printTableSummary(tablesInfo);
840    return errors.summarize();
841  }
842
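  /**
   * Returns just the row portion of a serialized KeyValue key: the first two bytes hold the row
   * length, followed by the row bytes themselves. Returns the input as-is when it is null.
   */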
843  public static byte[] keyOnly(byte[] b) {
    if (b == null) {
      return b;
    }
846    int rowlength = Bytes.toShort(b, 0);
847    byte[] result = new byte[rowlength];
848    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
849    return result;
850  }
851
852  @Override
853  public void close() throws IOException {
854    try {
855      cleanupHbckZnode();
856      unlockHbck();
857    } catch (Exception io) {
858      LOG.warn(io.toString(), io);
859    } finally {
860      if (zkw != null) {
861        zkw.close();
862        zkw = null;
863      }
864      IOUtils.closeQuietly(admin);
865      IOUtils.closeQuietly(meta);
866      IOUtils.closeQuietly(connection);
867    }
868  }
869
870  private static class RegionBoundariesInformation {
871    public byte [] regionName;
872    public byte [] metaFirstKey;
873    public byte [] metaLastKey;
874    public byte [] storesFirstKey;
875    public byte [] storesLastKey;
876    @Override
877    public String toString () {
878      return "regionName=" + Bytes.toStringBinary(regionName) +
879             "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
880             "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
881             "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
882             "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
883    }
884  }
885
886  public void checkRegionBoundaries() {
887    try {
888      ByteArrayComparator comparator = new ByteArrayComparator();
889      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
890      final RegionBoundariesInformation currentRegionBoundariesInformation =
891          new RegionBoundariesInformation();
892      Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
893      for (RegionInfo regionInfo : regions) {
894        Path tableDir = CommonFSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
895        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
896        // For each region, get the start and stop key from the META and compare them to the
897        // same information from the Stores.
898        Path path = new Path(tableDir, regionInfo.getEncodedName());
899        FileSystem fs = path.getFileSystem(getConf());
900        FileStatus[] files = fs.listStatus(path);
901        // For all the column families in this region...
902        byte[] storeFirstKey = null;
903        byte[] storeLastKey = null;
904        for (FileStatus file : files) {
905          String fileName = file.getPath().toString();
906          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
907          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
908            FileStatus[] storeFiles = fs.listStatus(file.getPath());
909            // For all the stores in this column family.
910            for (FileStatus storeFile : storeFiles) {
911              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
912                CacheConfig.DISABLED, true, getConf());
913              if ((reader.getFirstKey() != null)
914                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
915                      ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
916                storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
917              }
918              if ((reader.getLastKey() != null)
919                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
920                      ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
921                storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
922              }
923              reader.close();
924            }
925          }
926        }
927        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
928        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
929        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
930        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
931        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
932          currentRegionBoundariesInformation.metaFirstKey = null;
933        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
934          currentRegionBoundariesInformation.metaLastKey = null;
935
936        // For a region to be correct, we need the META start key to be smaller or equal to the
937        // smallest start key from all the stores, and the start key from the next META entry to
938        // be bigger than the last key from all the current stores. First region start key is null;
939        // Last region end key is null; some regions can be empty and not have any store.
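        // For example, a region with boundaries [b, f) is valid if its smallest store row key
        // is >= 'b' and its largest store row key is < 'f'.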
940
941        boolean valid = true;
942        // Checking start key.
943        if ((currentRegionBoundariesInformation.storesFirstKey != null)
944            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
945          valid = valid
946              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
947                currentRegionBoundariesInformation.metaFirstKey) >= 0;
948        }
949        // Checking stop key.
950        if ((currentRegionBoundariesInformation.storesLastKey != null)
951            && (currentRegionBoundariesInformation.metaLastKey != null)) {
952          valid = valid
953              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
954                currentRegionBoundariesInformation.metaLastKey) < 0;
955        }
956        if (!valid) {
957          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
958            tablesInfo.get(regionInfo.getTable()));
959          LOG.warn("Region's boundaries not aligned between stores and META for:");
960          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
961        }
962      }
963    } catch (IOException e) {
964      LOG.error(e.toString(), e);
965    }
966  }
967
968  /**
969   * Iterates through the list of all orphan/invalid regiondirs.
970   */
971  private void adoptHdfsOrphans(Collection<HbckRegionInfo> orphanHdfsDirs) throws IOException {
972    for (HbckRegionInfo hi : orphanHdfsDirs) {
973      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
974      adoptHdfsOrphan(hi);
975    }
976  }
977
978  /**
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
   * these orphans by creating a new region, and moving the column families,
   * recovered edits, and WALs into the new region dir.  We determine the region
   * startkey and endkey by looking at all of the hfiles inside the column
   * families to identify the min and max keys. The resulting region will
   * likely violate table integrity but will be dealt with by merging
   * overlapping regions.
986   */
987  @SuppressWarnings("deprecation")
988  private void adoptHdfsOrphan(HbckRegionInfo hi) throws IOException {
989    Path p = hi.getHdfsRegionDir();
990    FileSystem fs = p.getFileSystem(getConf());
991    FileStatus[] dirs = fs.listStatus(p);
992    if (dirs == null) {
993      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
994          p + ". This dir could probably be deleted.");
995      return ;
996    }
997
998    TableName tableName = hi.getTableName();
999    HbckTableInfo tableInfo = tablesInfo.get(tableName);
1000    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
1001    TableDescriptor template = tableInfo.getTableDescriptor();
1002
1003    // find min and max key values
1004    Pair<byte[],byte[]> orphanRegionRange = null;
1005    for (FileStatus cf : dirs) {
1006      String cfName= cf.getPath().getName();
1007      // TODO Figure out what the special dirs are
1008      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
1009
1010      FileStatus[] hfiles = fs.listStatus(cf.getPath());
1011      for (FileStatus hfile : hfiles) {
1012        byte[] start, end;
1013        HFile.Reader hf = null;
1014        try {
1015          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
1016          Optional<Cell> startKv = hf.getFirstKey();
1017          start = CellUtil.cloneRow(startKv.get());
1018          Optional<Cell> endKv = hf.getLastKey();
1019          end = CellUtil.cloneRow(endKv.get());
1020        } catch (IOException ioe) {
1021          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
1022          continue;
        } catch (NullPointerException npe) {
          LOG.warn("Orphan file " + hfile + " is possibly a corrupted HFile, skipping");
1025          continue;
1026        } finally {
1027          if (hf != null) {
1028            hf.close();
1029          }
1030        }
1031
1032        // expand the range to include the range of all hfiles
1033        if (orphanRegionRange == null) {
1034          // first range
1035          orphanRegionRange = new Pair<>(start, end);
1036        } else {
1037          // TODO add test
1038
1039          // expand range only if the hfile is wider.
1040          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1041            orphanRegionRange.setFirst(start);
1042          }
1043          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
1044            orphanRegionRange.setSecond(end);
1045          }
1046        }
1047      }
1048    }
1049    if (orphanRegionRange == null) {
1050      LOG.warn("No data in dir " + p + ", sidelining data");
1051      fixes++;
1052      sidelineRegionDir(fs, hi);
1053      return;
1054    }
1055    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1056        Bytes.toString(orphanRegionRange.getSecond()) + ")");
1057
1058    // create new region on hdfs. move data into place.
1059    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1060        .setStartKey(orphanRegionRange.getFirst())
1061        .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1]))
1062        .build();
1063    LOG.info("Creating new region : " + regionInfo);
1064    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1065    Path target = region.getRegionFileSystem().getRegionDir();
1066
1067    // rename all the data to new region
1068    mergeRegionDirs(target, hi);
1069    fixes++;
1070  }
1071
1072  /**
1073   * This method determines if there are table integrity errors in HDFS.  If
1074   * there are errors and the appropriate "fix" options are enabled, the method
1075   * will first correct orphan regions making them into legit regiondirs, and
1076   * then reload to merge potentially overlapping regions.
1077   *
1078   * @return number of table integrity errors found
1079   */
1080  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1081    // Determine what's on HDFS
1082    LOG.info("Loading HBase regioninfo from HDFS...");
1083    loadHdfsRegionDirs(); // populating regioninfo table.
1084
1085    int errs = errors.getErrorList().size();
1086    // First time just get suggestions.
1087    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1088    checkHdfsIntegrity(false, false);
1089
1090    if (errors.getErrorList().size() == errs) {
1091      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1092      return 0;
1093    }
1094
1095    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1096      adoptHdfsOrphans(orphanHdfsDirs);
1097      // TODO optimize by incrementally adding instead of reloading.
1098    }
1099
1100    // Make sure there are no holes now.
1101    if (shouldFixHdfsHoles()) {
1102      clearState(); // this also resets # fixes.
1103      loadHdfsRegionDirs();
1104      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1105      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1106    }
1107
1108    // Now we fix overlaps
1109    if (shouldFixHdfsOverlaps()) {
1110      // second pass we fix overlaps.
1111      clearState(); // this also resets # fixes.
1112      loadHdfsRegionDirs();
1113      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1114      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1115    }
1116
1117    return errors.getErrorList().size();
1118  }
1119
1120  /**
1121   * Scan all the store file names to find any lingering reference files,
   * which refer to non-existing files. If the "fix" option is enabled,
   * any lingering reference file found will be sidelined.
   * <p>
   * A lingering reference file prevents a region from opening. It has to
   * be fixed before a cluster can start properly.
1127   */
1128  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1129    clearState();
1130    Configuration conf = getConf();
1131    Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1132    FileSystem fs = hbaseRoot.getFileSystem(conf);
1133    LOG.info("Computing mapping of all store files");
1134    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1135      new FSUtils.ReferenceFileFilter(fs), executor, errors);
1136    errors.print("");
1137    LOG.info("Validating mapping using HDFS state");
1138    for (Path path: allFiles.values()) {
1139      Path referredToFile = StoreFileInfo.getReferredToFile(path);
1140      if (fs.exists(referredToFile)) continue;  // good, expected
1141
1142      // Found a lingering reference file
1143      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1144        "Found lingering reference file " + path);
1145      if (!shouldFixReferenceFiles()) continue;
1146
1147      // Now, trying to fix it since requested
1148      boolean success = false;
1149      String pathStr = path.toString();
1150
1151      // A reference file path should be like
1152      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
      // We go up 5 directories to get to the root folder,
      // so the file will be sidelined into a similar folder structure.
1155      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1156      for (int i = 0; index > 0 && i < 5; i++) {
1157        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1158      }
1159      if (index > 0) {
1160        Path rootDir = getSidelineDir();
1161        Path dst = new Path(rootDir, pathStr.substring(index + 1));
1162        fs.mkdirs(dst.getParent());
1163        LOG.info("Trying to sideline reference file "
1164          + path + " to " + dst);
1165        setShouldRerun();
1166
1167        success = fs.rename(path, dst);
1168        debugLsr(dst);
1169
1170      }
1171      if (!success) {
1172        LOG.error("Failed to sideline reference file " + path);
1173      }
1174    }
1175  }
1176
1177  /**
1178   * Scan all the store file names to find any lingering HFileLink files,
   * which refer to non-existing files. If the "fix" option is enabled,
   * any lingering HFileLink file found will be sidelined.
1181   */
1182  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1183    Configuration conf = getConf();
1184    Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1185    FileSystem fs = hbaseRoot.getFileSystem(conf);
1186    LOG.info("Computing mapping of all link files");
1187    Map<String, Path> allFiles = FSUtils
1188        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1189    errors.print("");
1190
1191    LOG.info("Validating mapping using HDFS state");
1192    for (Path path : allFiles.values()) {
1193      // building HFileLink object to gather locations
1194      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1195      if (actualLink.exists(fs)) continue; // good, expected
1196
1197      // Found a lingering HFileLink
1198      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1199      if (!shouldFixHFileLinks()) continue;
1200
1201      // Now, trying to fix it since requested
1202      setShouldRerun();
1203
1204      // An HFileLink path should be like
1205      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1207      boolean success = sidelineFile(fs, hbaseRoot, path);
1208
1209      if (!success) {
1210        LOG.error("Failed to sideline HFileLink file " + path);
1211      }
1212
1213      // An HFileLink backreference path should be like
1214      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1216      Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1217              .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1218                  HFileLink.getReferencedRegionName(path.getName().toString()),
1219                  path.getParent().getName()),
1220          HFileLink.getReferencedHFileName(path.getName().toString()));
1221      success = sidelineFile(fs, hbaseRoot, backRefPath);
1222
1223      if (!success) {
1224        LOG.error("Failed to sideline HFileLink backreference file " + path);
1225      }
1226    }
1227  }
1228
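  /**
   * Moves the given file under the sideline directory, preserving its path relative to the HBase
   * root directory.
   * @return true if the rename succeeded; false if the path is not under the root or the parent
   *     directory could not be created
   */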
1229  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1230    URI uri = hbaseRoot.toUri().relativize(path.toUri());
1231    if (uri.isAbsolute()) return false;
1232    String relativePath = uri.getPath();
1233    Path rootDir = getSidelineDir();
1234    Path dst = new Path(rootDir, relativePath);
1235    boolean pathCreated = fs.mkdirs(dst.getParent());
1236    if (!pathCreated) {
1237      LOG.error("Failed to create path: " + dst.getParent());
1238      return false;
1239    }
1240    LOG.info("Trying to sideline file " + path + " to " + dst);
1241    return fs.rename(path, dst);
1242  }
1243
1244  /**
1245   * TODO -- need to add tests for this.
1246   */
1247  private void reportEmptyMetaCells() {
1248    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1249      emptyRegionInfoQualifiers.size());
1250    if (details) {
1251      for (Result r: emptyRegionInfoQualifiers) {
1252        errors.print("  " + r);
1253      }
1254    }
1255  }
1256
1257  /**
1258   * TODO -- need to add tests for this.
1259   */
1260  private void reportTablesInFlux() {
1261    AtomicInteger numSkipped = new AtomicInteger(0);
1262    TableDescriptor[] allTables = getTables(numSkipped);
1263    errors.print("Number of Tables: " + allTables.length);
1264    if (details) {
1265      if (numSkipped.get() > 0) {
1266        errors.detail("Number of Tables in flux: " + numSkipped.get());
1267      }
1268      for (TableDescriptor td : allTables) {
1269        errors.detail("  Table: " + td.getTableName() + "\t" +
1270                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
1271                            (td.isMetaRegion() ? "META" : "    ") + "\t" +
1272                           " families: " + td.getColumnFamilyCount());
1273      }
1274    }
1275  }
1276
1277  public HbckErrorReporter getErrors() {
1278    return errors;
1279  }
1280
1281  /**
1282   * Populate hbi's from regionInfos loaded from file system.
1283   */
1284  private SortedMap<TableName, HbckTableInfo> loadHdfsRegionInfos()
1285      throws IOException, InterruptedException {
1286    tablesInfo.clear(); // regenerating the data
1287    // generate region split structure
1288    Collection<HbckRegionInfo> hbckRegionInfos = regionInfoMap.values();
1289
1290    // Parallelized read of .regioninfo files.
1291    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckRegionInfos.size());
1292    List<Future<Void>> hbiFutures;
1293
1294    for (HbckRegionInfo hbi : hbckRegionInfos) {
1295      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1296      hbis.add(work);
1297    }
1298
1299    // Submit and wait for completion
1300    hbiFutures = executor.invokeAll(hbis);
1301
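    // Wait for each read to finish; a failed .regioninfo read is logged but does not abort
    // the overall HDFS scan.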
    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
            work.hbi.getRegionNameAsString(), e.getCause());
      }
1311    }
1312
1313    Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
1314    FileSystem fs = hbaseRoot.getFileSystem(getConf());
1315    // serialized table info gathering.
1316    for (HbckRegionInfo hbi: hbckRegionInfos) {
1317
1318      if (hbi.getHdfsHRI() == null) {
1319        // was an orphan
1320        continue;
1321      }
1322
1323
1324      // get table name from hdfs, populate various HBaseFsck tables.
1325      TableName tableName = hbi.getTableName();
1326      if (tableName == null) {
        // There was an entry in hbase:meta not in HDFS?
1328        LOG.warn("tableName was null for: " + hbi);
1329        continue;
1330      }
1331
1332      HbckTableInfo modTInfo = tablesInfo.get(tableName);
1333      if (modTInfo == null) {
1334        // only executed once per table.
1335        modTInfo = new HbckTableInfo(tableName, this);
1336        tablesInfo.put(tableName, modTInfo);
1337        try {
1338          TableDescriptor htd =
1339              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1340          modTInfo.htds.add(htd);
1341        } catch (IOException ioe) {
1342          if (!orphanTableDirs.containsKey(tableName)) {
1343            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1344            //should only report once for each table
1345            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1346                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1347            Set<String> columns = new HashSet<>();
1348            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1349          }
1350        }
1351      }
1352      if (!hbi.isSkipChecks()) {
1353        modTInfo.addRegionInfo(hbi);
1354      }
1355    }
1356
1357    loadTableInfosForTablesWithNoRegion();
1358    errors.print("");
1359
1360    return tablesInfo;
1361  }
1362
  /**
   * Get the column family list by listing the family dirs under the region's HDFS dir.
   * @param columns set to which discovered column family names are added
   * @param hbi region whose HDFS region dir is scanned
   * @return the passed-in set, populated with the column families found on HDFS
   * @throws IOException if the filesystem cannot be listed
   */
1370  private Set<String> getColumnFamilyList(Set<String> columns, HbckRegionInfo hbi)
1371      throws IOException {
1372    Path regionDir = hbi.getHdfsRegionDir();
1373    FileSystem fs = regionDir.getFileSystem(getConf());
1374    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1375    for (FileStatus subdir : subDirs) {
1376      String columnfamily = subdir.getPath().getName();
1377      columns.add(columnfamily);
1378    }
1379    return columns;
1380  }
1381
  /**
   * Fabricate a .tableinfo file with the following contents:<br>
   * 1. the correct table name<br>
   * 2. the correct column family list<br>
   * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
   * @throws IOException if the descriptor cannot be written
   */
1389  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1390      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
    for (String columnFamily : columns) {
      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnFamily));
1395    }
1396    fstd.createTableDescriptor(builder.build(), true);
1397    return true;
1398  }
1399
  /**
   * Fix the empty REGIONINFO_QUALIFIER rows in hbase:meta by deleting them.
   * @throws IOException if the rows cannot be deleted
   */
1404  public void fixEmptyMetaCells() throws IOException {
1405    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1406      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1407      for (Result region : emptyRegionInfoQualifiers) {
1408        deleteMetaRegion(region.getRow());
1409        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1410      }
1411      emptyRegionInfoQualifiers.clear();
1412    }
1413  }
1414
  /**
   * Fix orphan tables by creating a .tableinfo file under each orphan tableDir:<br>
   * 1. if the TableInfo is cached, recover the .tableinfo from the cached copy<br>
   * 2. else create a default .tableinfo file with the following items:<br>
   * &nbsp;2.1 the correct table name<br>
   * &nbsp;2.2 the correct column family list<br>
   * &nbsp;2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
   * @throws IOException if a .tableinfo file cannot be written
   */
1424  public void fixOrphanTables() throws IOException {
1425    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1426
1427      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1428      tmpList.addAll(orphanTableDirs.keySet());
1429      TableDescriptor[] htds = getTableDescriptors(tmpList);
1430      Iterator<Entry<TableName, Set<String>>> iter =
1431          orphanTableDirs.entrySet().iterator();
1432      int j = 0;
1433      int numFailedCase = 0;
1434      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1435      while (iter.hasNext()) {
1436        Entry<TableName, Set<String>> entry =
1437            iter.next();
1438        TableName tableName = entry.getKey();
1439        LOG.info("Trying to fix orphan table error: " + tableName);
1440        if (j < htds.length) {
1441          if (tableName.equals(htds[j].getTableName())) {
1442            TableDescriptor htd = htds[j];
1443            LOG.info("fixing orphan table: " + tableName + " from cache");
1444            fstd.createTableDescriptor(htd, true);
1445            j++;
1446            iter.remove();
1447          }
1448        } else {
1449          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
            LOG.warn("Fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend modifying the TableDescriptor if necessary for: " + tableName);
1452            iter.remove();
1453          } else {
            LOG.error("Unable to create default .tableinfo for " + tableName
                + " because column family information is missing");
1455            numFailedCase++;
1456          }
1457        }
1458        fixes++;
1459      }
1460
1461      if (orphanTableDirs.isEmpty()) {
        // all orphanTableDirs were recovered
        // re-run doFsck after recovering the .tableinfo files
        setShouldRerun();
        LOG.warn("Strongly recommend manually re-running hbck after all orphanTableDirs have been fixed");
1466      } else if (numFailedCase > 0) {
1467        LOG.error("Failed to fix " + numFailedCase
1468            + " OrphanTables with default .tableinfo files");
1469      }
1470
1471    }
1472    //cleanup the list
1473    orphanTableDirs.clear();
1474
1475  }
1476
1477  /**
1478   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1479   */
1480  private void logParallelMerge() {
1481    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
      LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
          " false to run serially.");
    } else {
      LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
1486          " true to run in parallel.");
1487    }
1488  }
1489
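  /**
   * Check the region chain of every table loaded from HDFS. Depending on the fix flags, either
   * only suggest fixes (IntegrityFixSuggester) or actively repair holes and overlaps
   * (HDFSIntegrityFixer).
   */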
1490  private SortedMap<TableName, HbckTableInfo> checkHdfsIntegrity(boolean fixHoles,
1491      boolean fixOverlaps) throws IOException {
1492    LOG.info("Checking HBase region split map from HDFS data...");
1493    logParallelMerge();
1494    for (HbckTableInfo tInfo : tablesInfo.values()) {
1495      TableIntegrityErrorHandler handler;
1496      if (fixHoles || fixOverlaps) {
1497        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1498          fixHoles, fixOverlaps);
1499      } else {
1500        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1501      }
1502      if (!tInfo.checkRegionChain(handler)) {
1503        // should dump info as well.
1504        errors.report("Found inconsistency in table " + tInfo.getName());
1505      }
1506    }
1507    return tablesInfo;
1508  }
1509
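  /**
   * Lazily create and return the sideline directory, placed under the HBCK sideline dir in the
   * HBase root and named after the root dir plus this run's startMillis timestamp.
   */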
1510  Path getSidelineDir() throws IOException {
1511    if (sidelineDir == null) {
1512      Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1513      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1514      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1515          + startMillis);
1516    }
1517    return sidelineDir;
1518  }
1519
1520  /**
1521   * Sideline a region dir (instead of deleting it)
1522   */
1523  Path sidelineRegionDir(FileSystem fs, HbckRegionInfo hi) throws IOException {
1524    return sidelineRegionDir(fs, null, hi);
1525  }
1526
1527  /**
1528   * Sideline a region dir (instead of deleting it)
1529   *
1530   * @param parentDir if specified, the region will be sidelined to folder like
1531   *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1532   *     similar regions sidelined, for example, those regions should be bulk loaded back later
1533   *     on. If NULL, it is ignored.
1534   */
1535  Path sidelineRegionDir(FileSystem fs,
1536      String parentDir, HbckRegionInfo hi) throws IOException {
1537    TableName tableName = hi.getTableName();
1538    Path regionDir = hi.getHdfsRegionDir();
1539
1540    if (!fs.exists(regionDir)) {
1541      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1542      return null;
1543    }
1544
1545    Path rootDir = getSidelineDir();
1546    if (parentDir != null) {
1547      rootDir = new Path(rootDir, parentDir);
1548    }
    Path sidelineTableDir = CommonFSUtils.getTableDir(rootDir, tableName);
1550    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1551    fs.mkdirs(sidelineRegionDir);
1552    boolean success = false;
    FileStatus[] cfs = fs.listStatus(regionDir);
1554    if (cfs == null) {
1555      LOG.info("Region dir is empty: " + regionDir);
1556    } else {
1557      for (FileStatus cf : cfs) {
1558        Path src = cf.getPath();
1559        Path dst =  new Path(sidelineRegionDir, src.getName());
1560        if (fs.isFile(src)) {
1561          // simple file
1562          success = fs.rename(src, dst);
1563          if (!success) {
1564            String msg = "Unable to rename file " + src +  " to " + dst;
1565            LOG.error(msg);
1566            throw new IOException(msg);
1567          }
1568          continue;
1569        }
1570
1571        // is a directory.
1572        fs.mkdirs(dst);
1573
1574        LOG.info("Sidelining files from " + src + " into containing region " + dst);
1575        // FileSystem.rename is inconsistent with directories -- if the
1576        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1577        // it moves the src into the dst dir resulting in (foo/a/b).  If
1578        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1579        FileStatus[] hfiles = fs.listStatus(src);
1580        if (hfiles != null && hfiles.length > 0) {
1581          for (FileStatus hfile : hfiles) {
1582            success = fs.rename(hfile.getPath(), dst);
1583            if (!success) {
1584              String msg = "Unable to rename file " + src +  " to " + dst;
1585              LOG.error(msg);
1586              throw new IOException(msg);
1587            }
1588          }
1589        }
1590        LOG.debug("Sideline directory contents:");
1591        debugLsr(sidelineRegionDir);
1592      }
1593    }
1594
1595    LOG.info("Removing old region dir: " + regionDir);
1596    success = fs.delete(regionDir, true);
1597    if (!success) {
1598      String msg = "Unable to delete dir " + regionDir;
1599      LOG.error(msg);
1600      throw new IOException(msg);
1601    }
1602    return sidelineRegionDir;
1603  }
1604
  /**
   * Load the table states from hbase:meta into the local map.
   * @throws IOException if the table states cannot be read
   */
  private void loadTableStates() throws IOException {
1612    tableStates = MetaTableAccessor.getTableStates(connection);
1613    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
1614    // has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this in
    // the meantime.
1616    this.tableStates.put(TableName.META_TABLE_NAME,
1617        new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1618  }
1619
1620  /**
1621   * Check if the specified region's table is disabled.
1622   * @param tableName table to check status of
1623   */
1624  boolean isTableDisabled(TableName tableName) {
1625    return tableStates.containsKey(tableName)
1626        && tableStates.get(tableName)
1627        .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1628  }
1629
1630  /**
1631   * Scan HDFS for all regions, recording their information into
1632   * regionInfoMap
1633   */
1634  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1635    Path rootDir = CommonFSUtils.getRootDir(getConf());
1636    FileSystem fs = rootDir.getFileSystem(getConf());
1637
1638    // list all tables from HDFS
1639    List<FileStatus> tableDirs = Lists.newArrayList();
1640
1641    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1642
1643    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1644    for (Path path : paths) {
1645      TableName tableName = CommonFSUtils.getTableName(path);
      if ((!checkMetaOnly &&
          isTableIncluded(tableName)) ||
          tableName.equals(TableName.META_TABLE_NAME)) {
        tableDirs.add(fs.getFileStatus(path));
      }
1651    }
1652
1653    // verify that version file exists
1654    if (!foundVersionFile) {
1655      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1656          "Version file does not exist in root dir " + rootDir);
1657      if (shouldFixVersionFile()) {
1658        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1659            + " file.");
1660        setShouldRerun();
1661        FSUtils.setVersion(fs, rootDir, getConf().getInt(
1662            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1663            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1664            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1665      }
1666    }
1667
    // Avoid multithreading at the table level because we are already multithreaded internally at
    // the region level.  Additionally, multithreading at the table level can lead to deadlock
1670    // if there are many tables in the cluster.  Since there are a limited # of threads
1671    // in the executor's thread pool and if we multithread at the table-level by putting
1672    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1673    // executor tied up solely in waiting for the tables' region-level calls to complete.
1674    // If there are enough tables then there will be no actual threads in the pool left
1675    // for the region-level callables to be serviced.
1676    for (FileStatus tableDir : tableDirs) {
      LOG.debug("Loading region dirs from " + tableDir.getPath());
1678      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1679      try {
1680        item.call();
1681      } catch (ExecutionException e) {
1682        LOG.warn("Could not completely load table dir " +
1683            tableDir.getPath(), e.getCause());
1684      }
1685    }
1686    errors.print("");
1687  }
1688
1689  /**
1690   * Record the location of the hbase:meta region as found in ZooKeeper.
1691   */
1692  private boolean recordMetaRegion() throws IOException {
1693    List<HRegionLocation> locs;
1694    try (RegionLocator locator = connection.getRegionLocator(TableName.META_TABLE_NAME)) {
1695      locs = locator.getRegionLocations(HConstants.EMPTY_START_ROW, true);
1696    }
1697    if (locs == null || locs.isEmpty()) {
1698      errors.reportError(ERROR_CODE.NULL_META_REGION, "META region was not found in ZooKeeper");
1699      return false;
1700    }
1701    for (HRegionLocation metaLocation : locs) {
1702      // Check if Meta region is valid and existing
1703      if (metaLocation == null) {
1704        errors.reportError(ERROR_CODE.NULL_META_REGION, "META region location is null");
1705        return false;
1706      }
1707      if (metaLocation.getRegion() == null) {
1708        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null");
1709        return false;
1710      }
1711      if (metaLocation.getHostname() == null) {
1712        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location hostName is null");
1713        return false;
1714      }
1715      ServerName sn = metaLocation.getServerName();
1716      HbckRegionInfo.MetaEntry m = new HbckRegionInfo.MetaEntry(metaLocation.getRegion(), sn,
1717          EnvironmentEdgeManager.currentTime());
1718      HbckRegionInfo hbckRegionInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName());
1719      if (hbckRegionInfo == null) {
1720        regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckRegionInfo(m));
1721      } else {
1722        hbckRegionInfo.setMetaEntry(m);
1723      }
1724    }
1725    return true;
1726  }
1727
1728  private ZKWatcher createZooKeeperWatcher() throws IOException {
1729    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1730      @Override
1731      public void abort(String why, Throwable e) {
1732        LOG.error(why, e);
1733        System.exit(1);
1734      }
1735
1736      @Override
1737      public boolean isAborted() {
1738        return false;
1739      }
1740
1741    });
1742  }
1743
1744  /**
1745   * Contacts each regionserver and fetches metadata about regions.
1746   * @param regionServerList - the list of region servers to connect to
1747   * @throws IOException if a remote or network exception occurs
1748   */
1749  void processRegionServers(Collection<ServerName> regionServerList)
1750    throws IOException, InterruptedException {
1751
1752    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
1753    List<Future<Void>> workFutures;
1754
1755    // loop to contact each region server in parallel
1756    for (ServerName rsinfo: regionServerList) {
1757      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1758    }
1759
1760    workFutures = executor.invokeAll(workItems);
1761
    for (int i = 0; i < workFutures.size(); i++) {
      WorkItemRegion item = workItems.get(i);
      Future<Void> f = workFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
1768        LOG.warn("Could not process regionserver {}", item.rsinfo.getAddress(),
1769            e.getCause());
1770      }
1771    }
1772  }
1773
1774  /**
1775   * Check consistency of all regions that have been found in previous phases.
1776   */
1777  private void checkAndFixConsistency()
1778  throws IOException, KeeperException, InterruptedException {
1779    // Divide the checks in two phases. One for default/primary replicas and another
1780    // for the non-primary ones. Keeps code cleaner this way.
1781
1782    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
1783    for (java.util.Map.Entry<String, HbckRegionInfo> e: regionInfoMap.entrySet()) {
1784      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1785        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1786      }
1787    }
1788    checkRegionConsistencyConcurrently(workItems);
1789
1790    boolean prevHdfsCheck = shouldCheckHdfs();
1791    setCheckHdfs(false); //replicas don't have any hdfs data
1792    // Run a pass over the replicas and fix any assignment issues that exist on the currently
1793    // deployed/undeployed replicas.
1794    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
1795    for (java.util.Map.Entry<String, HbckRegionInfo> e: regionInfoMap.entrySet()) {
1796      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
1797        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1798      }
1799    }
1800    checkRegionConsistencyConcurrently(replicaWorkItems);
1801    setCheckHdfs(prevHdfsCheck);
1802
    // If some regions are skipped during the checkRegionConsistencyConcurrently() phase, we might
    // not get an accurate state of the cluster if we continue. The config here allows users to tune
    // the tolerated number of skipped regions.
    // TODO: evaluate the consequence of continuing the hbck operation without this config.
    int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1808    int numOfSkippedRegions = skippedRegions.size();
1809    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1810      throw new IOException(numOfSkippedRegions
1811        + " region(s) could not be checked or repaired.  See logs for detail.");
1812    }
1813
1814    if (shouldCheckHdfs()) {
1815      checkAndFixTableStates();
1816    }
1817  }
1818
1819  /**
   * Check consistency of all regions using multiple threads concurrently.
1821   */
1822  private void checkRegionConsistencyConcurrently(
1823    final List<CheckRegionConsistencyWorkItem> workItems)
1824    throws IOException, KeeperException, InterruptedException {
1825    if (workItems.isEmpty()) {
1826      return;  // nothing to check
1827    }
1828
1829    List<Future<Void>> workFutures = executor.invokeAll(workItems);
    for (Future<Void> f : workFutures) {
      try {
        f.get();
      } catch (ExecutionException e1) {
        LOG.warn("Could not check region consistency ", e1.getCause());
1835        if (e1.getCause() instanceof IOException) {
1836          throw (IOException)e1.getCause();
1837        } else if (e1.getCause() instanceof KeeperException) {
1838          throw (KeeperException)e1.getCause();
1839        } else if (e1.getCause() instanceof InterruptedException) {
1840          throw (InterruptedException)e1.getCause();
1841        } else {
1842          throw new IOException(e1.getCause());
1843        }
1844      }
1845    }
1846  }
1847
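  /**
   * Callable that checks a single region's consistency so regions can be examined in parallel
   * by the executor. Failures on non-META regions are recorded as skipped; failures on the META
   * region are rethrown.
   */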
1848  class CheckRegionConsistencyWorkItem implements Callable<Void> {
1849    private final String key;
1850    private final HbckRegionInfo hbi;
1851
1852    CheckRegionConsistencyWorkItem(String key, HbckRegionInfo hbi) {
1853      this.key = key;
1854      this.hbi = hbi;
1855    }
1856
1857    @Override
1858    public synchronized Void call() throws Exception {
1859      try {
1860        checkRegionConsistency(key, hbi);
1861      } catch (Exception e) {
1862        // If the region is non-META region, skip this region and send warning/error message; if
1863        // the region is META region, we should not continue.
1864        LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
1865          + "'.", e);
1866        if (hbi.getHdfsHRI().isMetaRegion()) {
1867          throw e;
1868        }
1869        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1870        addSkippedRegion(hbi);
1871      }
1872      return null;
1873    }
1874  }
1875
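  /**
   * Record a region that could not be checked or repaired so it can be counted against
   * hbase.hbck.skipped.regions.limit later.
   */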
1876  private void addSkippedRegion(final HbckRegionInfo hbi) {
1877    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1878    if (skippedRegionNames == null) {
1879      skippedRegionNames = new HashSet<>();
1880    }
1881    skippedRegionNames.add(hbi.getRegionNameAsString());
1882    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1883  }
1884
1885  /**
1886   * Check and fix table states, assumes full info available:
1887   * - tableInfos
1888   * - empty tables loaded
1889   */
1890  private void checkAndFixTableStates() throws IOException {
1891    // first check dangling states
1892    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1893      TableName tableName = entry.getKey();
1894      TableState tableState = entry.getValue();
1895      HbckTableInfo tableInfo = tablesInfo.get(tableName);
1896      if (isTableIncluded(tableName)
1897          && !tableName.isSystemTable()
1898          && tableInfo == null) {
1899        if (fixMeta) {
1900          MetaTableAccessor.deleteTableState(connection, tableName);
1901          TableState state = MetaTableAccessor.getTableState(connection, tableName);
1902          if (state != null) {
1903            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1904                tableName + " unable to delete dangling table state " + tableState);
1905          }
1906        } else if (!checkMetaOnly) {
1907          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
          // true, tableInfo will be null as tablesInfo is not populated for all tables from hdfs
1909          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1910              tableName + " has dangling table state " + tableState);
1911        }
1912      }
1913    }
1914    // check that all tables have states
1915    for (TableName tableName : tablesInfo.keySet()) {
1916      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1917        if (fixMeta) {
1918          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1919          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1920          if (newState == null) {
1921            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1922                "Unable to change state for table " + tableName + " in meta ");
1923          }
1924        } else {
1925          errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1926              tableName + " has no state in meta ");
1927        }
1928      }
1929    }
1930  }
1931
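  /**
   * Verify that the current user has write access to every top-level entry under the HBase root
   * dir before attempting any repairs.
   */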
1932  private void preCheckPermission() throws IOException {
1933    if (shouldIgnorePreCheckPermission()) {
1934      return;
1935    }
1936
1937    Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1938    FileSystem fs = hbaseDir.getFileSystem(getConf());
1939    UserProvider userProvider = UserProvider.instantiate(getConf());
1940    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1941    FileStatus[] files = fs.listStatus(hbaseDir);
1942    for (FileStatus file : files) {
1943      try {
1944        fs.access(file.getPath(), FsAction.WRITE);
1945      } catch (AccessControlException ace) {
        LOG.warn("Got AccessControlException when preCheckPermission ", ace);
1947        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1948          + " does not have write perms to " + file.getPath()
1949          + ". Please rerun hbck as hdfs user " + file.getOwner());
1950        throw ace;
1951      }
1952    }
1953  }
1954
1955  /**
1956   * Deletes region from meta table
1957   */
1958  private void deleteMetaRegion(HbckRegionInfo hi) throws IOException {
1959    deleteMetaRegion(hi.getMetaEntry().getRegionName());
1960  }
1961
1962  /**
1963   * Deletes region from meta table
1964   */
1965  private void deleteMetaRegion(byte[] metaKey) throws IOException {
1966    Delete d = new Delete(metaKey);
1967    meta.delete(d);
1968    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1969  }
1970
1971  /**
1972   * Reset the split parent region info in meta table
1973   */
1974  private void resetSplitParent(HbckRegionInfo hi) throws IOException {
1975    RowMutations mutations = new RowMutations(hi.getMetaEntry().getRegionName());
1976    Delete d = new Delete(hi.getMetaEntry().getRegionName());
1977    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1978    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1979    mutations.add(d);
1980
1981    RegionInfo hri = RegionInfoBuilder.newBuilder(hi.getMetaEntry())
1982        .setOffline(false)
1983        .setSplit(false)
1984        .build();
1985    Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
1986    mutations.add(p);
1987
1988    meta.mutateRow(mutations);
1989    LOG.info("Reset split parent " + hi.getMetaEntry().getRegionNameAsString() + " in META");
1990  }
1991
1992  /**
   * This is a backwards-compatibility wrapper for permanently offlining a region
1994   * that should not be alive.  If the region server does not support the
1995   * "offline" method, it will use the closest unassign method instead.  This
1996   * will basically work until one attempts to disable or delete the affected
1997   * table.  The problem has to do with in-memory only master state, so
1998   * restarting the HMaster or failing over to another should fix this.
1999   */
2000  void offline(byte[] regionName) throws IOException {
2001    String regionString = Bytes.toStringBinary(regionName);
2002    if (!rsSupportsOffline) {
2003      LOG.warn(
2004          "Using unassign region " + regionString + " instead of using offline method, you should" +
2005              " restart HMaster after these repairs");
2006      admin.unassign(regionName, true);
2007      return;
2008    }
2009
2010    // first time we assume the rs's supports #offline.
2011    try {
2012      LOG.info("Offlining region " + regionString);
2013      admin.offline(regionName);
2014    } catch (IOException ioe) {
2015      String notFoundMsg = "java.lang.NoSuchMethodException: " +
2016          "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2017      if (ioe.getMessage().contains(notFoundMsg)) {
2018        LOG.warn("Using unassign region " + regionString +
2019            " instead of using offline method, you should" +
2020            " restart HMaster after these repairs");
2021        rsSupportsOffline = false; // in the future just use unassign
2022        admin.unassign(regionName, true);
2023        return;
2024      }
2025      throw ioe;
2026    }
2027  }
2028
2029  /**
   * Attempts to undeploy a region from a region server based on information in
2031   * META.  Any operations that modify the file system should make sure that
2032   * its corresponding region is not deployed to prevent data races.
2033   *
2034   * A separate call is required to update the master in-memory region state
   * kept in the AssignmentManager.  Because disable uses this state instead of
2036   * that found in META, we can't seem to cleanly disable/delete tables that
2037   * have been hbck fixed.  When used on a version of HBase that does not have
2038   * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2039   * restart or failover may be required.
2040   */
2041  void closeRegion(HbckRegionInfo hi) throws IOException, InterruptedException {
2042    if (hi.getMetaEntry() == null && hi.getHdfsEntry() == null) {
2043      undeployRegions(hi);
2044      return;
2045    }
2046
2047    // get assignment info and hregioninfo from meta.
2048    Get get = new Get(hi.getRegionName());
2049    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2050    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2051    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2052    // also get the locations of the replicas to close if the primary region is being closed
2053    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2054      int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2055      for (int i = 0; i < numReplicas; i++) {
2056        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2057        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2058      }
2059    }
2060    Result r = meta.get(get);
2061    RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2062    if (rl == null) {
2063      LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2064          " since meta does not have handle to reach it");
2065      return;
2066    }
2067    for (HRegionLocation h : rl.getRegionLocations()) {
2068      ServerName serverName = h.getServerName();
2069      if (serverName == null) {
2070        errors.reportError("Unable to close region "
2071            + hi.getRegionNameAsString() +  " because meta does not "
2072            + "have handle to reach it.");
2073        continue;
2074      }
2075      RegionInfo hri = h.getRegion();
2076      if (hri == null) {
2077        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2078            + " because hbase:meta had invalid or missing "
2079            + HConstants.CATALOG_FAMILY_STR + ":"
2080            + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2081            + " qualifier value.");
2082        continue;
2083      }
2084      // close the region -- close files and remove assignment
2085      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2086    }
2087  }
2088
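  /**
   * Undeploy the given region and, when invoked on the primary replica, undeploy its replicas
   * as well (marking them to be skipped by later consistency checks).
   */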
2089  private void undeployRegions(HbckRegionInfo hi) throws IOException, InterruptedException {
2090    undeployRegionsForHbi(hi);
2091    // undeploy replicas of the region (but only if the method is invoked for the primary)
2092    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2093      return;
2094    }
2095    int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2096    for (int i = 1; i < numReplicas; i++) {
2097      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2098      RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2099          hi.getPrimaryHRIForDeployedReplica(), i);
2100      HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2101      if (h != null) {
2102        undeployRegionsForHbi(h);
2103        //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2104        //in consistency checks
2105        h.setSkipChecks(true);
2106      }
2107    }
2108  }
2109
2110  private void undeployRegionsForHbi(HbckRegionInfo hi) throws IOException, InterruptedException {
2111    for (HbckRegionInfo.OnlineEntry rse : hi.getOnlineEntries()) {
2112      LOG.debug("Undeploy region "  + rse.getRegionInfo() + " from " + rse.getServerName());
2113      try {
2114        HBaseFsckRepair
2115            .closeRegionSilentlyAndWait(connection, rse.getServerName(), rse.getRegionInfo());
2116        offline(rse.getRegionInfo().getRegionName());
2117      } catch (IOException ioe) {
2118        LOG.warn("Got exception when attempting to offline region "
2119            + Bytes.toString(rse.getRegionInfo().getRegionName()), ioe);
2120      }
2121    }
2122  }
2123
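  /**
   * If assignment fixing is enabled, undeploy the region and re-assign it (and, when called on
   * the primary replica, its replicas) via HBaseFsckRepair, then flag the run for a re-check.
   */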
2124  private void tryAssignmentRepair(HbckRegionInfo hbi, String msg) throws IOException,
2125    KeeperException, InterruptedException {
2126    // If we are trying to fix the errors
2127    if (shouldFixAssignments()) {
2128      errors.print(msg);
2129      undeployRegions(hbi);
2130      setShouldRerun();
2131      RegionInfo hri = hbi.getHdfsHRI();
2132      if (hri == null) {
2133        hri = hbi.getMetaEntry();
2134      }
2135      HBaseFsckRepair.fixUnassigned(admin, hri);
2136      HBaseFsckRepair.waitUntilAssigned(admin, hri);
2137
2138      // also assign replicas if needed (do it only when this call operates on a primary replica)
2139      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2140      int replicationCount = admin.getDescriptor(hri.getTable()).getRegionReplication();
2141      for (int i = 1; i < replicationCount; i++) {
2142        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2143        HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2144        if (h != null) {
2145          undeployRegions(h);
2146          //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2147          //in consistency checks
2148          h.setSkipChecks(true);
2149        }
2150        HBaseFsckRepair.fixUnassigned(admin, hri);
2151        HBaseFsckRepair.waitUntilAssigned(admin, hri);
2152      }
2153
2154    }
2155  }
2156
2157  /**
2158   * Check a single region for consistency and correct deployment.
2159   */
2160  private void checkRegionConsistency(final String key, final HbckRegionInfo hbi)
2161      throws IOException, KeeperException, InterruptedException {
2162
2163    if (hbi.isSkipChecks()) return;
2164    String descriptiveName = hbi.toString();
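    // Gather this region's state as seen from hbase:meta, HDFS and the region servers; the
    // combination of these flags drives the case analysis below.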
2165    boolean inMeta = hbi.getMetaEntry() != null;
2166    // In case not checking HDFS, assume the region is on HDFS
2167    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2168    boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null;
2169    boolean isDeployed = !hbi.getDeployedOn().isEmpty();
2170    boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1;
2171    boolean deploymentMatchesMeta =
2172      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2173      hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0));
2174    boolean splitParent =
2175        inMeta && hbi.getMetaEntry().isSplit() && hbi.getMetaEntry().isOffline();
2176    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.getMetaEntry().getTable());
2177    boolean recentlyModified = inHdfs &&
2178      hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2179
2180    // ========== First the healthy cases =============
2181    if (hbi.containsOnlyHdfsEdits()) {
2182      return;
2183    }
2184    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2185      return;
2186    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2187      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
        "table that is not deployed");
2189      return;
2190    } else if (recentlyModified) {
2191      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2192      return;
2193    }
2194    // ========== Cases where the region is not in hbase:meta =============
2195    else if (!inMeta && !inHdfs && !isDeployed) {
2196      // We shouldn't have record of this region at all then!
2197      assert false : "Entry for region with no data";
2198    } else if (!inMeta && !inHdfs && isDeployed) {
2199      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2200          + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2201          "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2202      if (shouldFixAssignments()) {
2203        undeployRegions(hbi);
2204      }
2205
2206    } else if (!inMeta && inHdfs && !isDeployed) {
2207      if (hbi.isMerged()) {
2208        // This region has already been merged, the remaining hdfs file will be
2209        // cleaned by CatalogJanitor later
2210        hbi.setSkipChecks(true);
2211        LOG.info("Region " + descriptiveName
            + " was merged recently, its file(s) will be cleaned by CatalogJanitor later");
2213        return;
2214      }
2215      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2216          + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2217          "or deployed on any region server");
2218      // restore region consistency of an adopted orphan
2219      if (shouldFixMeta()) {
2220        if (!hbi.isHdfsRegioninfoPresent()) {
2221          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2222              +  " in table integrity repair phase if -fixHdfsOrphans was" +
2223              " used.");
2224          return;
2225        }
2226
2227        RegionInfo hri = hbi.getHdfsHRI();
2228        HbckTableInfo tableInfo = tablesInfo.get(hri.getTable());
2229
2230        for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) {
2231          if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2232              && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2233                hri.getEndKey()) >= 0)
2234              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2235            if(region.isSplit() || region.isOffline()) continue;
2236            Path regionDir = hbi.getHdfsRegionDir();
2237            FileSystem fs = regionDir.getFileSystem(getConf());
2238            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2239            for (Path familyDir : familyDirs) {
2240              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2241              for (Path referenceFilePath : referenceFilePaths) {
2242                Path parentRegionDir =
2243                    StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2244                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2245                  LOG.warn(hri + " start and stop keys are in the range of " + region
2246                      + ". The region might not be cleaned up from hdfs when region " + region
2247                      + " split failed. Hence deleting from hdfs.");
2248                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2249                    regionDir.getParent(), hri);
2250                  return;
2251                }
2252              }
2253            }
2254          }
2255        }
2256        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2257        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2258        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2259            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2260              .getLiveServerMetrics().keySet(), numReplicas);
2261
2262        tryAssignmentRepair(hbi, "Trying to reassign region...");
2263      }
2264
2265    } else if (!inMeta && inHdfs && isDeployed) {
2266      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2267          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2268      debugLsr(hbi.getHdfsRegionDir());
2269      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2270        // for replicas, this means that we should undeploy the region (we would have
2271        // gone over the primaries and fixed meta holes in first phase under
2272        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2273        // this stage unless unwanted replica)
2274        if (shouldFixAssignments()) {
2275          undeployRegionsForHbi(hbi);
2276        }
2277      }
2278      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2279        if (!hbi.isHdfsRegioninfoPresent()) {
2280          LOG.error("This should have been repaired in table integrity repair phase");
2281          return;
2282        }
2283
        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2285        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2286        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2287            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2288              .getLiveServerMetrics().keySet(), numReplicas);
2289        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2290      }
2291
2292    // ========== Cases where the region is in hbase:meta =============
2293    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2294      // check whether this is an actual error, or just transient state where parent
2295      // is not cleaned
2296      if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) {
2297        // check that split daughters are there
2298        HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName());
2299        HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName());
2300        if (infoA != null && infoB != null) {
2301          // we already processed or will process daughters. Move on, nothing to see here.
2302          hbi.setSkipChecks(true);
2303          return;
2304        }
2305      }
2306
2307      // For Replica region, we need to do a similar check. If replica is not split successfully,
2308      // error is going to be reported against primary daughter region.
2309      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2310        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2311            + "and not deployed on any region server. This may be transient.");
2312        hbi.setSkipChecks(true);
2313        return;
2314      }
2315
2316      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2317          + descriptiveName + " is a split parent in META, in HDFS, "
2318          + "and not deployed on any region server. This could be transient, "
          + "consider running the catalog janitor first!");
2320      if (shouldFixSplitParents()) {
2321        setShouldRerun();
2322        resetSplitParent(hbi);
2323      }
2324    } else if (inMeta && !inHdfs && !isDeployed) {
2325      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2326          + descriptiveName + " found in META, but not in HDFS "
2327          + "or deployed on any region server.");
2328      if (shouldFixMeta()) {
2329        deleteMetaRegion(hbi);
2330      }
2331    } else if (inMeta && !inHdfs && isDeployed) {
2332      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2333          + " found in META, but not in HDFS, " +
2334          "and deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2335      // We treat HDFS as ground truth.  Any information in meta is transient
      // and equivalent data can be regenerated.  So, let's unassign and remove
2337      // these problems from META.
2338      if (shouldFixAssignments()) {
2339        errors.print("Trying to fix unassigned region...");
2340        undeployRegions(hbi);
2341      }
2342      if (shouldFixMeta()) {
2343        // wait for it to complete
2344        deleteMetaRegion(hbi);
2345      }
2346    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2347      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2348          + " not deployed on any region server.");
2349      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2350    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2351      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2352          "Region " + descriptiveName + " should not be deployed according " +
2353          "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2354      if (shouldFixAssignments()) {
2355        errors.print("Trying to close the region " + descriptiveName);
2356        setShouldRerun();
2357        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2358      }
2359    } else if (inMeta && inHdfs && isMultiplyDeployed) {
2360      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2361          + " is listed in hbase:meta on region server " + hbi.getMetaEntry().regionServer
2362          + " but is multiply assigned to region servers " +
2363          Joiner.on(", ").join(hbi.getDeployedOn()));
2364      // If we are trying to fix the errors
2365      if (shouldFixAssignments()) {
2366        errors.print("Trying to fix assignment error...");
2367        setShouldRerun();
2368        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2369      }
2370    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2371      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2372          + descriptiveName + " listed in hbase:meta on region server " +
2373          hbi.getMetaEntry().regionServer + " but found on region server " +
2374          hbi.getDeployedOn().get(0));
2375      // If we are trying to fix the errors
2376      if (shouldFixAssignments()) {
2377        errors.print("Trying to fix assignment error...");
2378        setShouldRerun();
2379        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2380        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2381      }
2382    } else {
2383      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2384          " is in an unforeseen state:" +
2385          " inMeta=" + inMeta +
2386          " inHdfs=" + inHdfs +
2387          " isDeployed=" + isDeployed +
2388          " isMultiplyDeployed=" + isMultiplyDeployed +
2389          " deploymentMatchesMeta=" + deploymentMatchesMeta +
2390          " shouldBeDeployed=" + shouldBeDeployed);
2391    }
2392  }
2393
2394  /**
2395   * Checks tables integrity. Goes over all regions and scans the tables.
2396   * Collects all the pieces for each table and checks if there are missing,
2397   * repeated or overlapping ones.
2398   * @throws IOException
2399   */
2400  SortedMap<TableName, HbckTableInfo> checkIntegrity() throws IOException {
2401    tablesInfo = new TreeMap<>();
2402    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2403    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2404      // Check only valid, working regions
2405      if (hbi.getMetaEntry() == null) {
2406        // this assumes that consistency check has run loadMetaEntry
2407        Path p = hbi.getHdfsRegionDir();
2408        if (p == null) {
2409          errors.report("No regioninfo in Meta or HDFS. " + hbi);
2410        }
2411
2412        // TODO test.
2413        continue;
2414      }
2415      if (hbi.getMetaEntry().regionServer == null) {
2416        errors.detail("Skipping region because no region server: " + hbi);
2417        continue;
2418      }
2419      if (hbi.getMetaEntry().isOffline()) {
2420        errors.detail("Skipping region because it is offline: " + hbi);
2421        continue;
2422      }
2423      if (hbi.containsOnlyHdfsEdits()) {
        errors.detail("Skipping region because it only contains edits: " + hbi);
2425        continue;
2426      }
2427
2428      // Missing regionDir or over-deployment is checked elsewhere. Include
2429      // these cases in modTInfo, so we can evaluate those regions as part of
2430      // the region chain in META
2431      //if (hbi.foundRegionDir == null) continue;
2432      //if (hbi.deployedOn.size() != 1) continue;
2433      if (hbi.getDeployedOn().isEmpty()) {
2434        continue;
2435      }
2436
2437      // We should be safe here
2438      TableName tableName = hbi.getMetaEntry().getTable();
2439      HbckTableInfo modTInfo = tablesInfo.get(tableName);
2440      if (modTInfo == null) {
2441        modTInfo = new HbckTableInfo(tableName, this);
2442      }
2443      for (ServerName server : hbi.getDeployedOn()) {
2444        modTInfo.addServer(server);
2445      }
2446
2447      if (!hbi.isSkipChecks()) {
2448        modTInfo.addRegionInfo(hbi);
2449      }
2450
2451      tablesInfo.put(tableName, modTInfo);
2452    }
2453
2454    loadTableInfosForTablesWithNoRegion();
2455
2456    logParallelMerge();
2457    for (HbckTableInfo tInfo : tablesInfo.values()) {
2458      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2459      if (!tInfo.checkRegionChain(handler)) {
2460        errors.report("Found inconsistency in table " + tInfo.getName());
2461      }
2462    }
2463    return tablesInfo;
2464  }
2465
  /** Loads table infos for tables that may not have been included, since there are no
   * regions reported for the table, but the table dir is present in hdfs
   */
2469  private void loadTableInfosForTablesWithNoRegion() throws IOException {
2470    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2471    for (TableDescriptor htd : allTables.values()) {
2472      if (checkMetaOnly && !htd.isMetaTable()) {
2473        continue;
2474      }
2475
2476      TableName tableName = htd.getTableName();
2477      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2478        HbckTableInfo tableInfo = new HbckTableInfo(tableName, this);
2479        tableInfo.htds.add(htd);
2480        tablesInfo.put(htd.getTableName(), tableInfo);
2481      }
2482    }
2483  }
2484
2485  /**
2486   * Merge hdfs data by moving from contained HbckRegionInfo into targetRegionDir.
2487   * @return number of file move fixes done to merge regions.
2488   */
2489  public int mergeRegionDirs(Path targetRegionDir, HbckRegionInfo contained) throws IOException {
2490    int fileMoves = 0;
2491    String thread = Thread.currentThread().getName();
2492    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2493    debugLsr(contained.getHdfsRegionDir());
2494
2495    // rename the contained into the container.
2496    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2497    FileStatus[] dirs = null;
2498    try {
2499      dirs = fs.listStatus(contained.getHdfsRegionDir());
2500    } catch (FileNotFoundException fnfe) {
2501      // region we are attempting to merge in is not present!  Since this is a merge, there is
2502      // no harm skipping this region if it does not exist.
2503      if (!fs.exists(contained.getHdfsRegionDir())) {
2504        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2505            + " is missing. Assuming already sidelined or moved.");
2506      } else {
2507        sidelineRegionDir(fs, contained);
2508      }
2509      return fileMoves;
2510    }
2511
2512    if (dirs == null) {
2513      if (!fs.exists(contained.getHdfsRegionDir())) {
2514        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2515            + " already sidelined.");
2516      } else {
2517        sidelineRegionDir(fs, contained);
2518      }
2519      return fileMoves;
2520    }
2521
2522    for (FileStatus cf : dirs) {
2523      Path src = cf.getPath();
2524      Path dst =  new Path(targetRegionDir, src.getName());
2525
2526      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2527        // do not copy the old .regioninfo file.
2528        continue;
2529      }
2530
2531      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2532        // do not copy the .oldlogs files
2533        continue;
2534      }
2535
2536      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2537      // FileSystem.rename is inconsistent with directories -- if the
2538      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2539      // it moves the src into the dst dir resulting in (foo/a/b).  If
2540      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2541      for (FileStatus hfile : fs.listStatus(src)) {
2542        boolean success = fs.rename(hfile.getPath(), dst);
2543        if (success) {
2544          fileMoves++;
2545        }
2546      }
2547      LOG.debug("[" + thread + "] Sideline directory contents:");
2548      debugLsr(targetRegionDir);
2549    }
2550
    // If all of the moves above succeeded, sideline the contained region dir.
2552    sidelineRegionDir(fs, contained);
2553    LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2554        getSidelineDir());
2555    debugLsr(contained.getHdfsRegionDir());
2556
2557    return fileMoves;
2558  }
2559
2560
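  /**
   * Work item that hands one group of overlapping regions to the table integrity error handler
   * so overlap groups can be resolved in parallel.
   */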
2561  static class WorkItemOverlapMerge implements Callable<Void> {
2562    private TableIntegrityErrorHandler handler;
2563    Collection<HbckRegionInfo> overlapgroup;
2564
2565    WorkItemOverlapMerge(Collection<HbckRegionInfo> overlapgroup,
2566        TableIntegrityErrorHandler handler) {
2567      this.handler = handler;
2568      this.overlapgroup = overlapgroup;
2569    }
2570
2571    @Override
2572    public Void call() throws Exception {
2573      handler.handleOverlapGroup(overlapgroup);
2574      return null;
2575    }
2576  }
2577
2578  /**
2579   * Return a list of user-space table names whose metadata have not been
2580   * modified in the last few milliseconds specified by timelag
2581   * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
2582   * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
2583   * milliseconds specified by timelag, then the table is a candidate to be returned.
2584   * @return tables that have not been modified recently
2585   * @throws IOException if an error is encountered
2586   */
2587  TableDescriptor[] getTables(AtomicInteger numSkipped) {
2588    List<TableName> tableNames = new ArrayList<>();
2589    long now = EnvironmentEdgeManager.currentTime();
2590
2591    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2592      HbckRegionInfo.MetaEntry info = hbi.getMetaEntry();
2593
      // If the start key is empty, then we have found the first region of a table.
      // Pick only those tables that were not modified in the last timelag milliseconds.
2596      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2597        if (info.modTime + timelag < now) {
2598          tableNames.add(info.getTable());
2599        } else {
2600          numSkipped.incrementAndGet(); // one more in-flux table
2601        }
2602      }
2603    }
2604    return getTableDescriptors(tableNames);
2605  }
2606
2607  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
2608      LOG.info("getTableDescriptors == tableNames => " + tableNames);
2609    try (Connection conn = ConnectionFactory.createConnection(getConf());
2610        Admin admin = conn.getAdmin()) {
2611      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
2612      return tds.toArray(new TableDescriptor[tds.size()]);
2613    } catch (IOException e) {
2614      LOG.debug("Exception getting table descriptors", e);
2615    }
2616    return new TableDescriptor[0];
2617  }
2618
2619  /**
   * Gets the entry in regionInfo corresponding to the given encoded
2621   * region name. If the region has not been seen yet, a new entry is added
2622   * and returned.
2623   */
2624  private synchronized HbckRegionInfo getOrCreateInfo(String name) {
2625    HbckRegionInfo hbi = regionInfoMap.get(name);
2626    if (hbi == null) {
2627      hbi = new HbckRegionInfo(null);
2628      regionInfoMap.put(name, hbi);
2629    }
2630    return hbi;
2631  }
2632
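  /**
   * Check for replication queues left behind by removed peers and, when -fixReplication is set,
   * delete them and flag hbck for a rerun.
   */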
2633  private void checkAndFixReplication() throws ReplicationException {
2634    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
2635    checker.checkUnDeletedQueues();
2636
2637    if (checker.hasUnDeletedQueues() && this.fixReplication) {
2638      checker.fixUnDeletedQueues();
2639      setShouldRerun();
2640    }
2641  }
2642
2643  /**
2644    * Check values in regionInfo for hbase:meta
2645    * Check if zero or more than one regions with hbase:meta are found.
2646    * If there are inconsistencies (i.e. zero or more than one regions
2647    * pretend to be holding the hbase:meta) try to fix that and report an error.
2648    * @throws IOException from HBaseFsckRepair functions
2649    * @throws KeeperException
2650    * @throws InterruptedException
2651    */
2652  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2653    Map<Integer, HbckRegionInfo> metaRegions = new HashMap<>();
2654    for (HbckRegionInfo value : regionInfoMap.values()) {
2655      if (value.getMetaEntry() != null && value.getMetaEntry().isMetaRegion()) {
2656        metaRegions.put(value.getReplicaId(), value);
2657      }
2658    }
2659    int metaReplication = admin.getDescriptor(TableName.META_TABLE_NAME)
2660        .getRegionReplication();
2661    boolean noProblem = true;
    // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
2663    // Check the deployed servers. It should be exactly one server for each replica.
2664    for (int i = 0; i < metaReplication; i++) {
2665      HbckRegionInfo metaHbckRegionInfo = metaRegions.remove(i);
2666      List<ServerName> servers = new ArrayList<>();
2667      if (metaHbckRegionInfo != null) {
2668        servers = metaHbckRegionInfo.getDeployedOn();
2669      }
2670      if (servers.size() != 1) {
2671        noProblem = false;
2672        if (servers.isEmpty()) {
2673          assignMetaReplica(i);
2674        } else if (servers.size() > 1) {
          errors.reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
              metaHbckRegionInfo.getReplicaId() + " is deployed on more than one region server.");
2678          if (shouldFixAssignments()) {
2679            errors.print("Trying to fix a problem with hbase:meta, replicaId " +
2680                metaHbckRegionInfo.getReplicaId() + "..");
2681            setShouldRerun();
            // try to fix it (treat it as a dupe assignment)
2683            HBaseFsckRepair
2684                .fixMultiAssignment(connection, metaHbckRegionInfo.getMetaEntry(), servers);
2685          }
2686        }
2687      }
2688    }
2689    // unassign whatever is remaining in metaRegions. They are excess replicas.
2690    for (Map.Entry<Integer, HbckRegionInfo> entry : metaRegions.entrySet()) {
2691      noProblem = false;
2692      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2693          "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
2694          ", deployed " + metaRegions.size());
2695      if (shouldFixAssignments()) {
2696        errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
2697            " of hbase:meta..");
2698        setShouldRerun();
2699        unassignMetaReplica(entry.getValue());
2700      }
2701    }
2702    // if noProblem is false, rerun hbck with hopefully fixed META
2703    // if noProblem is true, no errors, so continue normally
2704    return noProblem;
2705  }
2706
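  /**
   * Undeploy an excess hbase:meta replica and delete its znode so it is no longer advertised.
   */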
2707  private void unassignMetaReplica(HbckRegionInfo hi)
2708      throws IOException, InterruptedException, KeeperException {
2709    undeployRegions(hi);
2710    ZKUtil
2711        .deleteNode(zkw, zkw.getZNodePaths().getZNodeForReplica(hi.getMetaEntry().getReplicaId()));
2712  }
2713
2714  private void assignMetaReplica(int replicaId)
2715      throws IOException, KeeperException, InterruptedException {
2716    errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
2717        replicaId +" is not found on any region.");
2718    if (shouldFixAssignments()) {
2719      errors.print("Trying to fix a problem with hbase:meta..");
2720      setShouldRerun();
2721      // try to fix it (treat it as unassigned region)
2722      RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
2723          RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
2724      HBaseFsckRepair.fixUnassigned(admin, h);
2725      HBaseFsckRepair.waitUntilAssigned(admin, h);
2726    }
2727  }
2728
2729  /**
2730   * Scan hbase:meta, adding all regions found to the regionInfo map.
2731   * @throws IOException if an error is encountered
2732   */
2733  boolean loadMetaEntries() throws IOException {
2734    MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
2735      int countRecord = 1;
2736
      // comparator used to find the cell with the latest timestamp in a meta row
2738      final Comparator<Cell> comp = new Comparator<Cell>() {
2739        @Override
2740        public int compare(Cell k1, Cell k2) {
2741          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
2742        }
2743      };
2744
2745      @Override
2746      public boolean visit(Result result) throws IOException {
2747        try {
2748
2749          // record the latest modification of this META record
2750          long ts =  Collections.max(result.listCells(), comp).getTimestamp();
2751          RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
2752          if (rl == null) {
2753            emptyRegionInfoQualifiers.add(result);
2754            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2755              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2756            return true;
2757          }
2758          ServerName sn = null;
2759          if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null ||
2760              rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion() == null) {
2761            emptyRegionInfoQualifiers.add(result);
2762            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2763              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2764            return true;
2765          }
2766          RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion();
2767          if (!(isTableIncluded(hri.getTable())
2768              || hri.isMetaRegion())) {
2769            return true;
2770          }
2771          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
2772          for (HRegionLocation h : rl.getRegionLocations()) {
2773            if (h == null || h.getRegion() == null) {
2774              continue;
2775            }
2776            sn = h.getServerName();
2777            hri = h.getRegion();
2778
2779            HbckRegionInfo.MetaEntry m = null;
2780            if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2781              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, daughters.getFirst(),
2782                  daughters.getSecond());
2783            } else {
2784              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, null, null);
2785            }
2786            HbckRegionInfo previous = regionInfoMap.get(hri.getEncodedName());
2787            if (previous == null) {
2788              regionInfoMap.put(hri.getEncodedName(), new HbckRegionInfo(m));
2789            } else if (previous.getMetaEntry() == null) {
2790              previous.setMetaEntry(m);
2791            } else {
2792              throw new IOException("Two entries in hbase:meta are same " + previous);
2793            }
2794          }
2795          List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(result.rawCells());
2796          if (mergeParents != null) {
2797            for (RegionInfo mergeRegion : mergeParents) {
2798              if (mergeRegion != null) {
2799                // This region is already being merged
2800                HbckRegionInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
2801                hbInfo.setMerged(true);
2802              }
2803            }
2804          }
2805
2806          // show proof of progress to the user, once for every 100 records.
2807          if (countRecord % 100 == 0) {
2808            errors.progress();
2809          }
2810          countRecord++;
2811          return true;
2812        } catch (RuntimeException e) {
2813          LOG.error("Result=" + result);
2814          throw e;
2815        }
2816      }
2817    };
2818    if (!checkMetaOnly) {
2819      // Scan hbase:meta to pick up user regions
2820      MetaTableAccessor.fullScanRegions(connection, visitor);
2821    }
2822
2823    errors.print("");
2824    return true;
2825  }
2826
2827  /**
2828   * Prints summary of all tables found on the system.
2829   */
2830  private void printTableSummary(SortedMap<TableName, HbckTableInfo> tablesInfo) {
2831    StringBuilder sb = new StringBuilder();
2832    int numOfSkippedRegions;
2833    errors.print("Summary:");
2834    for (HbckTableInfo tInfo : tablesInfo.values()) {
2835      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
2836          skippedRegions.get(tInfo.getName()).size() : 0;
2837
2838      if (errors.tableHasErrors(tInfo)) {
2839        errors.print("Table " + tInfo.getName() + " is inconsistent.");
      } else if (numOfSkippedRegions > 0) {
        errors.print("Table " + tInfo.getName() + " is okay (with "
          + numOfSkippedRegions + " skipped regions).");
      } else {
        errors.print("Table " + tInfo.getName() + " is okay.");
      }
2847      errors.print("    Number of regions: " + tInfo.getNumRegions());
2848      if (numOfSkippedRegions > 0) {
2849        Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
2850        System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
2851        System.out.println("      List of skipped regions:");
2852        for(String sr : skippedRegionStrings) {
2853          System.out.println("        " + sr);
2854        }
2855      }
2856      sb.setLength(0); // clear out existing buffer, if any.
2857      sb.append("    Deployed on: ");
2858      for (ServerName server : tInfo.deployedOn) {
2859        sb.append(" " + server.toString());
2860      }
2861      errors.print(sb.toString());
2862    }
2863  }
2864
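  /**
   * Instantiate the error reporter configured via the {@code hbasefsck.errorreporter} property,
   * defaulting to {@link PrintingErrorReporter}.
   */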
2865  static HbckErrorReporter getErrorReporter(final Configuration conf)
2866      throws ClassNotFoundException {
2867    Class<? extends HbckErrorReporter> reporter =
2868        conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class,
2869            HbckErrorReporter.class);
2870    return ReflectionUtils.newInstance(reporter, conf);
2871  }
2872
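  /**
   * Default {@link HbckErrorReporter} implementation that prints errors and progress to
   * stdout/stderr and records the error codes seen so unit tests can verify them.
   */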
2873  static class PrintingErrorReporter implements HbckErrorReporter {
2874    public int errorCount = 0;
2875    private int showProgress;
2876    // How frequently calls to progress() will create output
2877    private static final int progressThreshold = 100;
2878
2879    Set<HbckTableInfo> errorTables = new HashSet<>();
2880
2881    // for use by unit tests to verify which errors were discovered
2882    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
2883
2884    @Override
2885    public void clear() {
2886      errorTables.clear();
2887      errorList.clear();
2888      errorCount = 0;
2889    }
2890
2891    @Override
2892    public synchronized void reportError(ERROR_CODE errorCode, String message) {
2893      if (errorCode == ERROR_CODE.WRONG_USAGE) {
2894        System.err.println(message);
2895        return;
2896      }
2897
2898      errorList.add(errorCode);
2899      if (!summary) {
2900        System.out.println("ERROR: " + message);
2901      }
2902      errorCount++;
2903      showProgress = 0;
2904    }
2905
2906    @Override
2907    public synchronized void reportError(ERROR_CODE errorCode, String message,
2908        HbckTableInfo table) {
2909      errorTables.add(table);
2910      reportError(errorCode, message);
2911    }
2912
2913    @Override
2914    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2915                                         HbckRegionInfo info) {
2916      errorTables.add(table);
2917      String reference = "(region " + info.getRegionNameAsString() + ")";
2918      reportError(errorCode, reference + " " + message);
2919    }
2920
2921    @Override
2922    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2923                                         HbckRegionInfo info1, HbckRegionInfo info2) {
2924      errorTables.add(table);
2925      String reference = "(regions " + info1.getRegionNameAsString()
2926          + " and " + info2.getRegionNameAsString() + ")";
2927      reportError(errorCode, reference + " " + message);
2928    }
2929
2930    @Override
2931    public synchronized void reportError(String message) {
2932      reportError(ERROR_CODE.UNKNOWN, message);
2933    }
2934
2935    /**
2936     * Report error information, but do not increment the error count.  Intended for cases
2937     * where the actual error would have been reported previously.
     * @param message the error detail to print
2939     */
2940    @Override
2941    public synchronized void report(String message) {
2942      if (! summary) {
2943        System.out.println("ERROR: " + message);
2944      }
2945      showProgress = 0;
2946    }
2947
2948    @Override
2949    public synchronized int summarize() {
2950      System.out.println(Integer.toString(errorCount) +
2951                         " inconsistencies detected.");
2952      if (errorCount == 0) {
2953        System.out.println("Status: OK");
2954        return 0;
2955      } else {
2956        System.out.println("Status: INCONSISTENT");
2957        return -1;
2958      }
2959    }
2960
2961    @Override
2962    public ArrayList<ERROR_CODE> getErrorList() {
2963      return errorList;
2964    }
2965
2966    @Override
2967    public synchronized void print(String message) {
2968      if (!summary) {
2969        System.out.println(message);
2970      }
2971    }
2972
2973    @Override
2974    public boolean tableHasErrors(HbckTableInfo table) {
2975      return errorTables.contains(table);
2976    }
2977
2978    @Override
2979    public void resetErrors() {
2980      errorCount = 0;
2981    }
2982
2983    @Override
2984    public synchronized void detail(String message) {
2985      if (details) {
2986        System.out.println(message);
2987      }
2988      showProgress = 0;
2989    }
2990
2991    @Override
2992    public synchronized void progress() {
2993      if (showProgress++ == progressThreshold) {
2994        if (!summary) {
2995          System.out.print(".");
2996        }
2997        showProgress = 0;
2998      }
2999    }
3000  }
3001
3002  /**
3003   * Contact a region server and get all information from it
3004   */
3005  static class WorkItemRegion implements Callable<Void> {
3006    private final HBaseFsck hbck;
3007    private final ServerName rsinfo;
3008    private final HbckErrorReporter errors;
3009    private final Connection connection;
3010
3011    WorkItemRegion(HBaseFsck hbck, ServerName info, HbckErrorReporter errors,
3012        Connection connection) {
3013      this.hbck = hbck;
3014      this.rsinfo = info;
3015      this.errors = errors;
3016      this.connection = connection;
3017    }
3018
3019    @Override
3020    public synchronized Void call() throws IOException {
3021      errors.progress();
3022      try {
3023        // list all online regions from this region server
3024        List<RegionInfo> regions = connection.getAdmin().getRegions(rsinfo);
3025        regions = filterRegions(regions);
3026
3027        if (details) {
3028          errors.detail(
3029            "RegionServer: " + rsinfo.getServerName() + " number of regions: " + regions.size());
3030          for (RegionInfo rinfo : regions) {
3031            errors.detail("  " + rinfo.getRegionNameAsString() + " id: " + rinfo.getRegionId() +
3032              " encoded_name: " + rinfo.getEncodedName() + " start: " +
3033              Bytes.toStringBinary(rinfo.getStartKey()) + " end: " +
3034              Bytes.toStringBinary(rinfo.getEndKey()));
3035          }
3036        }
3037
3038        // check to see if the existence of this region matches the region in META
3039        for (RegionInfo r : regions) {
3040          HbckRegionInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3041          hbi.addServer(r, rsinfo);
3042        }
3043      } catch (IOException e) { // unable to connect to the region server.
3044        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE,
3045          "RegionServer: " + rsinfo.getServerName() + " Unable to fetch region information. " + e);
3046        throw e;
3047      }
3048      return null;
3049    }
3050
3051    private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
3052      List<RegionInfo> ret = Lists.newArrayList();
3053      for (RegionInfo hri : regions) {
3054        if (hri.isMetaRegion() || (!hbck.checkMetaOnly
3055            && hbck.isTableIncluded(hri.getTable()))) {
3056          ret.add(hri);
3057        }
3058      }
3059      return ret;
3060    }
3061  }
3062
3063  /**
   * Contact HDFS and scan the region directories under the specified table directory,
   * recording what is found in the regionInfo map.
3066   */
3067  class WorkItemHdfsDir implements Callable<Void> {
3068    private FileStatus tableDir;
3069    private HbckErrorReporter errors;
3070    private FileSystem fs;
3071
3072    WorkItemHdfsDir(FileSystem fs, HbckErrorReporter errors, FileStatus status) {
3073      this.fs = fs;
3074      this.tableDir = status;
3075      this.errors = errors;
3076    }
3077
3078    @Override
3079    public synchronized Void call() throws InterruptedException, ExecutionException {
3080      final Vector<Exception> exceptions = new Vector<>();
3081
3082      try {
3083        final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3084        final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
3085
3086        for (final FileStatus regionDir : regionDirs) {
3087          errors.progress();
3088          final String encodedName = regionDir.getPath().getName();
3089          // ignore directories that aren't hexadecimal
3090          if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
3091            continue;
3092          }
3093
3094          if (!exceptions.isEmpty()) {
3095            break;
3096          }
3097
3098          futures.add(executor.submit(new Runnable() {
3099            @Override
3100            public void run() {
3101              try {
3102                LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
3103
3104                Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
3105                boolean regioninfoFileExists = fs.exists(regioninfoFile);
3106
3107                if (!regioninfoFileExists) {
3108                  // As tables become larger it is more and more likely that by the time you
                  // reach a given region it will be gone due to region splits/merges.
3110                  if (!fs.exists(regionDir.getPath())) {
3111                    LOG.warn("By the time we tried to process this region dir it was already gone: "
3112                        + regionDir.getPath());
3113                    return;
3114                  }
3115                }
3116
3117                HbckRegionInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
3118                HbckRegionInfo.HdfsEntry he = new HbckRegionInfo.HdfsEntry();
3119                synchronized (hbi) {
3120                  if (hbi.getHdfsRegionDir() != null) {
3121                    errors.print("Directory " + encodedName + " duplicate??" +
3122                                 hbi.getHdfsRegionDir());
3123                  }
3124
3125                  he.regionDir = regionDir.getPath();
3126                  he.regionDirModTime = regionDir.getModificationTime();
3127                  he.hdfsRegioninfoFilePresent = regioninfoFileExists;
3128                  // we add to orphan list when we attempt to read .regioninfo
3129
3130                  // Set a flag if this region contains only edits
3131                  // This is special case if a region is left after split
3132                  he.hdfsOnlyEdits = true;
3133                  FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3134                  Path ePath = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
3135                  for (FileStatus subDir : subDirs) {
3136                    errors.progress();
3137                    String sdName = subDir.getPath().getName();
3138                    if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3139                      he.hdfsOnlyEdits = false;
3140                      break;
3141                    }
3142                  }
3143                  hbi.setHdfsEntry(he);
3144                }
3145              } catch (Exception e) {
3146                LOG.error("Could not load region dir", e);
3147                exceptions.add(e);
3148              }
3149            }
3150          }));
3151        }
3152
3153        // Ensure all pending tasks are complete (or that we run into an exception)
3154        for (Future<?> f : futures) {
3155          if (!exceptions.isEmpty()) {
3156            break;
3157          }
3158          try {
3159            f.get();
3160          } catch (ExecutionException e) {
3161            LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
3162            // Shouldn't happen, we already logged/caught any exceptions in the Runnable
3163          }
3164        }
3165      } catch (IOException e) {
3166        LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
3167        exceptions.add(e);
3168      } finally {
3169        if (!exceptions.isEmpty()) {
3170          errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3171              + tableDir.getPath().getName()
3172              + " Unable to fetch all HDFS region information. ");
3173          // Just throw the first exception as an indication something bad happened
3174          // Don't need to propagate all the exceptions, we already logged them all anyway
3175          throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement());
3176        }
3177      }
3178      return null;
3179    }
3180  }
3181
3182  /**
   * Contact HDFS and load the .regioninfo file for a single region into its
   * HbckRegionInfo entry.
3185   */
3186  static class WorkItemHdfsRegionInfo implements Callable<Void> {
3187    private HbckRegionInfo hbi;
3188    private HBaseFsck hbck;
3189    private HbckErrorReporter errors;
3190
3191    WorkItemHdfsRegionInfo(HbckRegionInfo hbi, HBaseFsck hbck, HbckErrorReporter errors) {
3192      this.hbi = hbi;
3193      this.hbck = hbck;
3194      this.errors = errors;
3195    }
3196
3197    @Override
3198    public synchronized Void call() throws IOException {
3199      // only load entries that haven't been loaded yet.
3200      if (hbi.getHdfsHRI() == null) {
3201        try {
3202          errors.progress();
3203          hbi.loadHdfsRegioninfo(hbck.getConf());
3204        } catch (IOException ioe) {
3205          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3206              + hbi.getTableName() + " in hdfs dir "
3207              + hbi.getHdfsRegionDir()
3208              + "!  It may be an invalid format or version file.  Treating as "
3209              + "an orphaned regiondir.";
3210          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3211          try {
3212            hbck.debugLsr(hbi.getHdfsRegionDir());
3213          } catch (IOException ioe2) {
3214            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3215            throw ioe2;
3216          }
3217          hbck.orphanHdfsDirs.add(hbi);
3218          throw ioe;
3219        }
3220      }
3221      return null;
3222    }
3223  }
3224
3225  /**
3226   * Display the full report from fsck. This displays all live and dead region
3227   * servers, and all known regions.
3228   */
3229  public static void setDisplayFullReport() {
3230    details = true;
3231  }
3232
3233  public static boolean shouldDisplayFullReport() {
3234    return details;
3235  }
3236
3237  /**
3238   * Set exclusive mode.
3239   */
3240  public static void setForceExclusive() {
3241    forceExclusive = true;
3242  }
3243
3244  /**
3245   * Only one instance of hbck can modify HBase at a time.
3246   */
3247  public boolean isExclusive() {
3248    return fixAny || forceExclusive;
3249  }
3250
3251  /**
3252   * Set summary mode.
3253   * Print only summary of the tables and status (OK or INCONSISTENT)
3254   */
3255  static void setSummary() {
3256    summary = true;
3257  }
3258
3259  /**
3260   * Set hbase:meta check mode.
3261   * Print only info about hbase:meta table deployment/state
3262   */
3263  void setCheckMetaOnly() {
3264    checkMetaOnly = true;
3265  }
3266
3267  /**
3268   * Set region boundaries check mode.
3269   */
3270  void setRegionBoundariesCheck() {
3271    checkRegionBoundaries = true;
3272  }
3273
3274  /**
3275   * Set replication fix mode.
3276   */
3277  public void setFixReplication(boolean shouldFix) {
3278    fixReplication = shouldFix;
3279    fixAny |= shouldFix;
3280  }
3281
3282  public void setCleanReplicationBarrier(boolean shouldClean) {
3283    cleanReplicationBarrier = shouldClean;
3284  }
3285
3286  /**
   * Mark that fsck should be rerun. This is set when we have tried to fix something and a
   * follow-up pass is needed to verify that the fix worked.
3291   */
3292  void setShouldRerun() {
3293    rerun = true;
3294  }
3295
3296  public boolean shouldRerun() {
3297    return rerun;
3298  }
3299
3300  /**
   * Set whether fsck should try to fix assignment inconsistencies (if any) that it finds.
3303   */
3304  public void setFixAssignments(boolean shouldFix) {
3305    fixAssignments = shouldFix;
3306    fixAny |= shouldFix;
3307  }
3308
3309  boolean shouldFixAssignments() {
3310    return fixAssignments;
3311  }
3312
3313  public void setFixMeta(boolean shouldFix) {
3314    fixMeta = shouldFix;
3315    fixAny |= shouldFix;
3316  }
3317
3318  boolean shouldFixMeta() {
3319    return fixMeta;
3320  }
3321
3322  public void setFixEmptyMetaCells(boolean shouldFix) {
3323    fixEmptyMetaCells = shouldFix;
3324    fixAny |= shouldFix;
3325  }
3326
3327  boolean shouldFixEmptyMetaCells() {
3328    return fixEmptyMetaCells;
3329  }
3330
3331  public void setCheckHdfs(boolean checking) {
3332    checkHdfs = checking;
3333  }
3334
3335  boolean shouldCheckHdfs() {
3336    return checkHdfs;
3337  }
3338
3339  public void setFixHdfsHoles(boolean shouldFix) {
3340    fixHdfsHoles = shouldFix;
3341    fixAny |= shouldFix;
3342  }
3343
3344  boolean shouldFixHdfsHoles() {
3345    return fixHdfsHoles;
3346  }
3347
3348  public void setFixTableOrphans(boolean shouldFix) {
3349    fixTableOrphans = shouldFix;
3350    fixAny |= shouldFix;
3351  }
3352
3353  boolean shouldFixTableOrphans() {
3354    return fixTableOrphans;
3355  }
3356
3357  public void setFixHdfsOverlaps(boolean shouldFix) {
3358    fixHdfsOverlaps = shouldFix;
3359    fixAny |= shouldFix;
3360  }
3361
3362  boolean shouldFixHdfsOverlaps() {
3363    return fixHdfsOverlaps;
3364  }
3365
3366  public void setFixHdfsOrphans(boolean shouldFix) {
3367    fixHdfsOrphans = shouldFix;
3368    fixAny |= shouldFix;
3369  }
3370
3371  boolean shouldFixHdfsOrphans() {
3372    return fixHdfsOrphans;
3373  }
3374
3375  public void setFixVersionFile(boolean shouldFix) {
3376    fixVersionFile = shouldFix;
3377    fixAny |= shouldFix;
3378  }
3379
3380  public boolean shouldFixVersionFile() {
3381    return fixVersionFile;
3382  }
3383
3384  public void setSidelineBigOverlaps(boolean sbo) {
3385    this.sidelineBigOverlaps = sbo;
3386  }
3387
3388  public boolean shouldSidelineBigOverlaps() {
3389    return sidelineBigOverlaps;
3390  }
3391
3392  public void setFixSplitParents(boolean shouldFix) {
3393    fixSplitParents = shouldFix;
3394    fixAny |= shouldFix;
3395  }
3396
3397  public void setRemoveParents(boolean shouldFix) {
3398    removeParents = shouldFix;
3399    fixAny |= shouldFix;
3400  }
3401
3402  boolean shouldFixSplitParents() {
3403    return fixSplitParents;
3404  }
3405
3406  boolean shouldRemoveParents() {
3407    return removeParents;
3408  }
3409
3410  public void setFixReferenceFiles(boolean shouldFix) {
3411    fixReferenceFiles = shouldFix;
3412    fixAny |= shouldFix;
3413  }
3414
3415  boolean shouldFixReferenceFiles() {
3416    return fixReferenceFiles;
3417  }
3418
3419  public void setFixHFileLinks(boolean shouldFix) {
3420    fixHFileLinks = shouldFix;
3421    fixAny |= shouldFix;
3422  }
3423
3424  boolean shouldFixHFileLinks() {
3425    return fixHFileLinks;
3426  }
3427
3428  public boolean shouldIgnorePreCheckPermission() {
3429    return !fixAny || ignorePreCheckPermission;
3430  }
3431
3432  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3433    this.ignorePreCheckPermission = ignorePreCheckPermission;
3434  }
3435
3436  /**
3437   * @param mm maximum number of regions to merge into a single region.
3438   */
3439  public void setMaxMerge(int mm) {
3440    this.maxMerge = mm;
3441  }
3442
3443  public int getMaxMerge() {
3444    return maxMerge;
3445  }
3446
3447  public void setMaxOverlapsToSideline(int mo) {
3448    this.maxOverlapsToSideline = mo;
3449  }
3450
3451  public int getMaxOverlapsToSideline() {
3452    return maxOverlapsToSideline;
3453  }
3454
3455  /**
   * Only check/fix the tables specified by the include list.
   * An empty list means all tables are included.
3458   */
3459  boolean isTableIncluded(TableName table) {
3460    return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
3461  }
3462
3463  public void includeTable(TableName table) {
3464    tablesIncluded.add(table);
3465  }
3466
3467  Set<TableName> getIncludedTables() {
3468    return new HashSet<>(tablesIncluded);
3469  }
3470
3471  /**
   * We are interested only in those tables that have not changed their state in
   * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag.
   * @param seconds the time lag in seconds
3475   */
3476  public void setTimeLag(long seconds) {
3477    timelag = seconds * 1000; // convert to milliseconds
3478  }
3479
3480  /**
3481   *
3482   * @param sidelineDir - HDFS path to sideline data
3483   */
3484  public void setSidelineDir(String sidelineDir) {
3485    this.sidelineDir = new Path(sidelineDir);
3486  }
3487
  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
      throws IOException {
3489    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3490  }
3491
3492  public HFileCorruptionChecker getHFilecorruptionChecker() {
3493    return hfcc;
3494  }
3495
3496  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3497    this.hfcc = hfcc;
3498  }
3499
3500  public void setRetCode(int code) {
3501    this.retcode = code;
3502  }
3503
3504  public int getRetCode() {
3505    return retcode;
3506  }
3507
3508  protected HBaseFsck printUsageAndExit() {
3509    StringWriter sw = new StringWriter(2048);
3510    PrintWriter out = new PrintWriter(sw);
3511    out.println("");
3512    out.println("-----------------------------------------------------------------------");
3513    out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
3514    out.println("In general, all Read-Only options are supported and can be be used");
3515    out.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
3516    out.println("below for details on which options are not supported.");
3517    out.println("-----------------------------------------------------------------------");
3518    out.println("");
3519    out.println("Usage: fsck [opts] {only tables}");
3520    out.println(" where [opts] are:");
3521    out.println("   -help Display help options (this)");
3522    out.println("   -details Display full report of all regions.");
3523    out.println("   -timelag <timeInSeconds>  Process only regions that " +
3524                       " have not experienced any metadata updates in the last " +
3525                       " <timeInSeconds> seconds.");
3526    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3527        " before checking if the fix worked if run with -fix");
3528    out.println("   -summary Print only summary of the tables and status.");
3529    out.println("   -metaonly Only check the state of the hbase:meta table.");
3530    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3531    out.println("   -boundaries Verify that regions boundaries are the same between META and store files.");
3532    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
3533
3534    out.println("");
3535    out.println("  Datafile Repair options: (expert features, use with caution!)");
3536    out.println("   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
3537    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
3538
3539    out.println("");
3540    out.println(" Replication options");
3541    out.println("   -fixReplication   Deletes replication queues for removed peers");
3542
3543    out.println("");
3544    out.println("  Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
3545    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
3546    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
3547    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
3548    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
3549        + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3550    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
3551
3552    out.println("");
3553    out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
3554    out.println("");
3555    out.println("  UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
3556    out.println("   -fix              Try to fix region assignments.  This is for backwards compatiblity");
3557    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
3558    out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
3559    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
3560    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
3561    out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3562    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
3563    out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
3564    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
3565    out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
3566    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
3567    out.println("   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
3568    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
3569        + " (empty REGIONINFO_QUALIFIER rows)");
3570
3571    out.println("");
3572    out.println("  UNSUPPORTED Metadata Repair shortcuts");
3573    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
3574        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles" +
3575        "-fixHFileLinks");
3576    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3577    out.println("");
3578    out.println(" Replication options");
3579    out.println("   -fixReplication   Deletes replication queues for removed peers");
3580    out.println("   -cleanReplicationBrarier [tableName] clean the replication barriers " +
3581        "of a specified table, tableName is required");
3582    out.flush();
3583    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3584
3585    setRetCode(-2);
3586    return this;
3587  }
3588
3589  /**
3590   * Main program
3591   *
   * @param args command-line arguments
   * @throws Exception if running hbck fails
3594   */
3595  public static void main(String[] args) throws Exception {
3596    // create a fsck object
3597    Configuration conf = HBaseConfiguration.create();
3598    Path hbasedir = CommonFSUtils.getRootDir(conf);
3599    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3600    CommonFSUtils.setFsDefault(conf, new Path(defaultFs));
3601    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
3602    System.exit(ret);
3603  }
3604
3605  /**
3606   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
3607   */
3608  static class HBaseFsckTool extends Configured implements Tool {
3609    HBaseFsckTool(Configuration conf) { super(conf); }
3610    @Override
3611    public int run(String[] args) throws Exception {
3612      HBaseFsck hbck = new HBaseFsck(getConf());
3613      hbck.exec(hbck.executor, args);
3614      hbck.close();
3615      return hbck.getRetCode();
3616    }
3617  }
3618
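  /**
   * Parse the command-line options, connect to the cluster, and run the requested checks and
   * fixes. If any fix was applied, one more read-only pass is run to verify the result.
   */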
3619  public HBaseFsck exec(ExecutorService exec, String[] args)
3620      throws KeeperException, IOException, InterruptedException, ReplicationException {
3621    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3622
3623    boolean checkCorruptHFiles = false;
3624    boolean sidelineCorruptHFiles = false;
3625
3626    // Process command-line args.
3627    for (int i = 0; i < args.length; i++) {
3628      String cmd = args[i];
3629      if (cmd.equals("-help") || cmd.equals("-h")) {
3630        return printUsageAndExit();
3631      } else if (cmd.equals("-details")) {
3632        setDisplayFullReport();
3633      } else if (cmd.equals("-exclusive")) {
3634        setForceExclusive();
3635      } else if (cmd.equals("-timelag")) {
3636        if (i == args.length - 1) {
3637          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3638          return printUsageAndExit();
3639        }
3640        try {
3641          long timelag = Long.parseLong(args[++i]);
3642          setTimeLag(timelag);
3643        } catch (NumberFormatException e) {
3644          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3645          return printUsageAndExit();
3646        }
3647      } else if (cmd.equals("-sleepBeforeRerun")) {
3648        if (i == args.length - 1) {
3649          errors.reportError(ERROR_CODE.WRONG_USAGE,
3650            "HBaseFsck: -sleepBeforeRerun needs a value.");
3651          return printUsageAndExit();
3652        }
3653        try {
3654          sleepBeforeRerun = Long.parseLong(args[++i]);
3655        } catch (NumberFormatException e) {
3656          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3657          return printUsageAndExit();
3658        }
3659      } else if (cmd.equals("-sidelineDir")) {
3660        if (i == args.length - 1) {
3661          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3662          return printUsageAndExit();
3663        }
3664        setSidelineDir(args[++i]);
3665      } else if (cmd.equals("-fix")) {
3666        errors.reportError(ERROR_CODE.WRONG_USAGE,
3667          "This option is deprecated, please use  -fixAssignments instead.");
3668        setFixAssignments(true);
3669      } else if (cmd.equals("-fixAssignments")) {
3670        setFixAssignments(true);
3671      } else if (cmd.equals("-fixMeta")) {
3672        setFixMeta(true);
3673      } else if (cmd.equals("-noHdfsChecking")) {
3674        setCheckHdfs(false);
3675      } else if (cmd.equals("-fixHdfsHoles")) {
3676        setFixHdfsHoles(true);
3677      } else if (cmd.equals("-fixHdfsOrphans")) {
3678        setFixHdfsOrphans(true);
3679      } else if (cmd.equals("-fixTableOrphans")) {
3680        setFixTableOrphans(true);
3681      } else if (cmd.equals("-fixHdfsOverlaps")) {
3682        setFixHdfsOverlaps(true);
3683      } else if (cmd.equals("-fixVersionFile")) {
3684        setFixVersionFile(true);
3685      } else if (cmd.equals("-sidelineBigOverlaps")) {
3686        setSidelineBigOverlaps(true);
3687      } else if (cmd.equals("-fixSplitParents")) {
3688        setFixSplitParents(true);
3689      } else if (cmd.equals("-removeParents")) {
3690        setRemoveParents(true);
3691      } else if (cmd.equals("-ignorePreCheckPermission")) {
3692        setIgnorePreCheckPermission(true);
3693      } else if (cmd.equals("-checkCorruptHFiles")) {
3694        checkCorruptHFiles = true;
3695      } else if (cmd.equals("-sidelineCorruptHFiles")) {
3696        sidelineCorruptHFiles = true;
3697      } else if (cmd.equals("-fixReferenceFiles")) {
3698        setFixReferenceFiles(true);
3699      } else if (cmd.equals("-fixHFileLinks")) {
3700        setFixHFileLinks(true);
3701      } else if (cmd.equals("-fixEmptyMetaCells")) {
3702        setFixEmptyMetaCells(true);
3703      } else if (cmd.equals("-repair")) {
3704        // this attempts to merge overlapping hdfs regions, needs testing
3705        // under load
3706        setFixHdfsHoles(true);
3707        setFixHdfsOrphans(true);
3708        setFixMeta(true);
3709        setFixAssignments(true);
3710        setFixHdfsOverlaps(true);
3711        setFixVersionFile(true);
3712        setSidelineBigOverlaps(true);
3713        setFixSplitParents(false);
3714        setCheckHdfs(true);
3715        setFixReferenceFiles(true);
3716        setFixHFileLinks(true);
3717      } else if (cmd.equals("-repairHoles")) {
3718        // this will make all missing hdfs regions available but may lose data
3719        setFixHdfsHoles(true);
3720        setFixHdfsOrphans(false);
3721        setFixMeta(true);
3722        setFixAssignments(true);
3723        setFixHdfsOverlaps(false);
3724        setSidelineBigOverlaps(false);
3725        setFixSplitParents(false);
3726        setCheckHdfs(true);
3727      } else if (cmd.equals("-maxOverlapsToSideline")) {
3728        if (i == args.length - 1) {
3729          errors.reportError(ERROR_CODE.WRONG_USAGE,
3730            "-maxOverlapsToSideline needs a numeric value argument.");
3731          return printUsageAndExit();
3732        }
3733        try {
3734          int maxOverlapsToSideline = Integer.parseInt(args[++i]);
3735          setMaxOverlapsToSideline(maxOverlapsToSideline);
3736        } catch (NumberFormatException e) {
3737          errors.reportError(ERROR_CODE.WRONG_USAGE,
3738            "-maxOverlapsToSideline needs a numeric value argument.");
3739          return printUsageAndExit();
3740        }
3741      } else if (cmd.equals("-maxMerge")) {
3742        if (i == args.length - 1) {
3743          errors.reportError(ERROR_CODE.WRONG_USAGE,
3744            "-maxMerge needs a numeric value argument.");
3745          return printUsageAndExit();
3746        }
3747        try {
3748          int maxMerge = Integer.parseInt(args[++i]);
3749          setMaxMerge(maxMerge);
3750        } catch (NumberFormatException e) {
3751          errors.reportError(ERROR_CODE.WRONG_USAGE,
3752            "-maxMerge needs a numeric value argument.");
3753          return printUsageAndExit();
3754        }
3755      } else if (cmd.equals("-summary")) {
3756        setSummary();
3757      } else if (cmd.equals("-metaonly")) {
3758        setCheckMetaOnly();
3759      } else if (cmd.equals("-boundaries")) {
3760        setRegionBoundariesCheck();
3761      } else if (cmd.equals("-fixReplication")) {
3762        setFixReplication(true);
3763      } else if (cmd.equals("-cleanReplicationBarrier")) {
3764        setCleanReplicationBarrier(true);
3765        if(args[++i].startsWith("-")){
3766          printUsageAndExit();
3767        }
3768        setCleanReplicationBarrierTable(args[i]);
3769      } else if (cmd.startsWith("-")) {
3770        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3771        return printUsageAndExit();
3772      } else {
3773        includeTable(TableName.valueOf(cmd));
3774        errors.print("Allow checking/fixes for table: " + cmd);
3775      }
3776    }
3777
3778    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
3779
3780    // pre-check current user has FS write permission or not
3781    try {
3782      preCheckPermission();
3783    } catch (IOException ioe) {
3784      Runtime.getRuntime().exit(-1);
3785    }
3786
3787    // do the real work of hbck
3788    connect();
3789
3790    // after connecting to server above, we have server version
3791    // check if unsupported option is specified based on server version
3792    if (!isOptionsSupported(args)) {
3793      return printUsageAndExit();
3794    }
3795
3796    try {
3797      // if corrupt file mode is on, first fix them since they may be opened later
3798      if (checkCorruptHFiles || sidelineCorruptHFiles) {
3799        LOG.info("Checking all hfiles for corruption");
3800        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3801        setHFileCorruptionChecker(hfcc); // so we can get result
3802        Collection<TableName> tables = getIncludedTables();
3803        Collection<Path> tableDirs = new ArrayList<>();
3804        Path rootdir = CommonFSUtils.getRootDir(getConf());
3805        if (tables.size() > 0) {
3806          for (TableName t : tables) {
3807            tableDirs.add(CommonFSUtils.getTableDir(rootdir, t));
3808          }
3809        } else {
3810          tableDirs = FSUtils.getTableDirs(CommonFSUtils.getCurrentFileSystem(getConf()), rootdir);
3811        }
3812        hfcc.checkTables(tableDirs);
3813        hfcc.report(errors);
3814      }
3815
3816      // check and fix table integrity, region consistency.
3817      int code = onlineHbck();
3818      setRetCode(code);
3819      // If we have changed the HBase state it is better to run hbck again
3820      // to see if we haven't broken something else in the process.
3821      // We run it only once more because otherwise we can easily fall into
3822      // an infinite loop.
3823      if (shouldRerun()) {
3824        try {
3825          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3826          Thread.sleep(sleepBeforeRerun);
3827        } catch (InterruptedException ie) {
3828          LOG.warn("Interrupted while sleeping");
3829          return this;
3830        }
3831        // Just report
3832        setFixAssignments(false);
3833        setFixMeta(false);
3834        setFixHdfsHoles(false);
3835        setFixHdfsOverlaps(false);
3836        setFixVersionFile(false);
3837        setFixTableOrphans(false);
3838        errors.resetErrors();
3839        code = onlineHbck();
3840        setRetCode(code);
3841      }
3842    } finally {
3843      IOUtils.closeQuietly(this);
3844    }
3845    return this;
3846  }
3847
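  /**
   * Check whether any of the supplied command-line options are in the set of options that are
   * no longer supported against an HBase 2.x (or newer) server.
   */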
3848  private boolean isOptionsSupported(String[] args) {
3849    boolean result = true;
3850    String hbaseServerVersion = status.getHBaseVersion();
3851    if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
3852      // Process command-line args.
3853      for (String arg : args) {
3854        if (unsupportedOptionsInV2.contains(arg)) {
          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
              "option '" + arg + "' is not supported!");
3857          result = false;
3858          break;
3859        }
3860      }
3861    }
3862    return result;
3863  }
3864
3865  public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
3866    this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
3867  }
3868
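  /**
   * Remove the replication barriers (and corresponding last-pushed sequence ids) recorded in
   * hbase:meta for the table given with -cleanReplicationBarrier, provided the table is not a
   * system table and does not use global replication scope.
   */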
3869  public void cleanReplicationBarrier() throws IOException {
3870    if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
3871      return;
3872    }
3873    if (cleanReplicationBarrierTable.isSystemTable()) {
3874      errors.reportError(ERROR_CODE.INVALID_TABLE,
3875        "invalid table: " + cleanReplicationBarrierTable);
3876      return;
3877    }
3878
3879    boolean isGlobalScope = false;
3880    try {
3881      isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
3882    } catch (TableNotFoundException e) {
3883      LOG.info("we may need to clean some erroneous data due to bugs");
3884    }
3885
3886    if (isGlobalScope) {
3887      errors.reportError(ERROR_CODE.INVALID_TABLE,
3888        "table's replication scope is global: " + cleanReplicationBarrierTable);
3889      return;
3890    }
3891    List<byte[]> regionNames = new ArrayList<>();
3892    Scan barrierScan = new Scan();
3893    barrierScan.setCaching(100);
3894    barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
3895    barrierScan
3896        .withStartRow(MetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
3897          MetaTableAccessor.QueryType.REGION))
3898        .withStopRow(MetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
3899          MetaTableAccessor.QueryType.REGION));
3900    Result result;
3901    try (ResultScanner scanner = meta.getScanner(barrierScan)) {
3902      while ((result = scanner.next()) != null) {
3903        regionNames.add(result.getRow());
3904      }
3905    }
    if (regionNames.isEmpty()) {
      errors.reportError(ERROR_CODE.INVALID_TABLE,
        "there are no replication barriers for table: " + cleanReplicationBarrierTable);
3909      return;
3910    }
3911    ReplicationQueueStorage queueStorage =
3912        ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
3913    List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
3914    if (peerDescriptions != null && peerDescriptions.size() > 0) {
3915      List<String> peers = peerDescriptions.stream()
3916          .filter(peerConfig -> peerConfig.getPeerConfig()
3917            .needToReplicate(cleanReplicationBarrierTable))
3918          .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
3919      try {
3920        List<String> batch = new ArrayList<>();
3921        for (String peer : peers) {
3922          for (byte[] regionName : regionNames) {
3923            batch.add(RegionInfo.encodeRegionName(regionName));
3924            if (batch.size() % 100 == 0) {
3925              queueStorage.removeLastSequenceIds(peer, batch);
3926              batch.clear();
3927            }
3928          }
3929          if (batch.size() > 0) {
3930            queueStorage.removeLastSequenceIds(peer, batch);
3931            batch.clear();
3932          }
3933        }
3934      } catch (ReplicationException re) {
3935        throw new IOException(re);
3936      }
3937    }
3938    for (byte[] regionName : regionNames) {
3939      meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
3940    }
3941    setShouldRerun();
3942  }
3943
3944  /**
3945   * ls -r for debugging purposes
3946   */
3947  void debugLsr(Path p) throws IOException {
3948    debugLsr(getConf(), p, errors);
3949  }
3950
3951  /**
3952   * ls -r for debugging purposes
3953   */
3954  public static void debugLsr(Configuration conf,
3955      Path p) throws IOException {
3956    debugLsr(conf, p, new PrintingErrorReporter());
3957  }
3958
3959  /**
3960   * ls -r for debugging purposes
3961   */
3962  public static void debugLsr(Configuration conf,
3963      Path p, HbckErrorReporter errors) throws IOException {
3964    if (!LOG.isDebugEnabled() || p == null) {
3965      return;
3966    }
3967    FileSystem fs = p.getFileSystem(conf);
3968
3969    if (!fs.exists(p)) {
3970      // nothing
3971      return;
3972    }
3973    errors.print(p.toString());
3974
3975    if (fs.isFile(p)) {
3976      return;
3977    }
3978
3979    if (fs.getFileStatus(p).isDirectory()) {
3980      FileStatus[] fss= fs.listStatus(p);
3981      for (FileStatus status : fss) {
3982        debugLsr(conf, status.getPath(), errors);
3983      }
3984    }
3985  }
3986}