001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.Closeable;
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.io.InterruptedIOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.net.InetAddress;
027import java.net.URI;
028import java.util.ArrayList;
029import java.util.Collection;
030import java.util.Collections;
031import java.util.Comparator;
032import java.util.EnumSet;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.List;
037import java.util.Locale;
038import java.util.Map;
039import java.util.Map.Entry;
040import java.util.Objects;
041import java.util.Optional;
042import java.util.Set;
043import java.util.SortedMap;
044import java.util.TreeMap;
045import java.util.Vector;
046import java.util.concurrent.Callable;
047import java.util.concurrent.ConcurrentSkipListMap;
048import java.util.concurrent.ExecutionException;
049import java.util.concurrent.ExecutorService;
050import java.util.concurrent.Executors;
051import java.util.concurrent.Future;
052import java.util.concurrent.FutureTask;
053import java.util.concurrent.ScheduledThreadPoolExecutor;
054import java.util.concurrent.TimeUnit;
055import java.util.concurrent.TimeoutException;
056import java.util.concurrent.atomic.AtomicBoolean;
057import java.util.concurrent.atomic.AtomicInteger;
058
059import org.apache.commons.io.IOUtils;
060import org.apache.commons.lang3.StringUtils;
061import org.apache.hadoop.conf.Configuration;
062import org.apache.hadoop.conf.Configured;
063import org.apache.hadoop.fs.FSDataOutputStream;
064import org.apache.hadoop.fs.FileStatus;
065import org.apache.hadoop.fs.FileSystem;
066import org.apache.hadoop.fs.Path;
067import org.apache.hadoop.fs.permission.FsAction;
068import org.apache.hadoop.fs.permission.FsPermission;
069import org.apache.hadoop.hbase.Abortable;
070import org.apache.hadoop.hbase.Cell;
071import org.apache.hadoop.hbase.CellUtil;
072import org.apache.hadoop.hbase.ClusterMetrics;
073import org.apache.hadoop.hbase.ClusterMetrics.Option;
074import org.apache.hadoop.hbase.HBaseConfiguration;
075import org.apache.hadoop.hbase.HBaseInterfaceAudience;
076import org.apache.hadoop.hbase.HConstants;
077import org.apache.hadoop.hbase.HRegionLocation;
078import org.apache.hadoop.hbase.KeyValue;
079import org.apache.hadoop.hbase.MasterNotRunningException;
080import org.apache.hadoop.hbase.MetaTableAccessor;
081import org.apache.hadoop.hbase.RegionLocations;
082import org.apache.hadoop.hbase.ServerName;
083import org.apache.hadoop.hbase.TableName;
084import org.apache.hadoop.hbase.ZooKeeperConnectionException;
085import org.apache.hadoop.hbase.client.Admin;
086import org.apache.hadoop.hbase.client.ClusterConnection;
087import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
088import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
089import org.apache.hadoop.hbase.client.Connection;
090import org.apache.hadoop.hbase.client.ConnectionFactory;
091import org.apache.hadoop.hbase.client.Delete;
092import org.apache.hadoop.hbase.client.Get;
093import org.apache.hadoop.hbase.client.Put;
094import org.apache.hadoop.hbase.client.RegionInfo;
095import org.apache.hadoop.hbase.client.RegionInfoBuilder;
096import org.apache.hadoop.hbase.client.RegionReplicaUtil;
097import org.apache.hadoop.hbase.client.Result;
098import org.apache.hadoop.hbase.client.RowMutations;
099import org.apache.hadoop.hbase.client.Table;
100import org.apache.hadoop.hbase.client.TableDescriptor;
101import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
102import org.apache.hadoop.hbase.client.TableState;
103import org.apache.hadoop.hbase.io.FileLink;
104import org.apache.hadoop.hbase.io.HFileLink;
105import org.apache.hadoop.hbase.io.hfile.CacheConfig;
106import org.apache.hadoop.hbase.io.hfile.HFile;
107import org.apache.hadoop.hbase.master.RegionState;
108import org.apache.hadoop.hbase.regionserver.HRegion;
109import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
110import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
111import org.apache.hadoop.hbase.security.AccessDeniedException;
112import org.apache.hadoop.hbase.security.UserProvider;
113import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
114import org.apache.hadoop.hbase.util.HbckErrorReporter.ERROR_CODE;
115import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
116import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
117import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
118import org.apache.hadoop.hbase.wal.WALSplitter;
119import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
120import org.apache.hadoop.hbase.zookeeper.ZKUtil;
121import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
122import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
123import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
124import org.apache.hadoop.ipc.RemoteException;
125import org.apache.hadoop.security.UserGroupInformation;
126import org.apache.hadoop.util.ReflectionUtils;
127import org.apache.hadoop.util.Tool;
128import org.apache.hadoop.util.ToolRunner;
129import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
130import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
131import org.apache.yetus.audience.InterfaceAudience;
132import org.apache.yetus.audience.InterfaceStability;
133import org.apache.zookeeper.KeeperException;
134import org.slf4j.Logger;
135import org.slf4j.LoggerFactory;
136
137import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
138import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
139import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
140
141import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
142import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
143
144/**
145 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
146 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
 * work with hbase-2.x; it can read state but is not allowed to change state, i.e. to effect
 * 'repair'. See HBCK2 (HBASE-19121) for an hbck tool that works against hbase-2.x.
149 *
150 * <p>
 * Region consistency checks verify that hbase:meta, region deployment on region
 * servers, and the state of data in HDFS (.regioninfo files) are all in
 * agreement.
154 * <p>
155 * Table integrity checks verify that all possible row keys resolve to exactly
156 * one region of a table.  This means there are no individual degenerate
157 * or backwards regions; no holes between regions; and that there are no
158 * overlapping regions.
159 * <p>
160 * The general repair strategy works in two phases:
161 * <ol>
162 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
163 * <li> Repair Region Consistency with hbase:meta and assignments
164 * </ol>
165 * <p>
166 * For table integrity repairs, the tables' region directories are scanned
167 * for .regioninfo files.  Each table's integrity is then verified.  If there
168 * are any orphan regions (regions with no .regioninfo files) or holes, new
169 * regions are fabricated.  Backwards regions are sidelined as well as empty
170 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
171 * a new region is created and all data is merged into the new region.
172 * <p>
173 * Table integrity repairs deal solely with HDFS and could potentially be done
174 * offline -- the hbase region servers or master do not need to be running.
175 * This phase can eventually be used to completely reconstruct the hbase:meta table in
176 * an offline fashion.
177 * <p>
 * Region consistency requires three conditions -- 1) a valid .regioninfo file
 * present in the HDFS region dir, 2) a valid row with .regioninfo data in META,
 * and 3) a region deployed only at the regionserver it was assigned to, with
 * proper state recorded in the master.
182 * <p>
183 * Region consistency repairs require hbase to be online so that hbck can
184 * contact the HBase master and region servers.  The hbck#connect() method must
185 * first be called successfully.  Much of the region consistency information
186 * is transient and less risky to repair.
187 * <p>
188 * If hbck is run from the command line, there are a handful of arguments that
189 * can be used to limit the kinds of repairs hbck will do.  See the code in
190 * {@link #printUsageAndExit()} for more details.
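 * <p>
 * For illustration only, a minimal programmatic (check-only) run might look like the sketch
 * below; fix options are not honored against an hbase-2.x cluster:
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * try (HBaseFsck fsck = new HBaseFsck(conf)) {
 *   fsck.connect();                   // takes the hbck lock (if exclusive) and opens connections
 *   int exitCode = fsck.onlineHbck(); // runs the checks and prints the table summary
 * }
 * }</pre>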
191 */
192@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
193@InterfaceStability.Evolving
194public class HBaseFsck extends Configured implements Closeable {
195  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
196  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
197  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
198  private static boolean rsSupportsOffline = true;
199  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
200  private static final int DEFAULT_MAX_MERGE = 5;
201
202  /**
   * This is the name of the lock file that hbase-1.x hbck1 uses by default.
   * The lock is put in place before hbck goes to write/make changes.
205   */
206  @VisibleForTesting
207  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
208  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
209  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
210  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In HADOOP-2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies a timeout on this operation of up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
215  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
216  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
217  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
218  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
219
220  /**********************
221   * Internal resources
222   **********************/
223  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
224  private ClusterMetrics status;
225  private ClusterConnection connection;
226  private Admin admin;
227  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
229  protected ExecutorService executor;
230  private long startMillis = EnvironmentEdgeManager.currentTime();
231  private HFileCorruptionChecker hfcc;
232  private int retcode = 0;
233  private Path HBCK_LOCK_PATH;
234  private FSDataOutputStream hbckOutFd;
  // This flag prevents the hbck lock from being cleaned up twice, once by the
  // ShutdownHook and once by the main code. We clean up only if connect() was
  // successful
238  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
239
240  // Unsupported options in HBase 2.0+
241  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
242      "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
243      "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
244      "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
245
246  /***********
247   * Options
248   ***********/
249  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this lag
251  private static boolean forceExclusive = false; // only this hbck can modify HBase
252  private boolean fixAssignments = false; // fix assignment errors?
253  private boolean fixMeta = false; // fix meta errors?
254  private boolean checkHdfs = true; // load and check fs consistency?
255  private boolean fixHdfsHoles = false; // fix fs holes?
256  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
257  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
258  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
259  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
260  private boolean fixSplitParents = false; // fix lingering split parents
261  private boolean removeParents = false; // remove split parents
262  private boolean fixReferenceFiles = false; // fix lingering reference store file
263  private boolean fixHFileLinks = false; // fix lingering HFileLinks
264  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
265  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
  private boolean fixAny = false; // Set to true if any fix option is enabled.
267
  // limit checking/fixes to listed tables; if empty, attempt to check/fix all
  // hbase:meta is always checked
270  private Set<TableName> tablesIncluded = new HashSet<>();
271  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
272  // maximum number of overlapping regions to sideline
273  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
274  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
275  private Path sidelineDir = null;
276
277  private boolean rerun = false; // if we tried to fix something, rerun hbck
278  private static boolean summary = false; // if we want to print less output
279  private boolean checkMetaOnly = false;
280  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if true, skip the filesystem permission pre-check
282
283  /*********
284   * State
285   *********/
286  final private HbckErrorReporter errors;
287  int fixes = 0;
288
289  /**
290   * This map contains the state of all hbck items.  It maps from encoded region
291   * name to HbckRegionInfo structure.  The information contained in HbckRegionInfo is used
292   * to detect and correct consistency (hdfs/meta/deployment) problems.
293   */
294  private TreeMap<String, HbckRegionInfo> regionInfoMap = new TreeMap<>();
295  // Empty regioninfo qualifiers in hbase:meta
296  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
297
298  /**
   * This map from TableName -> HbckTableInfo contains the structures necessary to
300   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
301   * to prevent dupes.
302   *
303   * If tablesIncluded is empty, this map contains all tables.
304   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case it contains only
   * the meta table.
307   */
308  private SortedMap<TableName, HbckTableInfo> tablesInfo = new ConcurrentSkipListMap<>();
309
310  /**
311   * When initially looking at HDFS, we attempt to find any orphaned data.
312   */
313  private List<HbckRegionInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<>());
314
315  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
316  private Map<TableName, TableState> tableStates = new HashMap<>();
317  private final RetryCounterFactory lockFileRetryCounterFactory;
318  private final RetryCounterFactory createZNodeRetryCounterFactory;
319
320  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
321
322  private ZKWatcher zkw = null;
323  private String hbckEphemeralNodePath = null;
324  private boolean hbckZodeCreated = false;
325
326  /**
327   * Constructor
328   *
329   * @param conf Configuration object
330   * @throws MasterNotRunningException if the master is not running
331   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
332   */
333  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
334      ZooKeeperConnectionException, IOException, ClassNotFoundException {
335    this(conf, createThreadPool(conf));
336  }
337
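  /**
   * Creates the daemon thread pool used for parallelizable hbck work (contacting region servers,
   * reading .regioninfo files, etc.). The pool size is read from "hbasefsck.numthreads",
   * defaulting to MAX_NUM_THREADS.
   */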
338  private static ExecutorService createThreadPool(Configuration conf) {
339    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
340    return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
341  }
342
343  /**
344   * Constructor
345   *
346   * @param conf
   *          Configuration object
   * @param exec
   *          executor service used for parallelizable hbck work
348   * @throws MasterNotRunningException
349   *           if the master is not running
350   * @throws ZooKeeperConnectionException
351   *           if unable to connect to ZooKeeper
352   */
353  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
354      ZooKeeperConnectionException, IOException, ClassNotFoundException {
355    super(conf);
356    errors = getErrorReporter(getConf());
357    this.executor = exec;
358    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
359    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
360    zkw = createZooKeeperWatcher();
361  }
362
363  /**
364   * @return A retry counter factory configured for retrying lock file creation.
365   */
366  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
367    return new RetryCounterFactory(
368        conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
369        conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
370            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
371        conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
372            DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
373  }
374
375  /**
376   * @return A retry counter factory configured for retrying znode creation.
377   */
378  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
379    return new RetryCounterFactory(
380        conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
381        conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
382            DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
383        conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
384            DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
385  }
386
387  /**
   * @return The tmp dir this tool writes to.
389   */
390  @VisibleForTesting
391  public static Path getTmpDir(Configuration conf) throws IOException {
392    return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
393  }
394
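  /**
   * Callable that creates the hbck lock file under the HBase temp dir and writes an identifying
   * note into it. Returns the open output stream, or null if the file is already being created
   * (i.e. another hbck instance holds the lock).
   */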
395  private static class FileLockCallable implements Callable<FSDataOutputStream> {
396    RetryCounter retryCounter;
397    private final Configuration conf;
398    private Path hbckLockPath = null;
399
400    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
401      this.retryCounter = retryCounter;
402      this.conf = conf;
403    }
404
405    /**
406     * @return Will be <code>null</code> unless you call {@link #call()}
407     */
408    Path getHbckLockPath() {
409      return this.hbckLockPath;
410    }
411
412    @Override
413    public FSDataOutputStream call() throws IOException {
414      try {
415        FileSystem fs = FSUtils.getCurrentFileSystem(this.conf);
416        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf,
417            HConstants.DATA_FILE_UMASK_KEY);
418        Path tmpDir = getTmpDir(conf);
419        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
420        fs.mkdirs(tmpDir);
421        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
422        out.writeBytes(InetAddress.getLocalHost().toString());
423        // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
424        out.writeBytes(" Written by an hbase-2.x Master to block an " +
425            "attempt by an hbase-1.x HBCK tool making modification to state. " +
426            "See 'HBCK must match HBase server version' in the hbase refguide.");
427        out.flush();
428        return out;
429      } catch(RemoteException e) {
430        if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
431          return null;
432        } else {
433          throw e;
434        }
435      }
436    }
437
438    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
439        final Path hbckLockFilePath, final FsPermission defaultPerms)
440        throws IOException {
441      IOException exception = null;
442      do {
443        try {
444          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
445        } catch (IOException ioe) {
446          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
447              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
448              + retryCounter.getMaxAttempts());
449          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
450              ioe);
451          try {
452            exception = ioe;
453            retryCounter.sleepUntilNextRetry();
454          } catch (InterruptedException ie) {
455            throw (InterruptedIOException) new InterruptedIOException(
456                "Can't create lock file " + hbckLockFilePath.getName())
457            .initCause(ie);
458          }
459        }
460      } while (retryCounter.shouldRetry());
461
462      throw exception;
463    }
464  }
465
466  /**
   * This method maintains a lock using a file. If the lock file cannot be created, the returned
   * stream will be null.
468   *
   * @return Pair of the lock file path and the newly opened output stream; the stream is null
   *         if the lock could not be acquired
470   * @throws IOException if IO failure occurs
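   * <p>
   * For illustration (mirroring what {@link #connect()} does), a caller might use it as:
   * <pre>{@code
   * Pair<Path, FSDataOutputStream> lock =
   *     checkAndMarkRunningHbck(conf, createLockRetryCounterFactory(conf).create());
   * if (lock.getSecond() == null) {
   *   // another hbck instance already holds the lock file
   * }
   * }</pre>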
471   */
472  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
473      RetryCounter retryCounter) throws IOException {
474    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
475    ExecutorService executor = Executors.newFixedThreadPool(1);
476    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
477    executor.execute(futureTask);
478    final int timeoutInSeconds = conf.getInt(
479      "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
480    FSDataOutputStream stream = null;
481    try {
482      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
483    } catch (ExecutionException ee) {
484      LOG.warn("Encountered exception when opening lock file", ee);
485    } catch (InterruptedException ie) {
486      LOG.warn("Interrupted when opening lock file", ie);
487      Thread.currentThread().interrupt();
488    } catch (TimeoutException exception) {
489      // took too long to obtain lock
490      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
491      futureTask.cancel(true);
492    } finally {
493      executor.shutdownNow();
494    }
495    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
496  }
497
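  /**
   * Deletes the hbck lock file (closing its output stream first) if this instance holds the
   * exclusive lock, retrying the delete according to the lock file retry settings.
   */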
498  private void unlockHbck() {
499    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
500      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
501      do {
502        try {
503          IOUtils.closeQuietly(hbckOutFd);
504          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
505          LOG.info("Finishing hbck");
506          return;
507        } catch (IOException ioe) {
508          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
509              + (retryCounter.getAttemptTimes() + 1) + " of "
510              + retryCounter.getMaxAttempts());
511          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
512          try {
513            retryCounter.sleepUntilNextRetry();
514          } catch (InterruptedException ie) {
515            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while deleting lock file " +
517                HBCK_LOCK_PATH);
518            return;
519          }
520        }
521      } while (retryCounter.shouldRetry());
522    }
523  }
524
525  /**
526   * To repair region consistency, one must call connect() in order to repair
527   * online state.
528   */
529  public void connect() throws IOException {
530
531    if (isExclusive()) {
532      // Grab the lock
533      Pair<Path, FSDataOutputStream> pair =
534          checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
535      HBCK_LOCK_PATH = pair.getFirst();
536      this.hbckOutFd = pair.getSecond();
537      if (hbckOutFd == null) {
538        setRetCode(-1);
539        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
540            "[If you are sure no other instance is running, delete the lock file " +
541            HBCK_LOCK_PATH + " and rerun the tool]");
542        throw new IOException("Duplicate hbck - Abort");
543      }
544
545      // Make sure to cleanup the lock
546      hbckLockCleanup.set(true);
547    }
548
549
    // Add a shutdown hook so that if the user kills hbck with a ctrl-c, we
    // clean up the znode and the lock file, making them available for
    // subsequent runs
553    Runtime.getRuntime().addShutdownHook(new Thread() {
554      @Override
555      public void run() {
556        IOUtils.closeQuietly(HBaseFsck.this);
557        cleanupHbckZnode();
558        unlockHbck();
559      }
560    });
561
562    LOG.info("Launching hbck");
563
564    connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
565    admin = connection.getAdmin();
566    meta = connection.getTable(TableName.META_TABLE_NAME);
567    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
568      Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
569      Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
570  }
571
572  /**
573   * Get deployed regions according to the region servers.
574   */
575  private void loadDeployedRegions() throws IOException, InterruptedException {
576    // From the master, get a list of all known live region servers
577    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
578    errors.print("Number of live region servers: " + regionServers.size());
579    if (details) {
580      for (ServerName rsinfo: regionServers) {
581        errors.print("  " + rsinfo.getServerName());
582      }
583    }
584
585    // From the master, get a list of all dead region servers
586    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
587    errors.print("Number of dead region servers: " + deadRegionServers.size());
588    if (details) {
589      for (ServerName name: deadRegionServers) {
590        errors.print("  " + name);
591      }
592    }
593
594    // Print the current master name and state
595    errors.print("Master: " + status.getMasterName());
596
597    // Print the list of all backup masters
598    Collection<ServerName> backupMasters = status.getBackupMasterNames();
599    errors.print("Number of backup masters: " + backupMasters.size());
600    if (details) {
601      for (ServerName name: backupMasters) {
602        errors.print("  " + name);
603      }
604    }
605
606    errors.print("Average load: " + status.getAverageLoad());
607    errors.print("Number of requests: " + status.getRequestCount());
608    errors.print("Number of regions: " + status.getRegionCount());
609
610    List<RegionState> rits = status.getRegionStatesInTransition();
611    errors.print("Number of regions in transition: " + rits.size());
612    if (details) {
613      for (RegionState state: rits) {
614        errors.print("  " + state.toDescriptiveString());
615      }
616    }
617
618    // Determine what's deployed
619    processRegionServers(regionServers);
620  }
621
622  /**
623   * Clear the current state of hbck.
624   */
625  private void clearState() {
626    // Make sure regionInfo is empty before starting
627    fixes = 0;
628    regionInfoMap.clear();
629    emptyRegionInfoQualifiers.clear();
630    tableStates.clear();
631    errors.clear();
632    tablesInfo.clear();
633    orphanHdfsDirs.clear();
634    skippedRegions.clear();
635  }
636
637  /**
638   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
639   * the table integrity rules.  HBase doesn't need to be online for this
640   * operation to work.
641   */
642  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
643    // Initial pass to fix orphans.
644    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
645        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
647      // if nothing is happening this should always complete in two iterations.
648      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
649      int curIter = 0;
650      do {
        clearState(); // clears hbck state and resets fixes to 0.
652        // repair what's on HDFS
653        restoreHdfsIntegrity();
654        curIter++;// limit the number of iterations.
655      } while (fixes > 0 && curIter <= maxIterations);
656
657      // Repairs should be done in the first iteration and verification in the second.
658      // If there are more than 2 passes, something funny has happened.
659      if (curIter > 2) {
660        if (curIter == maxIterations) {
661          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
663        } else {
664          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
665        }
666      }
667    }
668  }
669
670  /**
671   * This repair method requires the cluster to be online since it contacts
672   * region servers and the masters.  It makes each region's state in HDFS, in
673   * hbase:meta, and deployments consistent.
674   *
   * @return If &gt; 0, the number of errors detected; if &lt; 0, there was an unrecoverable
   *     error. If 0, we have a clean hbase.
677   */
678  public int onlineConsistencyRepair() throws IOException, KeeperException,
679    InterruptedException {
680
681    // get regions according to what is online on each RegionServer
682    loadDeployedRegions();
683    // check whether hbase:meta is deployed and online
684    recordMetaRegion();
685    // Check if hbase:meta is found only once and in the right place
686    if (!checkMetaRegion()) {
687      String errorMsg = "hbase:meta table is not consistent. ";
688      if (shouldFixAssignments()) {
689        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
690      } else {
691        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
692      }
693      errors.reportError(errorMsg + " Exiting...");
694      return -2;
695    }
    // Further table consistency checks are only meaningful once hbase:meta itself is consistent.
    LOG.info("Loading regioninfos from the hbase:meta table");
698    boolean success = loadMetaEntries();
699    if (!success) return -1;
700
701    // Empty cells in hbase:meta?
702    reportEmptyMetaCells();
703
704    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
705    if (shouldFixEmptyMetaCells()) {
706      fixEmptyMetaCells();
707    }
708
709    // get a list of all tables that have not changed recently.
710    if (!checkMetaOnly) {
711      reportTablesInFlux();
712    }
713
714    // Get disabled tables states
715    loadTableStates();
716
717    // load regiondirs and regioninfos from HDFS
718    if (shouldCheckHdfs()) {
719      LOG.info("Loading region directories from HDFS");
720      loadHdfsRegionDirs();
721      LOG.info("Loading region information from HDFS");
722      loadHdfsRegionInfos();
723    }
724
725    // fix the orphan tables
726    fixOrphanTables();
727
728    LOG.info("Checking and fixing region consistency");
729    // Check and fix consistency
730    checkAndFixConsistency();
731
732    // Check integrity (does not fix)
733    checkIntegrity();
734    return errors.getErrorList().size();
735  }
736
737  /**
   * This method creates an ephemeral znode under the master maintenance znode. If the creation
   * fails we return false or throw an exception.
740   *
741   * @return true if creating znode succeeds; false otherwise
742   * @throws IOException if IO failure occurs
743   */
744  private boolean setMasterInMaintenanceMode() throws IOException {
745    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
746    hbckEphemeralNodePath = ZNodePaths.joinZNode(
747      zkw.znodePaths.masterMaintZNode,
748      "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
749    do {
750      try {
751        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
752        if (hbckZodeCreated) {
753          break;
754        }
755      } catch (KeeperException e) {
756        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
757           throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
758        }
759        // fall through and retry
760      }
761
      LOG.warn("Failed to create znode " + hbckEphemeralNodePath + ", try=" +
763          (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
764
765      try {
766        retryCounter.sleepUntilNextRetry();
767      } catch (InterruptedException ie) {
768        throw (InterruptedIOException) new InterruptedIOException(
769              "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
770      }
771    } while (retryCounter.shouldRetry());
772    return hbckZodeCreated;
773  }
774
775  private void cleanupHbckZnode() {
776    try {
777      if (zkw != null && hbckZodeCreated) {
778        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
779        hbckZodeCreated = false;
780      }
781    } catch (KeeperException e) {
782      // Ignore
783      if (!e.code().equals(KeeperException.Code.NONODE)) {
784        LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
785      }
786    }
787  }
788
789  /**
790   * Contacts the master and prints out cluster-wide information
791   * @return 0 on success, non-zero on failure
792   */
793  public int onlineHbck()
794      throws IOException, KeeperException, InterruptedException {
795    // print hbase server version
796    errors.print("Version: " + status.getHBaseVersion());
797
798    // Clean start
799    clearState();
800    // Do offline check and repair first
801    offlineHdfsIntegrityRepair();
802    offlineReferenceFileRepair();
803    offlineHLinkFileRepair();
804    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
805    // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
806    // is better to set Master into maintenance mode during online hbck.
807    //
808    if (!setMasterInMaintenanceMode()) {
809      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
        + "errors.  Please run HBCK multiple times to reduce the chance of transient errors.");
811    }
812
813    onlineConsistencyRepair();
814
815    if (checkRegionBoundaries) {
816      checkRegionBoundaries();
817    }
818
819    checkAndFixReplication();
820
821    // Remove the hbck znode
822    cleanupHbckZnode();
823
824    // Remove the hbck lock
825    unlockHbck();
826
827    // Print table summary
828    printTableSummary(tablesInfo);
829    return errors.summarize();
830  }
831
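  /**
   * Extracts the row portion of a serialized KeyValue key: the first SIZEOF_SHORT bytes encode
   * the row length, followed by the row bytes themselves.
   */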
832  public static byte[] keyOnly(byte[] b) {
833    if (b == null)
834      return b;
835    int rowlength = Bytes.toShort(b, 0);
836    byte[] result = new byte[rowlength];
837    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
838    return result;
839  }
840
841  @Override
842  public void close() throws IOException {
843    try {
844      cleanupHbckZnode();
845      unlockHbck();
846    } catch (Exception io) {
847      LOG.warn(io.toString(), io);
848    } finally {
849      if (zkw != null) {
850        zkw.close();
851        zkw = null;
852      }
853      IOUtils.closeQuietly(admin);
854      IOUtils.closeQuietly(meta);
855      IOUtils.closeQuietly(connection);
856    }
857  }
858
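  /**
   * Holds, for one region, the boundary keys recorded in hbase:meta alongside the first and last
   * keys observed in its store files, so that checkRegionBoundaries() can compare them.
   */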
859  private static class RegionBoundariesInformation {
860    public byte [] regionName;
861    public byte [] metaFirstKey;
862    public byte [] metaLastKey;
863    public byte [] storesFirstKey;
864    public byte [] storesLastKey;
865    @Override
866    public String toString () {
867      return "regionName=" + Bytes.toStringBinary(regionName) +
868             "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
869             "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
870             "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
871             "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
872    }
873  }
874
875  public void checkRegionBoundaries() {
876    try {
877      ByteArrayComparator comparator = new ByteArrayComparator();
878      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
879      final RegionBoundariesInformation currentRegionBoundariesInformation =
880          new RegionBoundariesInformation();
881      Path hbaseRoot = FSUtils.getRootDir(getConf());
882      for (RegionInfo regionInfo : regions) {
883        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
884        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
885        // For each region, get the start and stop key from the META and compare them to the
886        // same information from the Stores.
887        Path path = new Path(tableDir, regionInfo.getEncodedName());
888        FileSystem fs = path.getFileSystem(getConf());
889        FileStatus[] files = fs.listStatus(path);
890        // For all the column families in this region...
891        byte[] storeFirstKey = null;
892        byte[] storeLastKey = null;
893        for (FileStatus file : files) {
894          String fileName = file.getPath().toString();
895          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
896          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
897            FileStatus[] storeFiles = fs.listStatus(file.getPath());
898            // For all the stores in this column family.
899            for (FileStatus storeFile : storeFiles) {
900              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
901                CacheConfig.DISABLED, true, getConf());
902              if ((reader.getFirstKey() != null)
903                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
904                      ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
905                storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
906              }
907              if ((reader.getLastKey() != null)
908                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
909                      ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
910                storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
911              }
912              reader.close();
913            }
914          }
915        }
916        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
917        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
918        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
919        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
920        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
921          currentRegionBoundariesInformation.metaFirstKey = null;
922        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
923          currentRegionBoundariesInformation.metaLastKey = null;
924
925        // For a region to be correct, we need the META start key to be smaller or equal to the
926        // smallest start key from all the stores, and the start key from the next META entry to
927        // be bigger than the last key from all the current stores. First region start key is null;
928        // Last region end key is null; some regions can be empty and not have any store.
929
930        boolean valid = true;
931        // Checking start key.
932        if ((currentRegionBoundariesInformation.storesFirstKey != null)
933            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
934          valid = valid
935              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
936                currentRegionBoundariesInformation.metaFirstKey) >= 0;
937        }
938        // Checking stop key.
939        if ((currentRegionBoundariesInformation.storesLastKey != null)
940            && (currentRegionBoundariesInformation.metaLastKey != null)) {
941          valid = valid
942              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
943                currentRegionBoundariesInformation.metaLastKey) < 0;
944        }
945        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
947            tablesInfo.get(regionInfo.getTable()));
948          LOG.warn("Region's boundaries not aligned between stores and META for:");
949          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
950        }
951      }
952    } catch (IOException e) {
953      LOG.error(e.toString(), e);
954    }
955  }
956
957  /**
958   * Iterates through the list of all orphan/invalid regiondirs.
959   */
960  private void adoptHdfsOrphans(Collection<HbckRegionInfo> orphanHdfsDirs) throws IOException {
961    for (HbckRegionInfo hi : orphanHdfsDirs) {
962      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
963      adoptHdfsOrphan(hi);
964    }
965  }
966
967  /**
968   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
969   * these orphans by creating a new region, and moving the column families,
   * recovered edits, and WALs into the new region dir.  We determine the region
   * start key and end key by looking at all of the hfiles inside the column
972   * families to identify the min and max keys. The resulting region will
973   * likely violate table integrity but will be dealt with by merging
974   * overlapping regions.
975   */
976  @SuppressWarnings("deprecation")
977  private void adoptHdfsOrphan(HbckRegionInfo hi) throws IOException {
978    Path p = hi.getHdfsRegionDir();
979    FileSystem fs = p.getFileSystem(getConf());
980    FileStatus[] dirs = fs.listStatus(p);
981    if (dirs == null) {
982      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
983          p + ". This dir could probably be deleted.");
984      return ;
985    }
986
987    TableName tableName = hi.getTableName();
988    HbckTableInfo tableInfo = tablesInfo.get(tableName);
989    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
990    TableDescriptor template = tableInfo.getTableDescriptor();
991
992    // find min and max key values
993    Pair<byte[],byte[]> orphanRegionRange = null;
994    for (FileStatus cf : dirs) {
995      String cfName= cf.getPath().getName();
996      // TODO Figure out what the special dirs are
997      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
998
999      FileStatus[] hfiles = fs.listStatus(cf.getPath());
1000      for (FileStatus hfile : hfiles) {
1001        byte[] start, end;
1002        HFile.Reader hf = null;
1003        try {
1004          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
1005          hf.loadFileInfo();
1006          Optional<Cell> startKv = hf.getFirstKey();
1007          start = CellUtil.cloneRow(startKv.get());
1008          Optional<Cell> endKv = hf.getLastKey();
1009          end = CellUtil.cloneRow(endKv.get());
1010        } catch (IOException ioe) {
1011          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
1012          continue;
1013        } catch (NullPointerException ioe) {
1014          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
1015          continue;
1016        } finally {
1017          if (hf != null) {
1018            hf.close();
1019          }
1020        }
1021
1022        // expand the range to include the range of all hfiles
1023        if (orphanRegionRange == null) {
1024          // first range
1025          orphanRegionRange = new Pair<>(start, end);
1026        } else {
1027          // TODO add test
1028
1029          // expand range only if the hfile is wider.
1030          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1031            orphanRegionRange.setFirst(start);
1032          }
1033          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
1034            orphanRegionRange.setSecond(end);
1035          }
1036        }
1037      }
1038    }
1039    if (orphanRegionRange == null) {
1040      LOG.warn("No data in dir " + p + ", sidelining data");
1041      fixes++;
1042      sidelineRegionDir(fs, hi);
1043      return;
1044    }
1045    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1046        Bytes.toString(orphanRegionRange.getSecond()) + ")");
1047
1048    // create new region on hdfs. move data into place.
1049    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1050        .setStartKey(orphanRegionRange.getFirst())
1051        .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1]))
1052        .build();
1053    LOG.info("Creating new region : " + regionInfo);
1054    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1055    Path target = region.getRegionFileSystem().getRegionDir();
1056
1057    // rename all the data to new region
1058    mergeRegionDirs(target, hi);
1059    fixes++;
1060  }
1061
1062  /**
1063   * This method determines if there are table integrity errors in HDFS.  If
1064   * there are errors and the appropriate "fix" options are enabled, the method
1065   * will first correct orphan regions making them into legit regiondirs, and
1066   * then reload to merge potentially overlapping regions.
1067   *
1068   * @return number of table integrity errors found
1069   */
1070  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1071    // Determine what's on HDFS
1072    LOG.info("Loading HBase regioninfo from HDFS...");
1073    loadHdfsRegionDirs(); // populating regioninfo table.
1074
1075    int errs = errors.getErrorList().size();
1076    // First time just get suggestions.
1077    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1078    checkHdfsIntegrity(false, false);
1079
1080    if (errors.getErrorList().size() == errs) {
1081      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1082      return 0;
1083    }
1084
1085    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1086      adoptHdfsOrphans(orphanHdfsDirs);
1087      // TODO optimize by incrementally adding instead of reloading.
1088    }
1089
1090    // Make sure there are no holes now.
1091    if (shouldFixHdfsHoles()) {
1092      clearState(); // this also resets # fixes.
1093      loadHdfsRegionDirs();
1094      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1095      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1096    }
1097
1098    // Now we fix overlaps
1099    if (shouldFixHdfsOverlaps()) {
1100      // second pass we fix overlaps.
1101      clearState(); // this also resets # fixes.
1102      loadHdfsRegionDirs();
1103      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1104      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1105    }
1106
1107    return errors.getErrorList().size();
1108  }
1109
1110  /**
1111   * Scan all the store file names to find any lingering reference files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering reference file found will be sidelined.
   * <p>
   * A lingering reference file prevents a region from opening. It has to
1116   * be fixed before a cluster can start properly.
1117   */
1118  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1119    clearState();
1120    Configuration conf = getConf();
1121    Path hbaseRoot = FSUtils.getRootDir(conf);
1122    FileSystem fs = hbaseRoot.getFileSystem(conf);
1123    LOG.info("Computing mapping of all store files");
1124    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1125      new FSUtils.ReferenceFileFilter(fs), executor, errors);
1126    errors.print("");
1127    LOG.info("Validating mapping using HDFS state");
1128    for (Path path: allFiles.values()) {
1129      Path referredToFile = StoreFileInfo.getReferredToFile(path);
1130      if (fs.exists(referredToFile)) continue;  // good, expected
1131
1132      // Found a lingering reference file
1133      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1134        "Found lingering reference file " + path);
1135      if (!shouldFixReferenceFiles()) continue;
1136
1137      // Now, trying to fix it since requested
1138      boolean success = false;
1139      String pathStr = path.toString();
1140
1141      // A reference file path should be like
1142      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1143      // Up 5 directories to get the root folder.
1144      // So the file will be sidelined to a similar folder structure.
1145      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1146      for (int i = 0; index > 0 && i < 5; i++) {
1147        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1148      }
1149      if (index > 0) {
1150        Path rootDir = getSidelineDir();
1151        Path dst = new Path(rootDir, pathStr.substring(index + 1));
1152        fs.mkdirs(dst.getParent());
1153        LOG.info("Trying to sideline reference file "
1154          + path + " to " + dst);
1155        setShouldRerun();
1156
1157        success = fs.rename(path, dst);
1158        debugLsr(dst);
1159
1160      }
1161      if (!success) {
1162        LOG.error("Failed to sideline reference file " + path);
1163      }
1164    }
1165  }
1166
1167  /**
1168   * Scan all the store file names to find any lingering HFileLink files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering HFileLink file found will be sidelined.
1171   */
1172  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1173    Configuration conf = getConf();
1174    Path hbaseRoot = FSUtils.getRootDir(conf);
1175    FileSystem fs = hbaseRoot.getFileSystem(conf);
1176    LOG.info("Computing mapping of all link files");
1177    Map<String, Path> allFiles = FSUtils
1178        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1179    errors.print("");
1180
1181    LOG.info("Validating mapping using HDFS state");
1182    for (Path path : allFiles.values()) {
1183      // building HFileLink object to gather locations
1184      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1185      if (actualLink.exists(fs)) continue; // good, expected
1186
1187      // Found a lingering HFileLink
1188      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1189      if (!shouldFixHFileLinks()) continue;
1190
1191      // Now, trying to fix it since requested
1192      setShouldRerun();
1193
1194      // An HFileLink path should be like
1195      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1197      boolean success = sidelineFile(fs, hbaseRoot, path);
1198
1199      if (!success) {
1200        LOG.error("Failed to sideline HFileLink file " + path);
1201      }
1202
1203      // An HFileLink backreference path should be like
1204      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1206      Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1207              .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1208                  HFileLink.getReferencedRegionName(path.getName().toString()),
1209                  path.getParent().getName()),
1210          HFileLink.getReferencedHFileName(path.getName().toString()));
1211      success = sidelineFile(fs, hbaseRoot, backRefPath);
1212
1213      if (!success) {
        LOG.error("Failed to sideline HFileLink backreference file " + backRefPath);
1215      }
1216    }
1217  }
1218
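  /**
   * Moves the given file under the sideline directory, preserving its path relative to the HBase
   * root dir. Returns true only if the destination's parent directory could be created and the
   * rename succeeded.
   */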
1219  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1220    URI uri = hbaseRoot.toUri().relativize(path.toUri());
1221    if (uri.isAbsolute()) return false;
1222    String relativePath = uri.getPath();
1223    Path rootDir = getSidelineDir();
1224    Path dst = new Path(rootDir, relativePath);
1225    boolean pathCreated = fs.mkdirs(dst.getParent());
1226    if (!pathCreated) {
1227      LOG.error("Failed to create path: " + dst.getParent());
1228      return false;
1229    }
1230    LOG.info("Trying to sideline file " + path + " to " + dst);
1231    return fs.rename(path, dst);
1232  }
1233
1234  /**
1235   * TODO -- need to add tests for this.
1236   */
1237  private void reportEmptyMetaCells() {
1238    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1239      emptyRegionInfoQualifiers.size());
1240    if (details) {
1241      for (Result r: emptyRegionInfoQualifiers) {
1242        errors.print("  " + r);
1243      }
1244    }
1245  }
1246
1247  /**
1248   * TODO -- need to add tests for this.
1249   */
1250  private void reportTablesInFlux() {
1251    AtomicInteger numSkipped = new AtomicInteger(0);
1252    TableDescriptor[] allTables = getTables(numSkipped);
1253    errors.print("Number of Tables: " + allTables.length);
1254    if (details) {
1255      if (numSkipped.get() > 0) {
1256        errors.detail("Number of Tables in flux: " + numSkipped.get());
1257      }
1258      for (TableDescriptor td : allTables) {
1259        errors.detail("  Table: " + td.getTableName() + "\t" +
1260                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
1261                            (td.isMetaRegion() ? "META" : "    ") + "\t" +
1262                           " families: " + td.getColumnFamilyCount());
1263      }
1264    }
1265  }
1266
1267  public HbckErrorReporter getErrors() {
1268    return errors;
1269  }
1270
1271  /**
1272   * Populate hbi's from regionInfos loaded from file system.
1273   */
1274  private SortedMap<TableName, HbckTableInfo> loadHdfsRegionInfos()
1275      throws IOException, InterruptedException {
1276    tablesInfo.clear(); // regenerating the data
1277    // generate region split structure
1278    Collection<HbckRegionInfo> hbckRegionInfos = regionInfoMap.values();
1279
1280    // Parallelized read of .regioninfo files.
1281    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckRegionInfos.size());
1282    List<Future<Void>> hbiFutures;
1283
1284    for (HbckRegionInfo hbi : hbckRegionInfos) {
1285      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1286      hbis.add(work);
1287    }
1288
1289    // Submit and wait for completion
1290    hbiFutures = executor.invokeAll(hbis);
1291
    for (int i = 0; i < hbiFutures.size(); i++) {
1293      WorkItemHdfsRegionInfo work = hbis.get(i);
1294      Future<Void> f = hbiFutures.get(i);
1295      try {
1296        f.get();
      } catch (ExecutionException e) {
1298        LOG.warn("Failed to read .regioninfo file for region " +
1299              work.hbi.getRegionNameAsString(), e.getCause());
1300      }
1301    }
1302
1303    Path hbaseRoot = FSUtils.getRootDir(getConf());
1304    FileSystem fs = hbaseRoot.getFileSystem(getConf());
1305    // serialized table info gathering.
1306    for (HbckRegionInfo hbi: hbckRegionInfos) {
1307
1308      if (hbi.getHdfsHRI() == null) {
1309        // was an orphan
1310        continue;
1311      }
1312
1313
1314      // get table name from hdfs, populate various HBaseFsck tables.
1315      TableName tableName = hbi.getTableName();
1316      if (tableName == null) {
        // Was this an entry in hbase:meta that has no corresponding data in HDFS?
1318        LOG.warn("tableName was null for: " + hbi);
1319        continue;
1320      }
1321
1322      HbckTableInfo modTInfo = tablesInfo.get(tableName);
1323      if (modTInfo == null) {
1324        // only executed once per table.
1325        modTInfo = new HbckTableInfo(tableName, this);
1326        tablesInfo.put(tableName, modTInfo);
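        // Read the table's .tableinfo descriptor once; if it cannot be read, remember the table
        // as an orphan together with the column families found on disk so fixOrphanTables() can
        // rebuild it later.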
1327        try {
1328          TableDescriptor htd =
1329              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1330          modTInfo.htds.add(htd);
1331        } catch (IOException ioe) {
1332          if (!orphanTableDirs.containsKey(tableName)) {
1333            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1334            //should only report once for each table
1335            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1336                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1337            Set<String> columns = new HashSet<>();
1338            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1339          }
1340        }
1341      }
1342      if (!hbi.isSkipChecks()) {
1343        modTInfo.addRegionInfo(hbi);
1344      }
1345    }
1346
1347    loadTableInfosForTablesWithNoRegion();
1348    errors.print("");
1349
1350    return tablesInfo;
1351  }
1352
1353  /**
   * Get the column family list by scanning the column family directories under the region dir.
   * @param columns set that discovered column family names are added to
   * @param hbi hbck region info whose HDFS region dir is scanned
   * @return the set of column family names
   * @throws IOException if the region dir cannot be listed
1359   */
1360  private Set<String> getColumnFamilyList(Set<String> columns, HbckRegionInfo hbi)
1361      throws IOException {
1362    Path regionDir = hbi.getHdfsRegionDir();
1363    FileSystem fs = regionDir.getFileSystem(getConf());
1364    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1365    for (FileStatus subdir : subDirs) {
1366      String columnfamily = subdir.getPath().getName();
1367      columns.add(columnfamily);
1368    }
1369    return columns;
1370  }
1371
1372  /**
   * Fabricate a .tableinfo file with the following contents:<br>
   * 1. the correct table name<br>
   * 2. the correct column family list<br>
   * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
   * @throws IOException
1378   */
1379  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1380      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
1382    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
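    // Only the column family names could be recovered from the family dirs on disk, so each
    // family is recreated with default settings.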
    for (String columnfamily : columns) {
      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnfamily));
1385    }
1386    fstd.createTableDescriptor(builder.build(), true);
1387    return true;
1388  }
1389
1390  /**
   * Fix the empty REGIONINFO_QUALIFIER rows in hbase:meta.
1392   * @throws IOException
1393   */
1394  public void fixEmptyMetaCells() throws IOException {
1395    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1396      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1397      for (Result region : emptyRegionInfoQualifiers) {
1398        deleteMetaRegion(region.getRow());
1399        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1400      }
1401      emptyRegionInfoQualifiers.clear();
1402    }
1403  }
1404
1405  /**
   * Fix orphan tables by creating a .tableinfo file under each table dir:<br>
   * 1. if the table descriptor is cached, recover the .tableinfo from it<br>
   * 2. else create a default .tableinfo file with the following items:<br>
   * &nbsp;2.1 the correct table name<br>
   * &nbsp;2.2 the correct column family list<br>
   * &nbsp;2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1412   * @throws IOException
1413   */
1414  public void fixOrphanTables() throws IOException {
1415    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1416
1417      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1418      tmpList.addAll(orphanTableDirs.keySet());
1419      TableDescriptor[] htds = getTableDescriptors(tmpList);
1420      Iterator<Entry<TableName, Set<String>>> iter =
1421          orphanTableDirs.entrySet().iterator();
1422      int j = 0;
1423      int numFailedCase = 0;
1424      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1425      while (iter.hasNext()) {
1426        Entry<TableName, Set<String>> entry =
1427            iter.next();
1428        TableName tableName = entry.getKey();
1429        LOG.info("Trying to fix orphan table error: " + tableName);
1430        if (j < htds.length) {
1431          if (tableName.equals(htds[j].getTableName())) {
1432            TableDescriptor htd = htds[j];
1433            LOG.info("fixing orphan table: " + tableName + " from cache");
1434            fstd.createTableDescriptor(htd, true);
1435            j++;
1436            iter.remove();
1437          }
1438        } else {
1439          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1440            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1441            LOG.warn("Strongly recommend to modify the TableDescriptor if necessary for: " + tableName);
1442            iter.remove();
1443          } else {
1444            LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1445            numFailedCase++;
1446          }
1447        }
1448        fixes++;
1449      }
1450
1451      if (orphanTableDirs.isEmpty()) {
1452        // all orphanTableDirs are luckily recovered
1453        // re-run doFsck after recovering the .tableinfo file
1454        setShouldRerun();
1455        LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1456      } else if (numFailedCase > 0) {
1457        LOG.error("Failed to fix " + numFailedCase
1458            + " OrphanTables with default .tableinfo files");
1459      }
1460
1461    }
1462    //cleanup the list
1463    orphanTableDirs.clear();
1464
1465  }
1466
1467  /**
1468   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1469   */
1470  private void logParallelMerge() {
1471    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1472      LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
1473          " false to run serially.");
1474    } else {
1475      LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to" +
1476          " true to run in parallel.");
1477    }
1478  }
1479
1480  private SortedMap<TableName, HbckTableInfo> checkHdfsIntegrity(boolean fixHoles,
1481      boolean fixOverlaps) throws IOException {
1482    LOG.info("Checking HBase region split map from HDFS data...");
1483    logParallelMerge();
1484    for (HbckTableInfo tInfo : tablesInfo.values()) {
1485      TableIntegrityErrorHandler handler;
1486      if (fixHoles || fixOverlaps) {
1487        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1488          fixHoles, fixOverlaps);
1489      } else {
1490        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1491      }
1492      if (!tInfo.checkRegionChain(handler)) {
1493        // should dump info as well.
1494        errors.report("Found inconsistency in table " + tInfo.getName());
1495      }
1496    }
1497    return tablesInfo;
1498  }
1499
1500  Path getSidelineDir() throws IOException {
1501    if (sidelineDir == null) {
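      // Lazily compute a per-run sideline location under the hbck sideline dir, named after the
      // root dir plus this run's start time.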
1502      Path hbaseDir = FSUtils.getRootDir(getConf());
1503      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1504      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1505          + startMillis);
1506    }
1507    return sidelineDir;
1508  }
1509
1510  /**
1511   * Sideline a region dir (instead of deleting it)
1512   */
1513  Path sidelineRegionDir(FileSystem fs, HbckRegionInfo hi) throws IOException {
1514    return sidelineRegionDir(fs, null, hi);
1515  }
1516
1517  /**
1518   * Sideline a region dir (instead of deleting it)
1519   *
1520   * @param parentDir if specified, the region will be sidelined to folder like
1521   *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1522   *     similar regions sidelined, for example, those regions should be bulk loaded back later
1523   *     on. If NULL, it is ignored.
1524   */
1525  Path sidelineRegionDir(FileSystem fs,
1526      String parentDir, HbckRegionInfo hi) throws IOException {
1527    TableName tableName = hi.getTableName();
1528    Path regionDir = hi.getHdfsRegionDir();
1529
1530    if (!fs.exists(regionDir)) {
1531      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1532      return null;
1533    }
1534
1535    Path rootDir = getSidelineDir();
1536    if (parentDir != null) {
1537      rootDir = new Path(rootDir, parentDir);
1538    }
    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1540    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1541    fs.mkdirs(sidelineRegionDir);
1542    boolean success = false;
1543    FileStatus[] cfs =  fs.listStatus(regionDir);
1544    if (cfs == null) {
1545      LOG.info("Region dir is empty: " + regionDir);
1546    } else {
1547      for (FileStatus cf : cfs) {
1548        Path src = cf.getPath();
1549        Path dst =  new Path(sidelineRegionDir, src.getName());
1550        if (fs.isFile(src)) {
1551          // simple file
1552          success = fs.rename(src, dst);
1553          if (!success) {
1554            String msg = "Unable to rename file " + src +  " to " + dst;
1555            LOG.error(msg);
1556            throw new IOException(msg);
1557          }
1558          continue;
1559        }
1560
1561        // is a directory.
1562        fs.mkdirs(dst);
1563
1564        LOG.info("Sidelining files from " + src + " into containing region " + dst);
1565        // FileSystem.rename is inconsistent with directories -- if the
1566        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1567        // it moves the src into the dst dir resulting in (foo/a/b).  If
1568        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1569        FileStatus[] hfiles = fs.listStatus(src);
1570        if (hfiles != null && hfiles.length > 0) {
1571          for (FileStatus hfile : hfiles) {
1572            success = fs.rename(hfile.getPath(), dst);
1573            if (!success) {
1574              String msg = "Unable to rename file " + src +  " to " + dst;
1575              LOG.error(msg);
1576              throw new IOException(msg);
1577            }
1578          }
1579        }
1580        LOG.debug("Sideline directory contents:");
1581        debugLsr(sidelineRegionDir);
1582      }
1583    }
1584
1585    LOG.info("Removing old region dir: " + regionDir);
1586    success = fs.delete(regionDir, true);
1587    if (!success) {
1588      String msg = "Unable to delete dir " + regionDir;
1589      LOG.error(msg);
1590      throw new IOException(msg);
1591    }
1592    return sidelineRegionDir;
1593  }
1594
1595  /**
   * Load the table states from hbase:meta into the local map.
   * @throws IOException if the table states cannot be read from hbase:meta
1599   */
1600  private void loadTableStates()
1601  throws IOException {
1602    tableStates = MetaTableAccessor.getTableStates(connection);
1603    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
    // has no entry in the table states. HBCK doesn't work right with hbase2, but do this in the
    // meantime.
1606    this.tableStates.put(TableName.META_TABLE_NAME,
1607        new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1608  }
1609
1610  /**
1611   * Check if the specified region's table is disabled.
1612   * @param tableName table to check status of
1613   */
1614  boolean isTableDisabled(TableName tableName) {
1615    return tableStates.containsKey(tableName)
1616        && tableStates.get(tableName)
1617        .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1618  }
1619
1620  /**
1621   * Scan HDFS for all regions, recording their information into
1622   * regionInfoMap
1623   */
1624  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1625    Path rootDir = FSUtils.getRootDir(getConf());
1626    FileSystem fs = rootDir.getFileSystem(getConf());
1627
1628    // list all tables from HDFS
1629    List<FileStatus> tableDirs = Lists.newArrayList();
1630
1631    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1632
1633    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1634    for (Path path : paths) {
1635      TableName tableName = FSUtils.getTableName(path);
      if ((!checkMetaOnly &&
          isTableIncluded(tableName)) ||
          tableName.equals(TableName.META_TABLE_NAME)) {
        tableDirs.add(fs.getFileStatus(path));
      }
1641    }
1642
1643    // verify that version file exists
1644    if (!foundVersionFile) {
1645      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1646          "Version file does not exist in root dir " + rootDir);
1647      if (shouldFixVersionFile()) {
1648        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1649            + " file.");
1650        setShouldRerun();
1651        FSUtils.setVersion(fs, rootDir, getConf().getInt(
1652            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1653            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1654            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1655      }
1656    }
1657
1658    // Avoid multithreading at table-level because already multithreaded internally at
1659    // region-level.  Additionally multithreading at table-level can lead to deadlock
1660    // if there are many tables in the cluster.  Since there are a limited # of threads
1661    // in the executor's thread pool and if we multithread at the table-level by putting
1662    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1663    // executor tied up solely in waiting for the tables' region-level calls to complete.
1664    // If there are enough tables then there will be no actual threads in the pool left
1665    // for the region-level callables to be serviced.
1666    for (FileStatus tableDir : tableDirs) {
1667      LOG.debug("Loading region dirs from " +tableDir.getPath());
1668      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1669      try {
1670        item.call();
1671      } catch (ExecutionException e) {
1672        LOG.warn("Could not completely load table dir " +
1673            tableDir.getPath(), e.getCause());
1674      }
1675    }
1676    errors.print("");
1677  }
1678
1679  /**
1680   * Record the location of the hbase:meta region as found in ZooKeeper.
1681   */
1682  private boolean recordMetaRegion() throws IOException {
1683    RegionLocations rl = connection.locateRegion(TableName.META_TABLE_NAME,
1684        HConstants.EMPTY_START_ROW, false, false);
1685    if (rl == null) {
1686      errors.reportError(ERROR_CODE.NULL_META_REGION,
1687          "META region was not found in ZooKeeper");
1688      return false;
1689    }
1690    for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1691      // Check if Meta region is valid and existing
      if (metaLocation == null) {
1693        errors.reportError(ERROR_CODE.NULL_META_REGION,
1694            "META region location is null");
1695        return false;
1696      }
1697      if (metaLocation.getRegionInfo() == null) {
1698        errors.reportError(ERROR_CODE.NULL_META_REGION,
1699            "META location regionInfo is null");
1700        return false;
1701      }
1702      if (metaLocation.getHostname() == null) {
1703        errors.reportError(ERROR_CODE.NULL_META_REGION,
1704            "META location hostName is null");
1705        return false;
1706      }
1707      ServerName sn = metaLocation.getServerName();
1708      HbckRegionInfo.MetaEntry m = new HbckRegionInfo.MetaEntry(metaLocation.getRegion(), sn,
1709          EnvironmentEdgeManager.currentTime());
1710      HbckRegionInfo hbckRegionInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName());
1711      if (hbckRegionInfo == null) {
1712        regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckRegionInfo(m));
1713      } else {
1714        hbckRegionInfo.setMetaEntry(m);
1715      }
1716    }
1717    return true;
1718  }
1719
1720  private ZKWatcher createZooKeeperWatcher() throws IOException {
1721    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1722      @Override
1723      public void abort(String why, Throwable e) {
1724        LOG.error(why, e);
1725        System.exit(1);
1726      }
1727
1728      @Override
1729      public boolean isAborted() {
1730        return false;
1731      }
1732
1733    });
1734  }
1735
1736  private ServerName getMetaRegionServerName(int replicaId)
1737  throws IOException, KeeperException {
1738    return new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
1739  }
1740
1741  /**
1742   * Contacts each regionserver and fetches metadata about regions.
1743   * @param regionServerList - the list of region servers to connect to
1744   * @throws IOException if a remote or network exception occurs
1745   */
1746  void processRegionServers(Collection<ServerName> regionServerList)
1747    throws IOException, InterruptedException {
1748
1749    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
1750    List<Future<Void>> workFutures;
1751
1752    // loop to contact each region server in parallel
1753    for (ServerName rsinfo: regionServerList) {
1754      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1755    }
1756
1757    workFutures = executor.invokeAll(workItems);
1758
    for (int i = 0; i < workFutures.size(); i++) {
1760      WorkItemRegion item = workItems.get(i);
1761      Future<Void> f = workFutures.get(i);
1762      try {
1763        f.get();
      } catch (ExecutionException e) {
1765        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1766            e.getCause());
1767      }
1768    }
1769  }
1770
1771  /**
1772   * Check consistency of all regions that have been found in previous phases.
1773   */
1774  private void checkAndFixConsistency()
1775  throws IOException, KeeperException, InterruptedException {
1776    // Divide the checks in two phases. One for default/primary replicas and another
1777    // for the non-primary ones. Keeps code cleaner this way.
1778
1779    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
1780    for (java.util.Map.Entry<String, HbckRegionInfo> e: regionInfoMap.entrySet()) {
1781      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1782        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1783      }
1784    }
1785    checkRegionConsistencyConcurrently(workItems);
1786
1787    boolean prevHdfsCheck = shouldCheckHdfs();
1788    setCheckHdfs(false); //replicas don't have any hdfs data
1789    // Run a pass over the replicas and fix any assignment issues that exist on the currently
1790    // deployed/undeployed replicas.
1791    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
1792    for (java.util.Map.Entry<String, HbckRegionInfo> e: regionInfoMap.entrySet()) {
1793      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
1794        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1795      }
1796    }
1797    checkRegionConsistencyConcurrently(replicaWorkItems);
1798    setCheckHdfs(prevHdfsCheck);
1799
    // If some regions were skipped during the checkRegionConsistencyConcurrently() phase, we
    // might not get an accurate view of hbase if we continue. The config here allows users to
    // tune the tolerated number of skipped regions.
    // TODO: evaluate the consequences of continuing the hbck operation without this config.
1804    int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1805    int numOfSkippedRegions = skippedRegions.size();
1806    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1807      throw new IOException(numOfSkippedRegions
1808        + " region(s) could not be checked or repaired.  See logs for detail.");
1809    }
1810
1811    if (shouldCheckHdfs()) {
1812      checkAndFixTableStates();
1813    }
1814  }
1815
1816  /**
   * Check consistency of all regions using multiple threads concurrently.
1818   */
1819  private void checkRegionConsistencyConcurrently(
1820    final List<CheckRegionConsistencyWorkItem> workItems)
1821    throws IOException, KeeperException, InterruptedException {
1822    if (workItems.isEmpty()) {
1823      return;  // nothing to check
1824    }
1825
1826    List<Future<Void>> workFutures = executor.invokeAll(workItems);
    for (Future<Void> f : workFutures) {
1828      try {
1829        f.get();
      } catch (ExecutionException e1) {
        LOG.warn("Could not check region consistency", e1.getCause());
1832        if (e1.getCause() instanceof IOException) {
1833          throw (IOException)e1.getCause();
1834        } else if (e1.getCause() instanceof KeeperException) {
1835          throw (KeeperException)e1.getCause();
1836        } else if (e1.getCause() instanceof InterruptedException) {
1837          throw (InterruptedException)e1.getCause();
1838        } else {
1839          throw new IOException(e1.getCause());
1840        }
1841      }
1842    }
1843  }
1844
1845  class CheckRegionConsistencyWorkItem implements Callable<Void> {
1846    private final String key;
1847    private final HbckRegionInfo hbi;
1848
1849    CheckRegionConsistencyWorkItem(String key, HbckRegionInfo hbi) {
1850      this.key = key;
1851      this.hbi = hbi;
1852    }
1853
1854    @Override
1855    public synchronized Void call() throws Exception {
1856      try {
1857        checkRegionConsistency(key, hbi);
1858      } catch (Exception e) {
        // If the region is a non-META region, skip it and log a warning/error message; if it is
        // the META region, we should not continue.
1861        LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
1862          + "'.", e);
1863        if (hbi.getHdfsHRI().isMetaRegion()) {
1864          throw e;
1865        }
1866        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1867        addSkippedRegion(hbi);
1868      }
1869      return null;
1870    }
1871  }
1872
1873  private void addSkippedRegion(final HbckRegionInfo hbi) {
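    // Group skipped region names by table so they can be counted and reported per table.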
1874    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1875    if (skippedRegionNames == null) {
1876      skippedRegionNames = new HashSet<>();
1877    }
1878    skippedRegionNames.add(hbi.getRegionNameAsString());
1879    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1880  }
1881
1882  /**
1883   * Check and fix table states, assumes full info available:
1884   * - tableInfos
1885   * - empty tables loaded
1886   */
1887  private void checkAndFixTableStates() throws IOException {
1888    // first check dangling states
1889    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1890      TableName tableName = entry.getKey();
1891      TableState tableState = entry.getValue();
1892      HbckTableInfo tableInfo = tablesInfo.get(tableName);
1893      if (isTableIncluded(tableName)
1894          && !tableName.isSystemTable()
1895          && tableInfo == null) {
1896        if (fixMeta) {
1897          MetaTableAccessor.deleteTableState(connection, tableName);
1898          TableState state = MetaTableAccessor.getTableState(connection, tableName);
1899          if (state != null) {
1900            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1901                tableName + " unable to delete dangling table state " + tableState);
1902          }
1903        } else if (!checkMetaOnly) {
1904          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
          // true, tableInfo will be null as tablesInfo is not populated for all tables from hdfs
1906          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1907              tableName + " has dangling table state " + tableState);
1908        }
1909      }
1910    }
1911    // check that all tables have states
1912    for (TableName tableName : tablesInfo.keySet()) {
1913      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1914        if (fixMeta) {
1915          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1916          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1917          if (newState == null) {
1918            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1919                "Unable to change state for table " + tableName + " in meta ");
1920          }
1921        } else {
1922          errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1923              tableName + " has no state in meta ");
1924        }
1925      }
1926    }
1927  }
1928
1929  private void preCheckPermission() throws IOException, AccessDeniedException {
1930    if (shouldIgnorePreCheckPermission()) {
1931      return;
1932    }
1933
1934    Path hbaseDir = FSUtils.getRootDir(getConf());
1935    FileSystem fs = hbaseDir.getFileSystem(getConf());
1936    UserProvider userProvider = UserProvider.instantiate(getConf());
1937    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1938    FileStatus[] files = fs.listStatus(hbaseDir);
1939    for (FileStatus file : files) {
1940      try {
1941        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1942      } catch (AccessDeniedException ace) {
1943        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1944        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1945          + " does not have write perms to " + file.getPath()
1946          + ". Please rerun hbck as hdfs user " + file.getOwner());
1947        throw ace;
1948      }
1949    }
1950  }
1951
1952  /**
1953   * Deletes region from meta table
1954   */
1955  private void deleteMetaRegion(HbckRegionInfo hi) throws IOException {
1956    deleteMetaRegion(hi.getMetaEntry().getRegionName());
1957  }
1958
1959  /**
1960   * Deletes region from meta table
1961   */
1962  private void deleteMetaRegion(byte[] metaKey) throws IOException {
1963    Delete d = new Delete(metaKey);
1964    meta.delete(d);
1965    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1966  }
1967
1968  /**
1969   * Reset the split parent region info in meta table
1970   */
1971  private void resetSplitParent(HbckRegionInfo hi) throws IOException {
1972    RowMutations mutations = new RowMutations(hi.getMetaEntry().getRegionName());
1973    Delete d = new Delete(hi.getMetaEntry().getRegionName());
1974    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1975    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1976    mutations.add(d);
1977
1978    RegionInfo hri = RegionInfoBuilder.newBuilder(hi.getMetaEntry())
1979        .setOffline(false)
1980        .setSplit(false)
1981        .build();
1982    Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
1983    mutations.add(p);
1984
1985    meta.mutateRow(mutations);
1986    LOG.info("Reset split parent " + hi.getMetaEntry().getRegionNameAsString() + " in META");
1987  }
1988
1989  /**
   * This is a backwards-compatibility wrapper for permanently offlining a region
1991   * that should not be alive.  If the region server does not support the
1992   * "offline" method, it will use the closest unassign method instead.  This
1993   * will basically work until one attempts to disable or delete the affected
1994   * table.  The problem has to do with in-memory only master state, so
1995   * restarting the HMaster or failing over to another should fix this.
1996   */
1997  void offline(byte[] regionName) throws IOException {
1998    String regionString = Bytes.toStringBinary(regionName);
1999    if (!rsSupportsOffline) {
2000      LOG.warn(
2001          "Using unassign region " + regionString + " instead of using offline method, you should" +
2002              " restart HMaster after these repairs");
2003      admin.unassign(regionName, true);
2004      return;
2005    }
2006
2007    // first time we assume the rs's supports #offline.
2008    try {
2009      LOG.info("Offlining region " + regionString);
2010      admin.offline(regionName);
2011    } catch (IOException ioe) {
2012      String notFoundMsg = "java.lang.NoSuchMethodException: " +
2013          "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2014      if (ioe.getMessage().contains(notFoundMsg)) {
2015        LOG.warn("Using unassign region " + regionString +
2016            " instead of using offline method, you should" +
2017            " restart HMaster after these repairs");
2018        rsSupportsOffline = false; // in the future just use unassign
2019        admin.unassign(regionName, true);
2020        return;
2021      }
2022      throw ioe;
2023    }
2024  }
2025
2026  /**
   * Attempts to undeploy a region from a region server based on information in
2028   * META.  Any operations that modify the file system should make sure that
2029   * its corresponding region is not deployed to prevent data races.
2030   *
2031   * A separate call is required to update the master in-memory region state
   * kept in the AssignmentManager.  Because disable uses this state instead of
2033   * that found in META, we can't seem to cleanly disable/delete tables that
2034   * have been hbck fixed.  When used on a version of HBase that does not have
2035   * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2036   * restart or failover may be required.
2037   */
2038  void closeRegion(HbckRegionInfo hi) throws IOException, InterruptedException {
2039    if (hi.getMetaEntry() == null && hi.getHdfsEntry() == null) {
2040      undeployRegions(hi);
2041      return;
2042    }
2043
2044    // get assignment info and hregioninfo from meta.
2045    Get get = new Get(hi.getRegionName());
2046    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2047    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2048    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2049    // also get the locations of the replicas to close if the primary region is being closed
2050    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2051      int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2052      for (int i = 0; i < numReplicas; i++) {
2053        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2054        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2055      }
2056    }
2057    Result r = meta.get(get);
2058    RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2059    if (rl == null) {
2060      LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2061          " since meta does not have handle to reach it");
2062      return;
2063    }
2064    for (HRegionLocation h : rl.getRegionLocations()) {
2065      ServerName serverName = h.getServerName();
2066      if (serverName == null) {
2067        errors.reportError("Unable to close region "
2068            + hi.getRegionNameAsString() +  " because meta does not "
2069            + "have handle to reach it.");
2070        continue;
2071      }
2072      RegionInfo hri = h.getRegionInfo();
2073      if (hri == null) {
2074        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2075            + " because hbase:meta had invalid or missing "
2076            + HConstants.CATALOG_FAMILY_STR + ":"
2077            + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2078            + " qualifier value.");
2079        continue;
2080      }
2081      // close the region -- close files and remove assignment
2082      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2083    }
2084  }
2085
2086  private void undeployRegions(HbckRegionInfo hi) throws IOException, InterruptedException {
2087    undeployRegionsForHbi(hi);
2088    // undeploy replicas of the region (but only if the method is invoked for the primary)
2089    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2090      return;
2091    }
2092    int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2093    for (int i = 1; i < numReplicas; i++) {
2094      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2095      RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2096          hi.getPrimaryHRIForDeployedReplica(), i);
2097      HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2098      if (h != null) {
2099        undeployRegionsForHbi(h);
2100        //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2101        //in consistency checks
2102        h.setSkipChecks(true);
2103      }
2104    }
2105  }
2106
2107  private void undeployRegionsForHbi(HbckRegionInfo hi) throws IOException, InterruptedException {
2108    for (HbckRegionInfo.OnlineEntry rse : hi.getOnlineEntries()) {
2109      LOG.debug("Undeploy region "  + rse.getRegionInfo() + " from " + rse.getServerName());
2110      try {
2111        HBaseFsckRepair
2112            .closeRegionSilentlyAndWait(connection, rse.getServerName(), rse.getRegionInfo());
2113        offline(rse.getRegionInfo().getRegionName());
2114      } catch (IOException ioe) {
2115        LOG.warn("Got exception when attempting to offline region "
2116            + Bytes.toString(rse.getRegionInfo().getRegionName()), ioe);
2117      }
2118    }
2119  }
2120
2121  private void tryAssignmentRepair(HbckRegionInfo hbi, String msg) throws IOException,
2122    KeeperException, InterruptedException {
2123    // If we are trying to fix the errors
2124    if (shouldFixAssignments()) {
2125      errors.print(msg);
2126      undeployRegions(hbi);
2127      setShouldRerun();
2128      RegionInfo hri = hbi.getHdfsHRI();
2129      if (hri == null) {
2130        hri = hbi.getMetaEntry();
2131      }
2132      HBaseFsckRepair.fixUnassigned(admin, hri);
2133      HBaseFsckRepair.waitUntilAssigned(admin, hri);
2134
2135      // also assign replicas if needed (do it only when this call operates on a primary replica)
2136      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2137      int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2138      for (int i = 1; i < replicationCount; i++) {
2139        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2140        HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2141        if (h != null) {
2142          undeployRegions(h);
2143          //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2144          //in consistency checks
2145          h.setSkipChecks(true);
2146        }
2147        HBaseFsckRepair.fixUnassigned(admin, hri);
2148        HBaseFsckRepair.waitUntilAssigned(admin, hri);
2149      }
2150
2151    }
2152  }
2153
2154  /**
2155   * Check a single region for consistency and correct deployment.
2156   */
2157  private void checkRegionConsistency(final String key, final HbckRegionInfo hbi)
2158      throws IOException, KeeperException, InterruptedException {
2159
2160    if (hbi.isSkipChecks()) return;
2161    String descriptiveName = hbi.toString();
2162    boolean inMeta = hbi.getMetaEntry() != null;
2163    // In case not checking HDFS, assume the region is on HDFS
2164    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2165    boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null;
2166    boolean isDeployed = !hbi.getDeployedOn().isEmpty();
2167    boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1;
2168    boolean deploymentMatchesMeta =
2169      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2170      hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0));
2171    boolean splitParent =
2172        inMeta && hbi.getMetaEntry().isSplit() && hbi.getMetaEntry().isOffline();
2173    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.getMetaEntry().getTable());
2174    boolean recentlyModified = inHdfs &&
2175      hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2176
2177    // ========== First the healthy cases =============
2178    if (hbi.containsOnlyHdfsEdits()) {
2179      return;
2180    }
2181    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2182      return;
2183    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2184      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2185        "tabled that is not deployed");
2186      return;
2187    } else if (recentlyModified) {
2188      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2189      return;
2190    }
2191    // ========== Cases where the region is not in hbase:meta =============
2192    else if (!inMeta && !inHdfs && !isDeployed) {
2193      // We shouldn't have record of this region at all then!
2194      assert false : "Entry for region with no data";
2195    } else if (!inMeta && !inHdfs && isDeployed) {
2196      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2197          + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2198          "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2199      if (shouldFixAssignments()) {
2200        undeployRegions(hbi);
2201      }
2202
2203    } else if (!inMeta && inHdfs && !isDeployed) {
2204      if (hbi.isMerged()) {
2205        // This region has already been merged, the remaining hdfs file will be
2206        // cleaned by CatalogJanitor later
2207        hbi.setSkipChecks(true);
2208        LOG.info("Region " + descriptiveName
2209            + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2210        return;
2211      }
2212      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2213          + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2214          "or deployed on any region server");
2215      // restore region consistency of an adopted orphan
2216      if (shouldFixMeta()) {
2217        if (!hbi.isHdfsRegioninfoPresent()) {
2218          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2219              +  " in table integrity repair phase if -fixHdfsOrphans was" +
2220              " used.");
2221          return;
2222        }
2223
2224        RegionInfo hri = hbi.getHdfsHRI();
2225        HbckTableInfo tableInfo = tablesInfo.get(hri.getTable());
2226
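        // Before re-adding this orphan to hbase:meta, check whether its key range is already
        // covered by a region known to meta and whether its reference files point back to that
        // region; if so, this dir is leftover from a failed split and is deleted from HDFS
        // instead of being patched into meta.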
2227        for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) {
2228          if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2229              && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2230                hri.getEndKey()) >= 0)
2231              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
            if (region.isSplit() || region.isOffline()) continue;
2233            Path regionDir = hbi.getHdfsRegionDir();
2234            FileSystem fs = regionDir.getFileSystem(getConf());
2235            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2236            for (Path familyDir : familyDirs) {
2237              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2238              for (Path referenceFilePath : referenceFilePaths) {
2239                Path parentRegionDir =
2240                    StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2241                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2242                  LOG.warn(hri + " start and stop keys are in the range of " + region
2243                      + ". The region might not be cleaned up from hdfs when region " + region
2244                      + " split failed. Hence deleting from hdfs.");
2245                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2246                    regionDir.getParent(), hri);
2247                  return;
2248                }
2249              }
2250            }
2251          }
2252        }
2253        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2254        int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2255        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2256            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2257              .getLiveServerMetrics().keySet(), numReplicas);
2258
2259        tryAssignmentRepair(hbi, "Trying to reassign region...");
2260      }
2261
2262    } else if (!inMeta && inHdfs && isDeployed) {
2263      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2264          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2265      debugLsr(hbi.getHdfsRegionDir());
2266      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2267        // for replicas, this means that we should undeploy the region (we would have
2268        // gone over the primaries and fixed meta holes in first phase under
2269        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2270        // this stage unless unwanted replica)
2271        if (shouldFixAssignments()) {
2272          undeployRegionsForHbi(hbi);
2273        }
2274      }
2275      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2276        if (!hbi.isHdfsRegioninfoPresent()) {
2277          LOG.error("This should have been repaired in table integrity repair phase");
2278          return;
2279        }
2280
2281        LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2282        int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2283        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2284            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2285              .getLiveServerMetrics().keySet(), numReplicas);
2286        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2287      }
2288
2289    // ========== Cases where the region is in hbase:meta =============
2290    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2291      // check whether this is an actual error, or just transient state where parent
2292      // is not cleaned
2293      if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) {
2294        // check that split daughters are there
2295        HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName());
2296        HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName());
2297        if (infoA != null && infoB != null) {
2298          // we already processed or will process daughters. Move on, nothing to see here.
2299          hbi.setSkipChecks(true);
2300          return;
2301        }
2302      }
2303
2304      // For Replica region, we need to do a similar check. If replica is not split successfully,
2305      // error is going to be reported against primary daughter region.
2306      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2307        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2308            + "and not deployed on any region server. This may be transient.");
2309        hbi.setSkipChecks(true);
2310        return;
2311      }
2312
2313      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2314          + descriptiveName + " is a split parent in META, in HDFS, "
2315          + "and not deployed on any region server. This could be transient, "
2316          + "consider to run the catalog janitor first!");
2317      if (shouldFixSplitParents()) {
2318        setShouldRerun();
2319        resetSplitParent(hbi);
2320      }
2321    } else if (inMeta && !inHdfs && !isDeployed) {
2322      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2323          + descriptiveName + " found in META, but not in HDFS "
2324          + "or deployed on any region server.");
2325      if (shouldFixMeta()) {
2326        deleteMetaRegion(hbi);
2327      }
2328    } else if (inMeta && !inHdfs && isDeployed) {
2329      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2330          + " found in META, but not in HDFS, " +
2331          "and deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2332      // We treat HDFS as ground truth.  Any information in meta is transient
      // and equivalent data can be regenerated.  So, let's unassign and remove
2334      // these problems from META.
2335      if (shouldFixAssignments()) {
2336        errors.print("Trying to fix unassigned region...");
2337        undeployRegions(hbi);
2338      }
2339      if (shouldFixMeta()) {
2340        // wait for it to complete
2341        deleteMetaRegion(hbi);
2342      }
2343    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2344      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2345          + " not deployed on any region server.");
2346      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2347    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2348      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2349          "Region " + descriptiveName + " should not be deployed according " +
2350          "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2351      if (shouldFixAssignments()) {
2352        errors.print("Trying to close the region " + descriptiveName);
2353        setShouldRerun();
2354        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2355      }
2356    } else if (inMeta && inHdfs && isMultiplyDeployed) {
2357      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2358          + " is listed in hbase:meta on region server " + hbi.getMetaEntry().regionServer
2359          + " but is multiply assigned to region servers " +
2360          Joiner.on(", ").join(hbi.getDeployedOn()));
2361      // If we are trying to fix the errors
2362      if (shouldFixAssignments()) {
2363        errors.print("Trying to fix assignment error...");
2364        setShouldRerun();
2365        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2366      }
2367    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2368      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2369          + descriptiveName + " listed in hbase:meta on region server " +
2370          hbi.getMetaEntry().regionServer + " but found on region server " +
2371          hbi.getDeployedOn().get(0));
2372      // If we are trying to fix the errors
2373      if (shouldFixAssignments()) {
2374        errors.print("Trying to fix assignment error...");
2375        setShouldRerun();
2376        HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry(), hbi.getDeployedOn());
2377        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2378      }
2379    } else {
2380      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2381          " is in an unforeseen state:" +
2382          " inMeta=" + inMeta +
2383          " inHdfs=" + inHdfs +
2384          " isDeployed=" + isDeployed +
2385          " isMultiplyDeployed=" + isMultiplyDeployed +
2386          " deploymentMatchesMeta=" + deploymentMatchesMeta +
2387          " shouldBeDeployed=" + shouldBeDeployed);
2388    }
2389  }
2390
2391  /**
   * Checks table integrity. Goes over all regions and scans the tables.
2393   * Collects all the pieces for each table and checks if there are missing,
2394   * repeated or overlapping ones.
2395   * @throws IOException
2396   */
2397  SortedMap<TableName, HbckTableInfo> checkIntegrity() throws IOException {
2398    tablesInfo = new TreeMap<>();
2399    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2400    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2401      // Check only valid, working regions
2402      if (hbi.getMetaEntry() == null) {
2403        // this assumes that consistency check has run loadMetaEntry
2404        Path p = hbi.getHdfsRegionDir();
2405        if (p == null) {
2406          errors.report("No regioninfo in Meta or HDFS. " + hbi);
2407        }
2408
2409        // TODO test.
2410        continue;
2411      }
2412      if (hbi.getMetaEntry().regionServer == null) {
2413        errors.detail("Skipping region because no region server: " + hbi);
2414        continue;
2415      }
2416      if (hbi.getMetaEntry().isOffline()) {
2417        errors.detail("Skipping region because it is offline: " + hbi);
2418        continue;
2419      }
2420      if (hbi.containsOnlyHdfsEdits()) {
2421        errors.detail("Skipping region because it only contains edits" + hbi);
2422        continue;
2423      }
2424
2425      // Missing regionDir or over-deployment is checked elsewhere. Include
2426      // these cases in modTInfo, so we can evaluate those regions as part of
2427      // the region chain in META
2428      //if (hbi.foundRegionDir == null) continue;
2429      //if (hbi.deployedOn.size() != 1) continue;
2430      if (hbi.getDeployedOn().isEmpty()) {
2431        continue;
2432      }
2433
2434      // We should be safe here
2435      TableName tableName = hbi.getMetaEntry().getTable();
2436      HbckTableInfo modTInfo = tablesInfo.get(tableName);
2437      if (modTInfo == null) {
2438        modTInfo = new HbckTableInfo(tableName, this);
2439      }
2440      for (ServerName server : hbi.getDeployedOn()) {
2441        modTInfo.addServer(server);
2442      }
2443
2444      if (!hbi.isSkipChecks()) {
2445        modTInfo.addRegionInfo(hbi);
2446      }
2447
2448      tablesInfo.put(tableName, modTInfo);
2449    }
2450
2451    loadTableInfosForTablesWithNoRegion();
2452
2453    logParallelMerge();
2454    for (HbckTableInfo tInfo : tablesInfo.values()) {
2455      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2456      if (!tInfo.checkRegionChain(handler)) {
2457        errors.report("Found inconsistency in table " + tInfo.getName());
2458      }
2459    }
2460    return tablesInfo;
2461  }
2462
  /** Loads table infos for tables that may not have been included, since there are no
   * regions reported for the table, but the table dir is present in hdfs.
2465   */
2466  private void loadTableInfosForTablesWithNoRegion() throws IOException {
2467    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2468    for (TableDescriptor htd : allTables.values()) {
2469      if (checkMetaOnly && !htd.isMetaTable()) {
2470        continue;
2471      }
2472
2473      TableName tableName = htd.getTableName();
2474      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2475        HbckTableInfo tableInfo = new HbckTableInfo(tableName, this);
2476        tableInfo.htds.add(htd);
2477        tablesInfo.put(htd.getTableName(), tableInfo);
2478      }
2479    }
2480  }
2481
2482  /**
2483   * Merge hdfs data by moving from contained HbckRegionInfo into targetRegionDir.
2484   * @return number of file move fixes done to merge regions.
2485   */
2486  public int mergeRegionDirs(Path targetRegionDir, HbckRegionInfo contained) throws IOException {
2487    int fileMoves = 0;
2488    String thread = Thread.currentThread().getName();
2489    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2490    debugLsr(contained.getHdfsRegionDir());
2491
2492    // rename the contained into the container.
2493    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2494    FileStatus[] dirs = null;
2495    try {
2496      dirs = fs.listStatus(contained.getHdfsRegionDir());
2497    } catch (FileNotFoundException fnfe) {
2498      // region we are attempting to merge in is not present!  Since this is a merge, there is
2499      // no harm skipping this region if it does not exist.
2500      if (!fs.exists(contained.getHdfsRegionDir())) {
2501        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2502            + " is missing. Assuming already sidelined or moved.");
2503      } else {
2504        sidelineRegionDir(fs, contained);
2505      }
2506      return fileMoves;
2507    }
2508
2509    if (dirs == null) {
2510      if (!fs.exists(contained.getHdfsRegionDir())) {
2511        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2512            + " already sidelined.");
2513      } else {
2514        sidelineRegionDir(fs, contained);
2515      }
2516      return fileMoves;
2517    }
2518
2519    for (FileStatus cf : dirs) {
2520      Path src = cf.getPath();
2521      Path dst =  new Path(targetRegionDir, src.getName());
2522
2523      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2524        // do not copy the old .regioninfo file.
2525        continue;
2526      }
2527
2528      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2529        // do not copy the .oldlogs files
2530        continue;
2531      }
2532
2533      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2534      // FileSystem.rename is inconsistent with directories -- if the
2535      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2536      // it moves the src into the dst dir resulting in (foo/a/b).  If
2537      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2538      for (FileStatus hfile : fs.listStatus(src)) {
2539        boolean success = fs.rename(hfile.getPath(), dst);
2540        if (success) {
2541          fileMoves++;
2542        }
2543      }
2544      LOG.debug("[" + thread + "] Sideline directory contents:");
2545      debugLsr(targetRegionDir);
2546    }
2547
    // If all renames succeeded, sideline the now-empty contained region dir.
2549    sidelineRegionDir(fs, contained);
2550    LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2551        getSidelineDir());
2552    debugLsr(contained.getHdfsRegionDir());
2553
2554    return fileMoves;
2555  }
2556
2557
2558  static class WorkItemOverlapMerge implements Callable<Void> {
2559    private TableIntegrityErrorHandler handler;
2560    Collection<HbckRegionInfo> overlapgroup;
2561
2562    WorkItemOverlapMerge(Collection<HbckRegionInfo> overlapgroup,
2563        TableIntegrityErrorHandler handler) {
2564      this.handler = handler;
2565      this.overlapgroup = overlapgroup;
2566    }
2567
2568    @Override
2569    public Void call() throws Exception {
2570      handler.handleOverlapGroup(overlapgroup);
2571      return null;
2572    }
  }
2574
2575  /**
   * Return descriptors for user-space tables whose metadata has not been
   * modified in the last few milliseconds specified by timelag.
   * If none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
   * SPLITA_QUALIFIER or SPLITB_QUALIFIER columns has changed in the last
   * timelag milliseconds, the table is a candidate to be returned.
   * @param numSkipped counter incremented for each table skipped because it is still in flux
   * @return descriptors of tables that have not been modified recently
2583   */
2584  TableDescriptor[] getTables(AtomicInteger numSkipped) {
2585    List<TableName> tableNames = new ArrayList<>();
2586    long now = EnvironmentEdgeManager.currentTime();
2587
2588    for (HbckRegionInfo hbi : regionInfoMap.values()) {
2589      HbckRegionInfo.MetaEntry info = hbi.getMetaEntry();
2590
2591      // if the start key is zero, then we have found the first region of a table.
2592      // pick only those tables that were not modified in the last few milliseconds.
2593      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2594        if (info.modTime + timelag < now) {
2595          tableNames.add(info.getTable());
2596        } else {
2597          numSkipped.incrementAndGet(); // one more in-flux table
2598        }
2599      }
2600    }
2601    return getTableDescriptors(tableNames);
2602  }
2603
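  /**
   * Fetch table descriptors for the given table names through the Admin API,
   * returning an empty array if the descriptors cannot be retrieved.
   */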
2604  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
    LOG.info("getTableDescriptors for table names: " + tableNames);
2606    try (Connection conn = ConnectionFactory.createConnection(getConf());
2607        Admin admin = conn.getAdmin()) {
2608      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
2609      return tds.toArray(new TableDescriptor[tds.size()]);
2610    } catch (IOException e) {
2611      LOG.debug("Exception getting table descriptors", e);
2612    }
2613    return new TableDescriptor[0];
2614  }
2615
2616  /**
   * Gets the entry in regionInfoMap corresponding to the given encoded
2618   * region name. If the region has not been seen yet, a new entry is added
2619   * and returned.
2620   */
2621  private synchronized HbckRegionInfo getOrCreateInfo(String name) {
2622    HbckRegionInfo hbi = regionInfoMap.get(name);
2623    if (hbi == null) {
2624      hbi = new HbckRegionInfo(null);
2625      regionInfoMap.put(name, hbi);
2626    }
2627    return hbi;
2628  }
2629
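  /**
   * Check for replication queues left behind by removed peers and, when -fixReplication
   * is set, delete them and mark that fsck should be rerun.
   */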
2630  private void checkAndFixReplication() throws IOException {
2631    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
2632    checker.checkUnDeletedQueues();
2633
2634    if (checker.hasUnDeletedQueues() && this.fixReplication) {
2635      checker.fixUnDeletedQueues();
2636      setShouldRerun();
2637    }
2638  }
2639
2640  /**
   * Check the values in regionInfoMap for hbase:meta.
   * Checks whether zero regions, or more than one region, claim to be holding
   * hbase:meta; if such an inconsistency is found, report an error and try to fix it.
   * @throws IOException from HBaseFsckRepair functions
   * @throws KeeperException if a ZooKeeper operation fails
   * @throws InterruptedException if the fix is interrupted
   */
2649  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2650    Map<Integer, HbckRegionInfo> metaRegions = new HashMap<>();
2651    for (HbckRegionInfo value : regionInfoMap.values()) {
2652      if (value.getMetaEntry() != null && value.getMetaEntry().isMetaRegion()) {
2653        metaRegions.put(value.getReplicaId(), value);
2654      }
2655    }
2656    int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
2657        .getRegionReplication();
2658    boolean noProblem = true;
    // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
2660    // Check the deployed servers. It should be exactly one server for each replica.
2661    for (int i = 0; i < metaReplication; i++) {
2662      HbckRegionInfo metaHbckRegionInfo = metaRegions.remove(i);
2663      List<ServerName> servers = new ArrayList<>();
2664      if (metaHbckRegionInfo != null) {
2665        servers = metaHbckRegionInfo.getDeployedOn();
2666      }
2667      if (servers.size() != 1) {
2668        noProblem = false;
2669        if (servers.isEmpty()) {
2670          assignMetaReplica(i);
2671        } else if (servers.size() > 1) {
          errors.reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
              metaHbckRegionInfo.getReplicaId() + " is deployed on more than one region server.");
2675          if (shouldFixAssignments()) {
2676            errors.print("Trying to fix a problem with hbase:meta, replicaId " +
2677                metaHbckRegionInfo.getReplicaId() + "..");
2678            setShouldRerun();
            // try to fix it (treat it as a duplicate assignment)
2680            HBaseFsckRepair
2681                .fixMultiAssignment(connection, metaHbckRegionInfo.getMetaEntry(), servers);
2682          }
2683        }
2684      }
2685    }
2686    // unassign whatever is remaining in metaRegions. They are excess replicas.
2687    for (Map.Entry<Integer, HbckRegionInfo> entry : metaRegions.entrySet()) {
2688      noProblem = false;
2689      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2690          "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
2691          ", deployed " + metaRegions.size());
2692      if (shouldFixAssignments()) {
2693        errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
2694            " of hbase:meta..");
2695        setShouldRerun();
2696        unassignMetaReplica(entry.getValue());
2697      }
2698    }
2699    // if noProblem is false, rerun hbck with hopefully fixed META
2700    // if noProblem is true, no errors, so continue normally
2701    return noProblem;
2702  }
2703
2704  private void unassignMetaReplica(HbckRegionInfo hi)
2705      throws IOException, InterruptedException, KeeperException {
2706    undeployRegions(hi);
2707    ZKUtil
2708        .deleteNode(zkw, zkw.getZNodePaths().getZNodeForReplica(hi.getMetaEntry().getReplicaId()));
2709  }
2710
2711  private void assignMetaReplica(int replicaId)
2712      throws IOException, KeeperException, InterruptedException {
2713    errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
2714        replicaId +" is not found on any region.");
2715    if (shouldFixAssignments()) {
2716      errors.print("Trying to fix a problem with hbase:meta..");
2717      setShouldRerun();
2718      // try to fix it (treat it as unassigned region)
2719      RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
2720          RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
2721      HBaseFsckRepair.fixUnassigned(admin, h);
2722      HBaseFsckRepair.waitUntilAssigned(admin, h);
2723    }
2724  }
2725
2726  /**
2727   * Scan hbase:meta, adding all regions found to the regionInfo map.
2728   * @throws IOException if an error is encountered
2729   */
2730  boolean loadMetaEntries() throws IOException {
2731    MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
2732      int countRecord = 1;
2733
      // comparator to order cells by timestamp, used to find the latest modification
2735      final Comparator<Cell> comp = new Comparator<Cell>() {
2736        @Override
2737        public int compare(Cell k1, Cell k2) {
2738          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
2739        }
2740      };
2741
2742      @Override
2743      public boolean visit(Result result) throws IOException {
2744        try {
2745
2746          // record the latest modification of this META record
2747          long ts =  Collections.max(result.listCells(), comp).getTimestamp();
2748          RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
2749          if (rl == null) {
2750            emptyRegionInfoQualifiers.add(result);
2751            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2752              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2753            return true;
2754          }
2755          ServerName sn = null;
2756          if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null ||
2757              rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
2758            emptyRegionInfoQualifiers.add(result);
2759            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2760              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2761            return true;
2762          }
2763          RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
2764          if (!(isTableIncluded(hri.getTable())
2765              || hri.isMetaRegion())) {
2766            return true;
2767          }
2768          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
2769          for (HRegionLocation h : rl.getRegionLocations()) {
2770            if (h == null || h.getRegionInfo() == null) {
2771              continue;
2772            }
2773            sn = h.getServerName();
2774            hri = h.getRegionInfo();
2775
2776            HbckRegionInfo.MetaEntry m = null;
2777            if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2778              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, daughters.getFirst(),
2779                  daughters.getSecond());
2780            } else {
2781              m = new HbckRegionInfo.MetaEntry(hri, sn, ts, null, null);
2782            }
2783            HbckRegionInfo previous = regionInfoMap.get(hri.getEncodedName());
2784            if (previous == null) {
2785              regionInfoMap.put(hri.getEncodedName(), new HbckRegionInfo(m));
2786            } else if (previous.getMetaEntry() == null) {
2787              previous.setMetaEntry(m);
2788            } else {
              throw new IOException("Two entries in hbase:meta are the same: " + previous);
2790            }
2791          }
2792          List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(result.rawCells());
2793          if (mergeParents != null) {
2794            for (RegionInfo mergeRegion : mergeParents) {
2795              if (mergeRegion != null) {
2796                // This region is already being merged
2797                HbckRegionInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
2798                hbInfo.setMerged(true);
2799              }
2800            }
2801          }
2802
2803          // show proof of progress to the user, once for every 100 records.
2804          if (countRecord % 100 == 0) {
2805            errors.progress();
2806          }
2807          countRecord++;
2808          return true;
2809        } catch (RuntimeException e) {
2810          LOG.error("Result=" + result);
2811          throw e;
2812        }
2813      }
2814    };
2815    if (!checkMetaOnly) {
2816      // Scan hbase:meta to pick up user regions
2817      MetaTableAccessor.fullScanRegions(connection, visitor);
2818    }
2819
2820    errors.print("");
2821    return true;
2822  }
2823
2824  /**
2825   * Prints summary of all tables found on the system.
2826   */
2827  private void printTableSummary(SortedMap<TableName, HbckTableInfo> tablesInfo) {
2828    StringBuilder sb = new StringBuilder();
2829    int numOfSkippedRegions;
2830    errors.print("Summary:");
2831    for (HbckTableInfo tInfo : tablesInfo.values()) {
2832      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
2833          skippedRegions.get(tInfo.getName()).size() : 0;
2834
2835      if (errors.tableHasErrors(tInfo)) {
2836        errors.print("Table " + tInfo.getName() + " is inconsistent.");
      } else if (numOfSkippedRegions > 0) {
        errors.print("Table " + tInfo.getName() + " is okay (with "
          + numOfSkippedRegions + " skipped regions).");
      } else {
2842        errors.print("Table " + tInfo.getName() + " is okay.");
2843      }
2844      errors.print("    Number of regions: " + tInfo.getNumRegions());
2845      if (numOfSkippedRegions > 0) {
2846        Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
2847        System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
2848        System.out.println("      List of skipped regions:");
2849        for(String sr : skippedRegionStrings) {
2850          System.out.println("        " + sr);
2851        }
2852      }
2853      sb.setLength(0); // clear out existing buffer, if any.
2854      sb.append("    Deployed on: ");
2855      for (ServerName server : tInfo.deployedOn) {
2856        sb.append(" " + server.toString());
2857      }
2858      errors.print(sb.toString());
2859    }
2860  }
2861
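  /**
   * Instantiate the error reporter named by the {@code hbasefsck.errorreporter}
   * configuration key, defaulting to {@link PrintingErrorReporter}.
   * <p>A minimal sketch of plugging in a custom reporter ({@code MyReporter} is a
   * hypothetical {@link HbckErrorReporter} implementation):
   * <pre>{@code
   *   Configuration conf = HBaseConfiguration.create();
   *   conf.setClass("hbasefsck.errorreporter", MyReporter.class, HbckErrorReporter.class);
   *   HbckErrorReporter reporter = HBaseFsck.getErrorReporter(conf);
   * }</pre>
   */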
2862  static HbckErrorReporter getErrorReporter(final Configuration conf)
2863      throws ClassNotFoundException {
2864    Class<? extends HbckErrorReporter> reporter =
2865        conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class,
2866            HbckErrorReporter.class);
2867    return ReflectionUtils.newInstance(reporter, conf);
2868  }
2869
2870  static class PrintingErrorReporter implements HbckErrorReporter {
2871    public int errorCount = 0;
2872    private int showProgress;
2873    // How frequently calls to progress() will create output
2874    private static final int progressThreshold = 100;
2875
2876    Set<HbckTableInfo> errorTables = new HashSet<>();
2877
2878    // for use by unit tests to verify which errors were discovered
2879    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
2880
2881    @Override
2882    public void clear() {
2883      errorTables.clear();
2884      errorList.clear();
2885      errorCount = 0;
2886    }
2887
2888    @Override
2889    public synchronized void reportError(ERROR_CODE errorCode, String message) {
2890      if (errorCode == ERROR_CODE.WRONG_USAGE) {
2891        System.err.println(message);
2892        return;
2893      }
2894
2895      errorList.add(errorCode);
2896      if (!summary) {
2897        System.out.println("ERROR: " + message);
2898      }
2899      errorCount++;
2900      showProgress = 0;
2901    }
2902
2903    @Override
2904    public synchronized void reportError(ERROR_CODE errorCode, String message,
2905        HbckTableInfo table) {
2906      errorTables.add(table);
2907      reportError(errorCode, message);
2908    }
2909
2910    @Override
2911    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2912                                         HbckRegionInfo info) {
2913      errorTables.add(table);
2914      String reference = "(region " + info.getRegionNameAsString() + ")";
2915      reportError(errorCode, reference + " " + message);
2916    }
2917
2918    @Override
2919    public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2920                                         HbckRegionInfo info1, HbckRegionInfo info2) {
2921      errorTables.add(table);
2922      String reference = "(regions " + info1.getRegionNameAsString()
2923          + " and " + info2.getRegionNameAsString() + ")";
2924      reportError(errorCode, reference + " " + message);
2925    }
2926
2927    @Override
2928    public synchronized void reportError(String message) {
2929      reportError(ERROR_CODE.UNKNOWN, message);
2930    }
2931
2932    /**
2933     * Report error information, but do not increment the error count.  Intended for cases
2934     * where the actual error would have been reported previously.
     * @param message the error information to print
2936     */
2937    @Override
2938    public synchronized void report(String message) {
2939      if (! summary) {
2940        System.out.println("ERROR: " + message);
2941      }
2942      showProgress = 0;
2943    }
2944
2945    @Override
2946    public synchronized int summarize() {
2947      System.out.println(Integer.toString(errorCount) +
2948                         " inconsistencies detected.");
2949      if (errorCount == 0) {
2950        System.out.println("Status: OK");
2951        return 0;
2952      } else {
2953        System.out.println("Status: INCONSISTENT");
2954        return -1;
2955      }
2956    }
2957
2958    @Override
2959    public ArrayList<ERROR_CODE> getErrorList() {
2960      return errorList;
2961    }
2962
2963    @Override
2964    public synchronized void print(String message) {
2965      if (!summary) {
2966        System.out.println(message);
2967      }
2968    }
2969
2970    @Override
2971    public boolean tableHasErrors(HbckTableInfo table) {
2972      return errorTables.contains(table);
2973    }
2974
2975    @Override
2976    public void resetErrors() {
2977      errorCount = 0;
2978    }
2979
2980    @Override
2981    public synchronized void detail(String message) {
2982      if (details) {
2983        System.out.println(message);
2984      }
2985      showProgress = 0;
2986    }
2987
2988    @Override
2989    public synchronized void progress() {
2990      if (showProgress++ == progressThreshold) {
2991        if (!summary) {
2992          System.out.print(".");
2993        }
2994        showProgress = 0;
2995      }
2996    }
2997  }
2998
2999  /**
3000   * Contact a region server and get all information from it
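   * <p>A usage sketch (assumes the enclosing class' {@code executor}, {@code errors} and
   * {@code connection} fields, and a {@code ServerName} called {@code serverName}):
   * <pre>{@code
   *   Future<Void> f = executor.submit(new WorkItemRegion(this, serverName, errors, connection));
   *   f.get(); // regions reported by that server are folded into regionInfoMap
   * }</pre>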
3001   */
3002  static class WorkItemRegion implements Callable<Void> {
3003    private final HBaseFsck hbck;
3004    private final ServerName rsinfo;
3005    private final HbckErrorReporter errors;
3006    private final ClusterConnection connection;
3007
3008    WorkItemRegion(HBaseFsck hbck, ServerName info, HbckErrorReporter errors,
3009        ClusterConnection connection) {
3010      this.hbck = hbck;
3011      this.rsinfo = info;
3012      this.errors = errors;
3013      this.connection = connection;
3014    }
3015
3016    @Override
3017    public synchronized Void call() throws IOException {
3018      errors.progress();
3019      try {
3020        BlockingInterface server = connection.getAdmin(rsinfo);
3021
3022        // list all online regions from this region server
3023        List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3024        regions = filterRegions(regions);
3025
3026        if (details) {
3027          errors.detail("RegionServer: " + rsinfo.getServerName() +
3028                           " number of regions: " + regions.size());
3029          for (RegionInfo rinfo: regions) {
3030            errors.detail("  " + rinfo.getRegionNameAsString() +
3031                             " id: " + rinfo.getRegionId() +
3032                             " encoded_name: " + rinfo.getEncodedName() +
3033                             " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3034                             " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3035          }
3036        }
3037
3038        // check to see if the existence of this region matches the region in META
3039
3040        for (RegionInfo r : regions) {
3041          HbckRegionInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3042          hbi.addServer(r, rsinfo);
3043        }
3044      } catch (IOException e) {          // unable to connect to the region server.
3045        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3046          " Unable to fetch region information. " + e);
3047        throw e;
3048      }
3049      return null;
3050    }
3051
3052    private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
3053      List<RegionInfo> ret = Lists.newArrayList();
3054      for (RegionInfo hri : regions) {
3055        if (hri.isMetaRegion() || (!hbck.checkMetaOnly
3056            && hbck.isTableIncluded(hri.getTable()))) {
3057          ret.add(hri);
3058        }
3059      }
3060      return ret;
3061    }
3062  }
3063
3064  /**
   * Contact HDFS and load region directory information for the specified table
   * directory into the regionInfoMap.
3067   */
3068  class WorkItemHdfsDir implements Callable<Void> {
3069    private FileStatus tableDir;
3070    private HbckErrorReporter errors;
3071    private FileSystem fs;
3072
3073    WorkItemHdfsDir(FileSystem fs, HbckErrorReporter errors, FileStatus status) {
3074      this.fs = fs;
3075      this.tableDir = status;
3076      this.errors = errors;
3077    }
3078
3079    @Override
3080    public synchronized Void call() throws InterruptedException, ExecutionException {
3081      final Vector<Exception> exceptions = new Vector<>();
3082
3083      try {
3084        final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3085        final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
3086
3087        for (final FileStatus regionDir : regionDirs) {
3088          errors.progress();
3089          final String encodedName = regionDir.getPath().getName();
3090          // ignore directories that aren't hexadecimal
3091          if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
3092            continue;
3093          }
3094
3095          if (!exceptions.isEmpty()) {
3096            break;
3097          }
3098
3099          futures.add(executor.submit(new Runnable() {
3100            @Override
3101            public void run() {
3102              try {
3103                LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
3104
3105                Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
3106                boolean regioninfoFileExists = fs.exists(regioninfoFile);
3107
3108                if (!regioninfoFileExists) {
                  // As tables become larger it is more and more likely that, by the time we
                  // reach a given region, it will already be gone due to region splits or merges.
3111                  if (!fs.exists(regionDir.getPath())) {
3112                    LOG.warn("By the time we tried to process this region dir it was already gone: "
3113                        + regionDir.getPath());
3114                    return;
3115                  }
3116                }
3117
3118                HbckRegionInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
3119                HbckRegionInfo.HdfsEntry he = new HbckRegionInfo.HdfsEntry();
3120                synchronized (hbi) {
3121                  if (hbi.getHdfsRegionDir() != null) {
                    errors.print("Directory " + encodedName + " appears to be a duplicate of " +
                        hbi.getHdfsRegionDir());
3124                  }
3125
3126                  he.regionDir = regionDir.getPath();
3127                  he.regionDirModTime = regionDir.getModificationTime();
3128                  he.hdfsRegioninfoFilePresent = regioninfoFileExists;
3129                  // we add to orphan list when we attempt to read .regioninfo
3130
                  // Set a flag if this region contains only recovered edits.
                  // This is a special case: a region left behind after a split may hold only edits.
3133                  he.hdfsOnlyEdits = true;
3134                  FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3135                  Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
3136                  for (FileStatus subDir : subDirs) {
3137                    errors.progress();
3138                    String sdName = subDir.getPath().getName();
3139                    if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3140                      he.hdfsOnlyEdits = false;
3141                      break;
3142                    }
3143                  }
3144                  hbi.setHdfsEntry(he);
3145                }
3146              } catch (Exception e) {
3147                LOG.error("Could not load region dir", e);
3148                exceptions.add(e);
3149              }
3150            }
3151          }));
3152        }
3153
3154        // Ensure all pending tasks are complete (or that we run into an exception)
3155        for (Future<?> f : futures) {
3156          if (!exceptions.isEmpty()) {
3157            break;
3158          }
3159          try {
3160            f.get();
3161          } catch (ExecutionException e) {
3162            LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
3163            // Shouldn't happen, we already logged/caught any exceptions in the Runnable
          }
3165        }
3166      } catch (IOException e) {
3167        LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
3168        exceptions.add(e);
3169      } finally {
3170        if (!exceptions.isEmpty()) {
3171          errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3172              + tableDir.getPath().getName()
3173              + " Unable to fetch all HDFS region information. ");
3174          // Just throw the first exception as an indication something bad happened
3175          // Don't need to propagate all the exceptions, we already logged them all anyway
3176          throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement());
3177        }
3178      }
3179      return null;
3180    }
3181  }
3182
3183  /**
   * Contact HDFS and load the .regioninfo file of a single region into its
   * HbckRegionInfo entry.
3186   */
3187  static class WorkItemHdfsRegionInfo implements Callable<Void> {
3188    private HbckRegionInfo hbi;
3189    private HBaseFsck hbck;
3190    private HbckErrorReporter errors;
3191
3192    WorkItemHdfsRegionInfo(HbckRegionInfo hbi, HBaseFsck hbck, HbckErrorReporter errors) {
3193      this.hbi = hbi;
3194      this.hbck = hbck;
3195      this.errors = errors;
3196    }
3197
3198    @Override
3199    public synchronized Void call() throws IOException {
3200      // only load entries that haven't been loaded yet.
3201      if (hbi.getHdfsHRI() == null) {
3202        try {
3203          errors.progress();
3204          hbi.loadHdfsRegioninfo(hbck.getConf());
3205        } catch (IOException ioe) {
3206          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3207              + hbi.getTableName() + " in hdfs dir "
3208              + hbi.getHdfsRegionDir()
3209              + "!  It may be an invalid format or version file.  Treating as "
3210              + "an orphaned regiondir.";
3211          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3212          try {
3213            hbck.debugLsr(hbi.getHdfsRegionDir());
3214          } catch (IOException ioe2) {
3215            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3216            throw ioe2;
3217          }
3218          hbck.orphanHdfsDirs.add(hbi);
3219          throw ioe;
3220        }
3221      }
3222      return null;
3223    }
  }
3225
3226  /**
3227   * Display the full report from fsck. This displays all live and dead region
3228   * servers, and all known regions.
3229   */
3230  public static void setDisplayFullReport() {
3231    details = true;
3232  }
3233
3234  public static boolean shouldDisplayFullReport() {
3235    return details;
3236  }
3237
3238  /**
3239   * Set exclusive mode.
3240   */
3241  public static void setForceExclusive() {
3242    forceExclusive = true;
3243  }
3244
3245  /**
   * Whether hbck should run in exclusive mode; only one instance of hbck can
   * modify HBase at a time.
3247   */
3248  public boolean isExclusive() {
3249    return fixAny || forceExclusive;
3250  }
3251
3252  /**
3253   * Set summary mode.
3254   * Print only summary of the tables and status (OK or INCONSISTENT)
3255   */
3256  static void setSummary() {
3257    summary = true;
3258  }
3259
3260  /**
3261   * Set hbase:meta check mode.
3262   * Print only info about hbase:meta table deployment/state
3263   */
3264  void setCheckMetaOnly() {
3265    checkMetaOnly = true;
3266  }
3267
3268  /**
3269   * Set region boundaries check mode.
3270   */
3271  void setRegionBoundariesCheck() {
3272    checkRegionBoundaries = true;
3273  }
3274
3275  /**
3276   * Set replication fix mode.
3277   */
3278  public void setFixReplication(boolean shouldFix) {
3279    fixReplication = shouldFix;
3280    fixAny |= shouldFix;
3281  }
3282
3283  /**
   * Mark that fsck should be rerun. This is set whenever we have attempted a fix,
   * so that the fsck tool is run once more to verify the result.
3288   */
3289  void setShouldRerun() {
3290    rerun = true;
3291  }
3292
3293  boolean shouldRerun() {
3294    return rerun;
3295  }
3296
3297  /**
   * Set whether fsck should try to fix the region assignment inconsistencies
   * (if any) that it finds.
3300   */
3301  public void setFixAssignments(boolean shouldFix) {
3302    fixAssignments = shouldFix;
3303    fixAny |= shouldFix;
3304  }
3305
3306  boolean shouldFixAssignments() {
3307    return fixAssignments;
3308  }
3309
3310  public void setFixMeta(boolean shouldFix) {
3311    fixMeta = shouldFix;
3312    fixAny |= shouldFix;
3313  }
3314
3315  boolean shouldFixMeta() {
3316    return fixMeta;
3317  }
3318
3319  public void setFixEmptyMetaCells(boolean shouldFix) {
3320    fixEmptyMetaCells = shouldFix;
3321    fixAny |= shouldFix;
3322  }
3323
3324  boolean shouldFixEmptyMetaCells() {
3325    return fixEmptyMetaCells;
3326  }
3327
3328  public void setCheckHdfs(boolean checking) {
3329    checkHdfs = checking;
3330  }
3331
3332  boolean shouldCheckHdfs() {
3333    return checkHdfs;
3334  }
3335
3336  public void setFixHdfsHoles(boolean shouldFix) {
3337    fixHdfsHoles = shouldFix;
3338    fixAny |= shouldFix;
3339  }
3340
3341  boolean shouldFixHdfsHoles() {
3342    return fixHdfsHoles;
3343  }
3344
3345  public void setFixTableOrphans(boolean shouldFix) {
3346    fixTableOrphans = shouldFix;
3347    fixAny |= shouldFix;
3348  }
3349
3350  boolean shouldFixTableOrphans() {
3351    return fixTableOrphans;
3352  }
3353
3354  public void setFixHdfsOverlaps(boolean shouldFix) {
3355    fixHdfsOverlaps = shouldFix;
3356    fixAny |= shouldFix;
3357  }
3358
3359  boolean shouldFixHdfsOverlaps() {
3360    return fixHdfsOverlaps;
3361  }
3362
3363  public void setFixHdfsOrphans(boolean shouldFix) {
3364    fixHdfsOrphans = shouldFix;
3365    fixAny |= shouldFix;
3366  }
3367
3368  boolean shouldFixHdfsOrphans() {
3369    return fixHdfsOrphans;
3370  }
3371
3372  public void setFixVersionFile(boolean shouldFix) {
3373    fixVersionFile = shouldFix;
3374    fixAny |= shouldFix;
3375  }
3376
3377  public boolean shouldFixVersionFile() {
3378    return fixVersionFile;
3379  }
3380
3381  public void setSidelineBigOverlaps(boolean sbo) {
3382    this.sidelineBigOverlaps = sbo;
3383  }
3384
3385  public boolean shouldSidelineBigOverlaps() {
3386    return sidelineBigOverlaps;
3387  }
3388
3389  public void setFixSplitParents(boolean shouldFix) {
3390    fixSplitParents = shouldFix;
3391    fixAny |= shouldFix;
3392  }
3393
3394  public void setRemoveParents(boolean shouldFix) {
3395    removeParents = shouldFix;
3396    fixAny |= shouldFix;
3397  }
3398
3399  boolean shouldFixSplitParents() {
3400    return fixSplitParents;
3401  }
3402
3403  boolean shouldRemoveParents() {
3404    return removeParents;
3405  }
3406
3407  public void setFixReferenceFiles(boolean shouldFix) {
3408    fixReferenceFiles = shouldFix;
3409    fixAny |= shouldFix;
3410  }
3411
3412  boolean shouldFixReferenceFiles() {
3413    return fixReferenceFiles;
3414  }
3415
3416  public void setFixHFileLinks(boolean shouldFix) {
3417    fixHFileLinks = shouldFix;
3418    fixAny |= shouldFix;
3419  }
3420
3421  boolean shouldFixHFileLinks() {
3422    return fixHFileLinks;
3423  }
3424
3425  public boolean shouldIgnorePreCheckPermission() {
3426    return !fixAny || ignorePreCheckPermission;
3427  }
3428
3429  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3430    this.ignorePreCheckPermission = ignorePreCheckPermission;
3431  }
3432
3433  /**
3434   * @param mm maximum number of regions to merge into a single region.
3435   */
3436  public void setMaxMerge(int mm) {
3437    this.maxMerge = mm;
3438  }
3439
3440  public int getMaxMerge() {
3441    return maxMerge;
3442  }
3443
3444  public void setMaxOverlapsToSideline(int mo) {
3445    this.maxOverlapsToSideline = mo;
3446  }
3447
3448  public int getMaxOverlapsToSideline() {
3449    return maxOverlapsToSideline;
3450  }
3451
3452  /**
   * Only check/fix the tables specified by the list; an empty list means all
   * tables are included.
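   * <p>A minimal illustration (assumes an {@code HBaseFsck} instance named {@code hbck}):
   * <pre>{@code
   *   hbck.includeTable(TableName.valueOf("t1"));
   *   hbck.isTableIncluded(TableName.valueOf("t1")); // true
   *   hbck.isTableIncluded(TableName.valueOf("t2")); // false, t2 was never included
   * }</pre>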
3455   */
3456  boolean isTableIncluded(TableName table) {
3457    return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
3458  }
3459
3460  public void includeTable(TableName table) {
3461    tablesIncluded.add(table);
3462  }
3463
3464  Set<TableName> getIncludedTables() {
3465    return new HashSet<>(tablesIncluded);
3466  }
3467
3468  /**
   * We are interested in only those tables that have not changed their state in
   * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag.
   * @param seconds the time lag in seconds
3472   */
3473  public void setTimeLag(long seconds) {
3474    timelag = seconds * 1000; // convert to milliseconds
3475  }
3476
3477  /**
   * Set the HDFS directory used to sideline data.
   * @param sidelineDir HDFS path to sideline data
3480   */
3481  public void setSidelineDir(String sidelineDir) {
3482    this.sidelineDir = new Path(sidelineDir);
3483  }
3484
3485  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3486    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3487  }
3488
3489  public HFileCorruptionChecker getHFilecorruptionChecker() {
3490    return hfcc;
3491  }
3492
3493  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3494    this.hfcc = hfcc;
3495  }
3496
3497  public void setRetCode(int code) {
3498    this.retcode = code;
3499  }
3500
3501  public int getRetCode() {
3502    return retcode;
3503  }
3504
3505  protected HBaseFsck printUsageAndExit() {
3506    StringWriter sw = new StringWriter(2048);
3507    PrintWriter out = new PrintWriter(sw);
3508    out.println("");
3509    out.println("-----------------------------------------------------------------------");
3510    out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
    out.println("In general, all Read-Only options are supported and can be used");
    out.println("safely. Most -fix/-repair options are NOT supported. Please see usage");
3513    out.println("below for details on which options are not supported.");
3514    out.println("-----------------------------------------------------------------------");
3515    out.println("");
3516    out.println("Usage: fsck [opts] {only tables}");
3517    out.println(" where [opts] are:");
3518    out.println("   -help Display help options (this)");
3519    out.println("   -details Display full report of all regions.");
    out.println("   -timelag <timeInSeconds>  Process only regions that " +
                       "have not experienced any metadata updates in the last " +
                       "<timeInSeconds> seconds.");
3523    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3524        " before checking if the fix worked if run with -fix");
3525    out.println("   -summary Print only summary of the tables and status.");
3526    out.println("   -metaonly Only check the state of the hbase:meta table.");
3527    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
    out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
3529    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
3530
3531    out.println("");
3532    out.println("  Datafile Repair options: (expert features, use with caution!)");
    out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
3535
3536    out.println("");
3537    out.println(" Replication options");
3538    out.println("   -fixReplication   Deletes replication queues for removed peers");
3539
3540    out.println("");
3541    out.println("  Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
3542    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
3543    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
3544    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
3545    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
3546        + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3547    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
3548
3549    out.println("");
3550    out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
3551    out.println("");
3552    out.println("  UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
    out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
3554    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
3555    out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
3556    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
3557    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
3558    out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3559    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
3560    out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
3561    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
3562    out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
3563    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
3564    out.println("   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
3565    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
3566        + " (empty REGIONINFO_QUALIFIER rows)");
3567
3568    out.println("");
3569    out.println("  UNSUPPORTED Metadata Repair shortcuts");
    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles " +
        "-fixHFileLinks");
3573    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3574
3575    out.flush();
3576    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3577
3578    setRetCode(-2);
3579    return this;
3580  }
3581
3582  /**
   * Main program.
   *
   * @param args command-line arguments for the fsck tool
   * @throws Exception if the tool fails to run
3587   */
3588  public static void main(String[] args) throws Exception {
3589    // create a fsck object
3590    Configuration conf = HBaseConfiguration.create();
3591    Path hbasedir = FSUtils.getRootDir(conf);
3592    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3593    FSUtils.setFsDefault(conf, new Path(defaultFs));
3594    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
3595    System.exit(ret);
3596  }
3597
3598  /**
3599   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
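   * <p>An illustrative invocation (quorum value and table name are hypothetical):
   * <pre>{@code
   *   hbase hbck -Dhbase.zookeeper.quorum=zk1,zk2,zk3 -details MyTable
   * }</pre>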
3600   */
3601  static class HBaseFsckTool extends Configured implements Tool {
3602    HBaseFsckTool(Configuration conf) { super(conf); }
3603    @Override
3604    public int run(String[] args) throws Exception {
3605      HBaseFsck hbck = new HBaseFsck(getConf());
3606      hbck.exec(hbck.executor, args);
3607      hbck.close();
3608      return hbck.getRetCode();
3609    }
  }
3611
3612
3613  public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
3614      InterruptedException {
3615    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3616
3617    boolean checkCorruptHFiles = false;
3618    boolean sidelineCorruptHFiles = false;
3619
3620    // Process command-line args.
3621    for (int i = 0; i < args.length; i++) {
3622      String cmd = args[i];
3623      if (cmd.equals("-help") || cmd.equals("-h")) {
3624        return printUsageAndExit();
3625      } else if (cmd.equals("-details")) {
3626        setDisplayFullReport();
3627      } else if (cmd.equals("-exclusive")) {
3628        setForceExclusive();
3629      } else if (cmd.equals("-timelag")) {
3630        if (i == args.length - 1) {
3631          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3632          return printUsageAndExit();
3633        }
3634        try {
3635          long timelag = Long.parseLong(args[i+1]);
3636          setTimeLag(timelag);
3637        } catch (NumberFormatException e) {
3638          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3639          return printUsageAndExit();
3640        }
3641        i++;
3642      } else if (cmd.equals("-sleepBeforeRerun")) {
3643        if (i == args.length - 1) {
3644          errors.reportError(ERROR_CODE.WRONG_USAGE,
3645            "HBaseFsck: -sleepBeforeRerun needs a value.");
3646          return printUsageAndExit();
3647        }
3648        try {
3649          sleepBeforeRerun = Long.parseLong(args[i+1]);
3650        } catch (NumberFormatException e) {
3651          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3652          return printUsageAndExit();
3653        }
3654        i++;
3655      } else if (cmd.equals("-sidelineDir")) {
3656        if (i == args.length - 1) {
3657          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3658          return printUsageAndExit();
3659        }
3660        i++;
3661        setSidelineDir(args[i]);
3662      } else if (cmd.equals("-fix")) {
3663        errors.reportError(ERROR_CODE.WRONG_USAGE,
          "This option is deprecated, please use -fixAssignments instead.");
3665        setFixAssignments(true);
3666      } else if (cmd.equals("-fixAssignments")) {
3667        setFixAssignments(true);
3668      } else if (cmd.equals("-fixMeta")) {
3669        setFixMeta(true);
3670      } else if (cmd.equals("-noHdfsChecking")) {
3671        setCheckHdfs(false);
3672      } else if (cmd.equals("-fixHdfsHoles")) {
3673        setFixHdfsHoles(true);
3674      } else if (cmd.equals("-fixHdfsOrphans")) {
3675        setFixHdfsOrphans(true);
3676      } else if (cmd.equals("-fixTableOrphans")) {
3677        setFixTableOrphans(true);
3678      } else if (cmd.equals("-fixHdfsOverlaps")) {
3679        setFixHdfsOverlaps(true);
3680      } else if (cmd.equals("-fixVersionFile")) {
3681        setFixVersionFile(true);
3682      } else if (cmd.equals("-sidelineBigOverlaps")) {
3683        setSidelineBigOverlaps(true);
3684      } else if (cmd.equals("-fixSplitParents")) {
3685        setFixSplitParents(true);
3686      } else if (cmd.equals("-removeParents")) {
3687        setRemoveParents(true);
3688      } else if (cmd.equals("-ignorePreCheckPermission")) {
3689        setIgnorePreCheckPermission(true);
3690      } else if (cmd.equals("-checkCorruptHFiles")) {
3691        checkCorruptHFiles = true;
3692      } else if (cmd.equals("-sidelineCorruptHFiles")) {
3693        sidelineCorruptHFiles = true;
3694      } else if (cmd.equals("-fixReferenceFiles")) {
3695        setFixReferenceFiles(true);
3696      } else if (cmd.equals("-fixHFileLinks")) {
3697        setFixHFileLinks(true);
3698      } else if (cmd.equals("-fixEmptyMetaCells")) {
3699        setFixEmptyMetaCells(true);
3700      } else if (cmd.equals("-repair")) {
3701        // this attempts to merge overlapping hdfs regions, needs testing
3702        // under load
3703        setFixHdfsHoles(true);
3704        setFixHdfsOrphans(true);
3705        setFixMeta(true);
3706        setFixAssignments(true);
3707        setFixHdfsOverlaps(true);
3708        setFixVersionFile(true);
3709        setSidelineBigOverlaps(true);
3710        setFixSplitParents(false);
3711        setCheckHdfs(true);
3712        setFixReferenceFiles(true);
3713        setFixHFileLinks(true);
3714      } else if (cmd.equals("-repairHoles")) {
3715        // this will make all missing hdfs regions available but may lose data
3716        setFixHdfsHoles(true);
3717        setFixHdfsOrphans(false);
3718        setFixMeta(true);
3719        setFixAssignments(true);
3720        setFixHdfsOverlaps(false);
3721        setSidelineBigOverlaps(false);
3722        setFixSplitParents(false);
3723        setCheckHdfs(true);
3724      } else if (cmd.equals("-maxOverlapsToSideline")) {
3725        if (i == args.length - 1) {
3726          errors.reportError(ERROR_CODE.WRONG_USAGE,
3727            "-maxOverlapsToSideline needs a numeric value argument.");
3728          return printUsageAndExit();
3729        }
3730        try {
3731          int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
3732          setMaxOverlapsToSideline(maxOverlapsToSideline);
3733        } catch (NumberFormatException e) {
3734          errors.reportError(ERROR_CODE.WRONG_USAGE,
3735            "-maxOverlapsToSideline needs a numeric value argument.");
3736          return printUsageAndExit();
3737        }
3738        i++;
3739      } else if (cmd.equals("-maxMerge")) {
3740        if (i == args.length - 1) {
3741          errors.reportError(ERROR_CODE.WRONG_USAGE,
3742            "-maxMerge needs a numeric value argument.");
3743          return printUsageAndExit();
3744        }
3745        try {
3746          int maxMerge = Integer.parseInt(args[i+1]);
3747          setMaxMerge(maxMerge);
3748        } catch (NumberFormatException e) {
3749          errors.reportError(ERROR_CODE.WRONG_USAGE,
3750            "-maxMerge needs a numeric value argument.");
3751          return printUsageAndExit();
3752        }
3753        i++;
3754      } else if (cmd.equals("-summary")) {
3755        setSummary();
3756      } else if (cmd.equals("-metaonly")) {
3757        setCheckMetaOnly();
3758      } else if (cmd.equals("-boundaries")) {
3759        setRegionBoundariesCheck();
3760      } else if (cmd.equals("-fixReplication")) {
3761        setFixReplication(true);
3762      } else if (cmd.startsWith("-")) {
3763        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3764        return printUsageAndExit();
3765      } else {
3766        includeTable(TableName.valueOf(cmd));
3767        errors.print("Allow checking/fixes for table: " + cmd);
3768      }
3769    }
3770
3771    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
3772
3773    // pre-check current user has FS write permission or not
3774    try {
3775      preCheckPermission();
3776    } catch (AccessDeniedException ace) {
3777      Runtime.getRuntime().exit(-1);
3778    } catch (IOException ioe) {
3779      Runtime.getRuntime().exit(-1);
3780    }
3781
3782    // do the real work of hbck
3783    connect();
3784
3785    // after connecting to server above, we have server version
3786    // check if unsupported option is specified based on server version
3787    if (!isOptionsSupported(args)) {
3788      return printUsageAndExit();
3789    }
3790
3791    try {
3792      // if corrupt file mode is on, first fix them since they may be opened later
3793      if (checkCorruptHFiles || sidelineCorruptHFiles) {
3794        LOG.info("Checking all hfiles for corruption");
3795        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3796        setHFileCorruptionChecker(hfcc); // so we can get result
3797        Collection<TableName> tables = getIncludedTables();
3798        Collection<Path> tableDirs = new ArrayList<>();
3799        Path rootdir = FSUtils.getRootDir(getConf());
3800        if (tables.size() > 0) {
3801          for (TableName t : tables) {
3802            tableDirs.add(FSUtils.getTableDir(rootdir, t));
3803          }
3804        } else {
3805          tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
3806        }
3807        hfcc.checkTables(tableDirs);
3808        hfcc.report(errors);
3809      }
3810
3811      // check and fix table integrity, region consistency.
3812      int code = onlineHbck();
3813      setRetCode(code);
3814      // If we have changed the HBase state it is better to run hbck again
3815      // to see if we haven't broken something else in the process.
3816      // We run it only once more because otherwise we can easily fall into
3817      // an infinite loop.
3818      if (shouldRerun()) {
3819        try {
3820          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3821          Thread.sleep(sleepBeforeRerun);
3822        } catch (InterruptedException ie) {
3823          LOG.warn("Interrupted while sleeping");
3824          return this;
3825        }
3826        // Just report
3827        setFixAssignments(false);
3828        setFixMeta(false);
3829        setFixHdfsHoles(false);
3830        setFixHdfsOverlaps(false);
3831        setFixVersionFile(false);
3832        setFixTableOrphans(false);
3833        errors.resetErrors();
3834        code = onlineHbck();
3835        setRetCode(code);
3836      }
3837    } finally {
3838      IOUtils.closeQuietly(this);
3839    }
3840    return this;
3841  }
3842
3843  private boolean isOptionsSupported(String[] args) {
3844    boolean result = true;
3845    String hbaseServerVersion = status.getHBaseVersion();
3846    Object[] versionComponents = VersionInfo.getVersionComponents(hbaseServerVersion);
3847    if (versionComponents[0] instanceof Integer && ((Integer)versionComponents[0]) >= 2) {
3848      // Process command-line args.
3849      for (String arg : args) {
3850        if (unsupportedOptionsInV2.contains(arg)) {
3851          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
              "option '" + arg + "' is not supported!");
3853          result = false;
3854          break;
3855        }
3856      }
3857    }
3858    return result;
3859  }
3860
3861  /**
3862   * ls -r for debugging purposes
3863   */
3864  void debugLsr(Path p) throws IOException {
3865    debugLsr(getConf(), p, errors);
3866  }
3867
3868  /**
3869   * ls -r for debugging purposes
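   * <p>Illustrative call (the path is hypothetical; assumes {@code conf} is an HBase
   * {@link Configuration}); output is only produced when debug logging is enabled:
   * <pre>{@code
   *   HBaseFsck.debugLsr(conf, new Path("/hbase/data/default/t1"));
   * }</pre>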
3870   */
3871  public static void debugLsr(Configuration conf,
3872      Path p) throws IOException {
3873    debugLsr(conf, p, new PrintingErrorReporter());
3874  }
3875
3876  /**
3877   * ls -r for debugging purposes
3878   */
3879  public static void debugLsr(Configuration conf,
3880      Path p, HbckErrorReporter errors) throws IOException {
3881    if (!LOG.isDebugEnabled() || p == null) {
3882      return;
3883    }
3884    FileSystem fs = p.getFileSystem(conf);
3885
3886    if (!fs.exists(p)) {
3887      // nothing
3888      return;
3889    }
3890    errors.print(p.toString());
3891
3892    if (fs.isFile(p)) {
3893      return;
3894    }
3895
3896    if (fs.getFileStatus(p).isDirectory()) {
3897      FileStatus[] fss= fs.listStatus(p);
3898      for (FileStatus status : fss) {
3899        debugLsr(conf, status.getPath(), errors);
3900      }
3901    }
3902  }
3903}