001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.Closeable;
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.io.InterruptedIOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.net.InetAddress;
027import java.net.URI;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Collection;
031import java.util.Collections;
032import java.util.Comparator;
033import java.util.EnumSet;
034import java.util.HashMap;
035import java.util.HashSet;
036import java.util.Iterator;
037import java.util.List;
038import java.util.Locale;
039import java.util.Map;
040import java.util.Map.Entry;
041import java.util.Objects;
042import java.util.Optional;
043import java.util.Set;
044import java.util.SortedMap;
045import java.util.SortedSet;
046import java.util.TreeMap;
047import java.util.TreeSet;
048import java.util.Vector;
049import java.util.concurrent.Callable;
050import java.util.concurrent.ConcurrentSkipListMap;
051import java.util.concurrent.ExecutionException;
052import java.util.concurrent.ExecutorService;
053import java.util.concurrent.Executors;
054import java.util.concurrent.Future;
055import java.util.concurrent.FutureTask;
056import java.util.concurrent.ScheduledThreadPoolExecutor;
057import java.util.concurrent.TimeUnit;
058import java.util.concurrent.TimeoutException;
059import java.util.concurrent.atomic.AtomicBoolean;
060import java.util.concurrent.atomic.AtomicInteger;
061import java.util.stream.Collectors;
062import org.apache.commons.io.IOUtils;
063import org.apache.commons.lang3.StringUtils;
064import org.apache.hadoop.conf.Configuration;
065import org.apache.hadoop.conf.Configured;
066import org.apache.hadoop.fs.FSDataOutputStream;
067import org.apache.hadoop.fs.FileStatus;
068import org.apache.hadoop.fs.FileSystem;
069import org.apache.hadoop.fs.Path;
070import org.apache.hadoop.fs.permission.FsAction;
071import org.apache.hadoop.fs.permission.FsPermission;
072import org.apache.hadoop.hbase.Abortable;
073import org.apache.hadoop.hbase.Cell;
074import org.apache.hadoop.hbase.CellUtil;
075import org.apache.hadoop.hbase.ClusterMetrics;
076import org.apache.hadoop.hbase.ClusterMetrics.Option;
077import org.apache.hadoop.hbase.HBaseConfiguration;
078import org.apache.hadoop.hbase.HBaseInterfaceAudience;
079import org.apache.hadoop.hbase.HConstants;
080import org.apache.hadoop.hbase.HRegionInfo;
081import org.apache.hadoop.hbase.HRegionLocation;
082import org.apache.hadoop.hbase.KeyValue;
083import org.apache.hadoop.hbase.MasterNotRunningException;
084import org.apache.hadoop.hbase.MetaTableAccessor;
085import org.apache.hadoop.hbase.RegionLocations;
086import org.apache.hadoop.hbase.ServerName;
087import org.apache.hadoop.hbase.TableName;
088import org.apache.hadoop.hbase.TableNotFoundException;
089import org.apache.hadoop.hbase.ZooKeeperConnectionException;
090import org.apache.hadoop.hbase.client.Admin;
091import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
092import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
093import org.apache.hadoop.hbase.client.Connection;
094import org.apache.hadoop.hbase.client.ConnectionFactory;
095import org.apache.hadoop.hbase.client.Delete;
096import org.apache.hadoop.hbase.client.Get;
097import org.apache.hadoop.hbase.client.Put;
098import org.apache.hadoop.hbase.client.RegionInfo;
099import org.apache.hadoop.hbase.client.RegionInfoBuilder;
100import org.apache.hadoop.hbase.client.RegionLocator;
101import org.apache.hadoop.hbase.client.RegionReplicaUtil;
102import org.apache.hadoop.hbase.client.Result;
103import org.apache.hadoop.hbase.client.ResultScanner;
104import org.apache.hadoop.hbase.client.RowMutations;
105import org.apache.hadoop.hbase.client.Scan;
106import org.apache.hadoop.hbase.client.Table;
107import org.apache.hadoop.hbase.client.TableDescriptor;
108import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
109import org.apache.hadoop.hbase.client.TableState;
110import org.apache.hadoop.hbase.io.FileLink;
111import org.apache.hadoop.hbase.io.HFileLink;
112import org.apache.hadoop.hbase.io.hfile.CacheConfig;
113import org.apache.hadoop.hbase.io.hfile.HFile;
114import org.apache.hadoop.hbase.master.RegionState;
115import org.apache.hadoop.hbase.regionserver.HRegion;
116import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
117import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
118import org.apache.hadoop.hbase.replication.ReplicationException;
119import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
120import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
121import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
122import org.apache.hadoop.hbase.replication.ReplicationUtils;
123import org.apache.hadoop.hbase.security.AccessDeniedException;
124import org.apache.hadoop.hbase.security.UserProvider;
125import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
126import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
127import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
128import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
129import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
130import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
131import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
132import org.apache.hadoop.hbase.wal.WALSplitUtil;
133import org.apache.hadoop.hbase.zookeeper.ZKUtil;
134import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
135import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
136import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
137import org.apache.hadoop.ipc.RemoteException;
138import org.apache.hadoop.security.UserGroupInformation;
139import org.apache.hadoop.util.ReflectionUtils;
140import org.apache.hadoop.util.Tool;
141import org.apache.hadoop.util.ToolRunner;
142import org.apache.yetus.audience.InterfaceAudience;
143import org.apache.yetus.audience.InterfaceStability;
144import org.apache.zookeeper.KeeperException;
145import org.slf4j.Logger;
146import org.slf4j.LoggerFactory;
147
148import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
149import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
150import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
151import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
152import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
153import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
154import org.apache.hbase.thirdparty.com.google.common.collect.Ordering;
155import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
156import org.apache.hbase.thirdparty.com.google.common.collect.TreeMultimap;
157
158/**
159 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
160 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
 * work with hbase-2.x; it can read state but is not allowed to change state, i.e. effect
 * 'repair'. See hbck2 (HBASE-19121) for an hbck tool for hbase-2.x.
163 *
164 * <p>
165 * Region consistency checks verify that hbase:meta, region deployment on region
166 * servers and the state of data in HDFS (.regioninfo files) all are in
167 * accordance.
168 * <p>
169 * Table integrity checks verify that all possible row keys resolve to exactly
170 * one region of a table.  This means there are no individual degenerate
 * or backwards regions; no holes between regions; and no
172 * overlapping regions.
173 * <p>
174 * The general repair strategy works in two phases:
175 * <ol>
176 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
177 * <li> Repair Region Consistency with hbase:meta and assignments
178 * </ol>
179 * <p>
180 * For table integrity repairs, the tables' region directories are scanned
181 * for .regioninfo files.  Each table's integrity is then verified.  If there
182 * are any orphan regions (regions with no .regioninfo files) or holes, new
183 * regions are fabricated.  Backwards regions are sidelined as well as empty
184 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
185 * a new region is created and all data is merged into the new region.
186 * <p>
187 * Table integrity repairs deal solely with HDFS and could potentially be done
188 * offline -- the hbase region servers or master do not need to be running.
189 * This phase can eventually be used to completely reconstruct the hbase:meta table in
190 * an offline fashion.
191 * <p>
 * Region consistency requires three conditions -- 1) a valid .regioninfo file is
 * present in the HDFS region dir, 2) a valid row with .regioninfo data is present in hbase:meta,
 * and 3) the region is deployed only on the regionserver it was assigned to, with the
 * proper state recorded in the master.
196 * <p>
197 * Region consistency repairs require hbase to be online so that hbck can
198 * contact the HBase master and region servers.  The hbck#connect() method must
199 * first be called successfully.  Much of the region consistency information
200 * is transient and less risky to repair.
201 * <p>
202 * If hbck is run from the command line, there are a handful of arguments that
203 * can be used to limit the kinds of repairs hbck will do.  See the code in
204 * {@link #printUsageAndExit()} for more details.
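 * <p>
 * For illustration only, a minimal programmatic usage sketch (most users invoke hbck from the
 * command line; against hbase-2.x it acts as a read-only reporting tool):
 * <pre>
 *   Configuration conf = HBaseConfiguration.create();
 *   try (HBaseFsck fsck = new HBaseFsck(conf)) {
 *     fsck.connect();                      // grab the hbck lock, open cluster connections
 *     int errorCount = fsck.onlineHbck();  // run the checks and print a summary
 *   }
 * </pre>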
205 */
206@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
207@InterfaceStability.Evolving
208public class HBaseFsck extends Configured implements Closeable {
209  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
210  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
211  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
212  private static boolean rsSupportsOffline = true;
213  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
214  private static final int DEFAULT_MAX_MERGE = 5;
215  private static final String TO_BE_LOADED = "to_be_loaded";
216  /**
   * This is the default location where hbase-1.x hbck1 keeps its lock file.
   * hbck1 puts this lock in place before it goes to write/make changes.
219   */
220  @VisibleForTesting
221  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
222  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
223  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
224  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
225  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In Hadoop 2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which means these operations can take up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds) before timing out.
229  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
230  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
231  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
232  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
233
234  /**********************
235   * Internal resources
236   **********************/
237  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
238  private ClusterMetrics status;
239  private Connection connection;
240  private Admin admin;
241  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
243  protected ExecutorService executor;
244  private long startMillis = EnvironmentEdgeManager.currentTime();
245  private HFileCorruptionChecker hfcc;
246  private int retcode = 0;
247  private Path HBCK_LOCK_PATH;
248  private FSDataOutputStream hbckOutFd;
  // This flag prevents the hbck lock resources from being cleaned up twice, once by the
  // ShutdownHook and once by the main code. We clean up only if connect() was
  // successful.
252  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
253
254  // Unsupported options in HBase 2.0+
255  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
256      "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
257      "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
258      "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
259
260  /***********
261   * Options
262   ***********/
263  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this
265  private static boolean forceExclusive = false; // only this hbck can modify HBase
266  private boolean fixAssignments = false; // fix assignment errors?
267  private boolean fixMeta = false; // fix meta errors?
268  private boolean checkHdfs = true; // load and check fs consistency?
269  private boolean fixHdfsHoles = false; // fix fs holes?
270  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
271  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
272  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
273  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
274  private boolean fixSplitParents = false; // fix lingering split parents
275  private boolean removeParents = false; // remove split parents
276  private boolean fixReferenceFiles = false; // fix lingering reference store file
277  private boolean fixHFileLinks = false; // fix lingering HFileLinks
278  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
279  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
280  private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
  private boolean fixAny = false; // Set to true if any fix option is enabled.
282
283  // limit checking/fixes to listed tables, if empty attempt to check/fix all
  // hbase:meta is always checked
285  private Set<TableName> tablesIncluded = new HashSet<>();
286  private TableName cleanReplicationBarrierTable;
287  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
288  // maximum number of overlapping regions to sideline
289  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
290  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
291  private Path sidelineDir = null;
292
293  private boolean rerun = false; // if we tried to fix something, rerun hbck
294  private static boolean summary = false; // if we want to print less output
295  private boolean checkMetaOnly = false;
296  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if true, skip the permission pre-check
298
299  /*********
300   * State
301   *********/
302  final private ErrorReporter errors;
303  int fixes = 0;
304
305  /**
306   * This map contains the state of all hbck items.  It maps from encoded region
307   * name to HbckInfo structure.  The information contained in HbckInfo is used
308   * to detect and correct consistency (hdfs/meta/deployment) problems.
309   */
310  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<>();
311  // Empty regioninfo qualifiers in hbase:meta
312  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
313
314  /**
   * This map from TableName -> TableInfo contains the structures necessary to
316   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
317   * to prevent dupes.
318   *
319   * If tablesIncluded is empty, this map contains all tables.
320   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case it contains only
   * the meta table.
323   */
324  private SortedMap<TableName, TableInfo> tablesInfo = new ConcurrentSkipListMap<>();
325
326  /**
327   * When initially looking at HDFS, we attempt to find any orphaned data.
328   */
329  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
330
331  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
332  private Map<TableName, TableState> tableStates = new HashMap<>();
333  private final RetryCounterFactory lockFileRetryCounterFactory;
334  private final RetryCounterFactory createZNodeRetryCounterFactory;
335
336  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
337
338  private ZKWatcher zkw = null;
339  private String hbckEphemeralNodePath = null;
340  private boolean hbckZodeCreated = false;
341
342  /**
343   * Constructor
344   *
345   * @param conf Configuration object
346   * @throws MasterNotRunningException if the master is not running
347   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
348   */
349  public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
350    this(conf, createThreadPool(conf));
351  }
352
353  private static ExecutorService createThreadPool(Configuration conf) {
354    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
355    return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
356  }
357
  /**
   * Constructor
   *
   * @param conf Configuration object
   * @param exec executor service used for parallelizable tasks
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
368  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
369      ZooKeeperConnectionException, IOException, ClassNotFoundException {
370    super(conf);
371    errors = getErrorReporter(getConf());
372    this.executor = exec;
373    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
374    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
375    zkw = createZooKeeperWatcher();
376  }
377
378  /**
379   * @return A retry counter factory configured for retrying lock file creation.
380   */
381  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
382    return new RetryCounterFactory(
383        conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
384        conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
385            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
386        conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
387            DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
388  }
389
390  /**
391   * @return A retry counter factory configured for retrying znode creation.
392   */
393  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
394    return new RetryCounterFactory(
395        conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
396        conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
397            DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
398        conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
399            DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
400  }
401
402  /**
   * @return The tmp dir this tool writes to.
404   */
405  @VisibleForTesting
406  public static Path getTmpDir(Configuration conf) throws IOException {
407    return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
408  }
409
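  /**
   * Callable that creates the hbck lock file under the HBase temp dir, retrying on failure, and
   * writes a note identifying the creating host. Returns null if HDFS reports the file is already
   * being created (AlreadyBeingCreatedException), i.e. another process holds the hbck lock.
   */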
410  private static class FileLockCallable implements Callable<FSDataOutputStream> {
411    RetryCounter retryCounter;
412    private final Configuration conf;
413    private Path hbckLockPath = null;
414
415    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
416      this.retryCounter = retryCounter;
417      this.conf = conf;
418    }
419
420    /**
421     * @return Will be <code>null</code> unless you call {@link #call()}
422     */
423    Path getHbckLockPath() {
424      return this.hbckLockPath;
425    }
426
427    @Override
428    public FSDataOutputStream call() throws IOException {
429      try {
430        FileSystem fs = FSUtils.getCurrentFileSystem(this.conf);
431        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf,
432            HConstants.DATA_FILE_UMASK_KEY);
433        Path tmpDir = getTmpDir(conf);
434        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
435        fs.mkdirs(tmpDir);
436        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
437        out.writeBytes(InetAddress.getLocalHost().toString());
438        // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
439        out.writeBytes(" Written by an hbase-2.x Master to block an " +
440            "attempt by an hbase-1.x HBCK tool making modification to state. " +
441            "See 'HBCK must match HBase server version' in the hbase refguide.");
442        out.flush();
443        return out;
      } catch (RemoteException e) {
        if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
446          return null;
447        } else {
448          throw e;
449        }
450      }
451    }
452
453    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
454        final Path hbckLockFilePath, final FsPermission defaultPerms)
455        throws IOException {
456      IOException exception = null;
457      do {
458        try {
459          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
460        } catch (IOException ioe) {
461          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
462              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
463              + retryCounter.getMaxAttempts());
464          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
465              ioe);
466          try {
467            exception = ioe;
468            retryCounter.sleepUntilNextRetry();
469          } catch (InterruptedException ie) {
470            throw (InterruptedIOException) new InterruptedIOException(
471                "Can't create lock file " + hbckLockFilePath.getName())
472            .initCause(ie);
473          }
474        }
475      } while (retryCounter.shouldRetry());
476
477      throw exception;
478    }
479  }
480
481  /**
   * This method maintains a lock using a file. If the file creation fails, the returned stream
   * is null.
483   *
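   * <p>
   * For illustration, a sketch of how {@link #connect()} acquires the lock (not normative usage):
   * <pre>
   *   Pair&lt;Path, FSDataOutputStream&gt; pair =
   *       checkAndMarkRunningHbck(conf, createLockRetryCounterFactory(conf).create());
   *   if (pair.getSecond() == null) {
   *     // lock not acquired; another hbck instance (or an hbase-2.x Master) holds it
   *   }
   * </pre>
   *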
   * @return Pair of the lock file path and the FSDataOutputStream of the newly opened lock file;
   *     the stream is null if the lock could not be acquired
485   * @throws IOException if IO failure occurs
486   */
487  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
488      RetryCounter retryCounter) throws IOException {
489    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
490    ExecutorService executor = Executors.newFixedThreadPool(1);
491    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
492    executor.execute(futureTask);
493    final int timeoutInSeconds = conf.getInt(
494      "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
495    FSDataOutputStream stream = null;
496    try {
497      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
498    } catch (ExecutionException ee) {
499      LOG.warn("Encountered exception when opening lock file", ee);
500    } catch (InterruptedException ie) {
501      LOG.warn("Interrupted when opening lock file", ie);
502      Thread.currentThread().interrupt();
503    } catch (TimeoutException exception) {
504      // took too long to obtain lock
      LOG.warn("Took more than " + timeoutInSeconds + " seconds to obtain lock");
506      futureTask.cancel(true);
507    } finally {
508      executor.shutdownNow();
509    }
510    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
511  }
512
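  /**
   * Releases the exclusive hbck lock if this instance took it: closes the lock file stream and
   * deletes the lock file, retrying the delete on failure. Guarded by hbckLockCleanup so the
   * cleanup runs at most once (shutdown hook vs. normal exit).
   */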
513  private void unlockHbck() {
514    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
515      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
516      do {
517        try {
518          IOUtils.closeQuietly(hbckOutFd);
519          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
520          LOG.info("Finishing hbck");
521          return;
522        } catch (IOException ioe) {
523          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
524              + (retryCounter.getAttemptTimes() + 1) + " of "
525              + retryCounter.getMaxAttempts());
526          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
527          try {
528            retryCounter.sleepUntilNextRetry();
529          } catch (InterruptedException ie) {
530            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while deleting lock file " + HBCK_LOCK_PATH);
533            return;
534          }
535        }
536      } while (retryCounter.shouldRetry());
537    }
538  }
539
540  /**
   * To repair region consistency, connect() must first be called so that hbck can read and
   * repair online state.
543   */
544  public void connect() throws IOException {
545
546    if (isExclusive()) {
547      // Grab the lock
548      Pair<Path, FSDataOutputStream> pair =
549          checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
550      HBCK_LOCK_PATH = pair.getFirst();
551      this.hbckOutFd = pair.getSecond();
552      if (hbckOutFd == null) {
553        setRetCode(-1);
554        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
555            "[If you are sure no other instance is running, delete the lock file " +
556            HBCK_LOCK_PATH + " and rerun the tool]");
557        throw new IOException("Duplicate hbck - Abort");
558      }
559
560      // Make sure to cleanup the lock
561      hbckLockCleanup.set(true);
562    }
563
564
565    // Add a shutdown hook to this thread, in case user tries to
566    // kill the hbck with a ctrl-c, we want to cleanup the lock so that
567    // it is available for further calls
568    Runtime.getRuntime().addShutdownHook(new Thread() {
569      @Override
570      public void run() {
571        IOUtils.closeQuietly(HBaseFsck.this);
572        cleanupHbckZnode();
573        unlockHbck();
574      }
575    });
576
577    LOG.info("Launching hbck");
578
579    connection = ConnectionFactory.createConnection(getConf());
580    admin = connection.getAdmin();
581    meta = connection.getTable(TableName.META_TABLE_NAME);
582    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
583      Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
584      Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
585  }
586
587  /**
588   * Get deployed regions according to the region servers.
589   */
590  private void loadDeployedRegions() throws IOException, InterruptedException {
591    // From the master, get a list of all known live region servers
592    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
593    errors.print("Number of live region servers: " + regionServers.size());
594    if (details) {
595      for (ServerName rsinfo: regionServers) {
596        errors.print("  " + rsinfo.getServerName());
597      }
598    }
599
600    // From the master, get a list of all dead region servers
601    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
602    errors.print("Number of dead region servers: " + deadRegionServers.size());
603    if (details) {
604      for (ServerName name: deadRegionServers) {
605        errors.print("  " + name);
606      }
607    }
608
609    // Print the current master name and state
610    errors.print("Master: " + status.getMasterName());
611
612    // Print the list of all backup masters
613    Collection<ServerName> backupMasters = status.getBackupMasterNames();
614    errors.print("Number of backup masters: " + backupMasters.size());
615    if (details) {
616      for (ServerName name: backupMasters) {
617        errors.print("  " + name);
618      }
619    }
620
621    errors.print("Average load: " + status.getAverageLoad());
622    errors.print("Number of requests: " + status.getRequestCount());
623    errors.print("Number of regions: " + status.getRegionCount());
624
625    List<RegionState> rits = status.getRegionStatesInTransition();
626    errors.print("Number of regions in transition: " + rits.size());
627    if (details) {
628      for (RegionState state: rits) {
629        errors.print("  " + state.toDescriptiveString());
630      }
631    }
632
633    // Determine what's deployed
634    processRegionServers(regionServers);
635  }
636
637  /**
638   * Clear the current state of hbck.
639   */
640  private void clearState() {
641    // Make sure regionInfo is empty before starting
642    fixes = 0;
643    regionInfoMap.clear();
644    emptyRegionInfoQualifiers.clear();
645    tableStates.clear();
646    errors.clear();
647    tablesInfo.clear();
648    orphanHdfsDirs.clear();
649    skippedRegions.clear();
650  }
651
652  /**
653   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
654   * the table integrity rules.  HBase doesn't need to be online for this
655   * operation to work.
656   */
657  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
658    // Initial pass to fix orphans.
659    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
660        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
662      // if nothing is happening this should always complete in two iterations.
663      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
664      int curIter = 0;
665      do {
        clearState(); // clears hbck state and resets fixes to 0
667        // repair what's on HDFS
668        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations.
670      } while (fixes > 0 && curIter <= maxIterations);
671
672      // Repairs should be done in the first iteration and verification in the second.
673      // If there are more than 2 passes, something funny has happened.
674      if (curIter > 2) {
675        if (curIter == maxIterations) {
676          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
678        } else {
679          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
680        }
681      }
682    }
683  }
684
685  /**
686   * This repair method requires the cluster to be online since it contacts
687   * region servers and the masters.  It makes each region's state in HDFS, in
688   * hbase:meta, and deployments consistent.
689   *
   * @return If &gt; 0, the number of errors detected; if &lt; 0, there was an unrecoverable
   *     error. If 0, we have a clean hbase.
692   */
693  public int onlineConsistencyRepair() throws IOException, KeeperException,
694    InterruptedException {
695
696    // get regions according to what is online on each RegionServer
697    loadDeployedRegions();
698    // check whether hbase:meta is deployed and online
699    recordMetaRegion();
700    // Check if hbase:meta is found only once and in the right place
701    if (!checkMetaRegion()) {
702      String errorMsg = "hbase:meta table is not consistent. ";
703      if (shouldFixAssignments()) {
704        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
705      } else {
706        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
707      }
708      errors.reportError(errorMsg + " Exiting...");
709      return -2;
710    }
    // Do not proceed with further table consistency checks when hbase:meta itself is not
    // consistent (handled by the early return above).
    LOG.info("Loading regioninfos from the hbase:meta table");
713    boolean success = loadMetaEntries();
714    if (!success) return -1;
715
716    // Empty cells in hbase:meta?
717    reportEmptyMetaCells();
718
719    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
720    if (shouldFixEmptyMetaCells()) {
721      fixEmptyMetaCells();
722    }
723
724    // get a list of all tables that have not changed recently.
725    if (!checkMetaOnly) {
726      reportTablesInFlux();
727    }
728
729    // Get disabled tables states
730    loadTableStates();
731
732    // load regiondirs and regioninfos from HDFS
733    if (shouldCheckHdfs()) {
734      LOG.info("Loading region directories from HDFS");
735      loadHdfsRegionDirs();
736      LOG.info("Loading region information from HDFS");
737      loadHdfsRegionInfos();
738    }
739
740    // fix the orphan tables
741    fixOrphanTables();
742
743    LOG.info("Checking and fixing region consistency");
744    // Check and fix consistency
745    checkAndFixConsistency();
746
747    // Check integrity (does not fix)
748    checkIntegrity();
749    return errors.getErrorList().size();
750  }
751
752  /**
   * This method maintains an ephemeral znode. If the creation fails we return false or throw
   * an exception.
755   *
756   * @return true if creating znode succeeds; false otherwise
757   * @throws IOException if IO failure occurs
758   */
759  private boolean setMasterInMaintenanceMode() throws IOException {
760    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
761    hbckEphemeralNodePath = ZNodePaths.joinZNode(
762      zkw.getZNodePaths().masterMaintZNode,
763      "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
764    do {
765      try {
766        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
767        if (hbckZodeCreated) {
768          break;
769        }
770      } catch (KeeperException e) {
771        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
772           throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
773        }
774        // fall through and retry
775      }
776
      LOG.warn("Failed to create znode " + hbckEphemeralNodePath + ", try=" +
778          (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
779
780      try {
781        retryCounter.sleepUntilNextRetry();
782      } catch (InterruptedException ie) {
783        throw (InterruptedIOException) new InterruptedIOException(
784              "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
785      }
786    } while (retryCounter.shouldRetry());
787    return hbckZodeCreated;
788  }
789
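  /**
   * Best-effort removal of the ephemeral maintenance-mode znode created by
   * setMasterInMaintenanceMode(); a missing node (NONODE) is silently ignored.
   */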
790  private void cleanupHbckZnode() {
791    try {
792      if (zkw != null && hbckZodeCreated) {
793        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
794        hbckZodeCreated = false;
795      }
796    } catch (KeeperException e) {
      // Ignore a missing node (NONODE); warn on anything else.
      if (!e.code().equals(KeeperException.Code.NONODE)) {
        LOG.warn("Failed to delete HBCK znode " + hbckEphemeralNodePath, e);
800      }
801    }
802  }
803
804  /**
805   * Contacts the master and prints out cluster-wide information
806   * @return 0 on success, non-zero on failure
807   */
808  public int onlineHbck()
809      throws IOException, KeeperException, InterruptedException, ReplicationException {
810    // print hbase server version
811    errors.print("Version: " + status.getHBaseVersion());
812
813    // Clean start
814    clearState();
815    // Do offline check and repair first
816    offlineHdfsIntegrityRepair();
817    offlineReferenceFileRepair();
818    offlineHLinkFileRepair();
819    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
820    // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
821    // is better to set Master into maintenance mode during online hbck.
822    //
823    if (!setMasterInMaintenanceMode()) {
      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
        + "errors.  Please run HBCK multiple times to reduce the chance of transient errors.");
826    }
827
828    onlineConsistencyRepair();
829
830    if (checkRegionBoundaries) {
831      checkRegionBoundaries();
832    }
833
834    checkAndFixReplication();
835
836    cleanReplicationBarrier();
837
838    // Remove the hbck znode
839    cleanupHbckZnode();
840
841    // Remove the hbck lock
842    unlockHbck();
843
844    // Print table summary
845    printTableSummary(tablesInfo);
846    return errors.summarize();
847  }
848
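  /**
   * Extracts just the row from a serialized key: the layout read here is a 2-byte (short) row
   * length at offset 0 followed by that many row bytes, as in the KeyValue key format.
   */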
  public static byte[] keyOnly(byte[] b) {
    if (b == null) {
      return b;
    }
852    int rowlength = Bytes.toShort(b, 0);
853    byte[] result = new byte[rowlength];
854    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
855    return result;
856  }
857
858  @Override
859  public void close() throws IOException {
860    try {
861      cleanupHbckZnode();
862      unlockHbck();
863    } catch (Exception io) {
864      LOG.warn(io.toString(), io);
865    } finally {
866      if (zkw != null) {
867        zkw.close();
868        zkw = null;
869      }
870      IOUtils.closeQuietly(admin);
871      IOUtils.closeQuietly(meta);
872      IOUtils.closeQuietly(connection);
873    }
874  }
875
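  /**
   * Holder for a region's boundary keys as recorded in hbase:meta versus the first/last keys
   * actually found in its store files; used by checkRegionBoundaries().
   */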
876  private static class RegionBoundariesInformation {
877    public byte [] regionName;
878    public byte [] metaFirstKey;
879    public byte [] metaLastKey;
880    public byte [] storesFirstKey;
881    public byte [] storesLastKey;
882    @Override
    public String toString() {
884      return "regionName=" + Bytes.toStringBinary(regionName) +
885             "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
886             "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
887             "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
888             "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
889    }
890  }
891
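  /**
   * For every region listed in hbase:meta, checks that the start/end keys recorded in meta are
   * consistent with the smallest/largest keys found in the region's store files on HDFS, and
   * reports a BOUNDARIES_ERROR for any region whose boundaries do not line up.
   */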
892  public void checkRegionBoundaries() {
893    try {
894      ByteArrayComparator comparator = new ByteArrayComparator();
895      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
896      final RegionBoundariesInformation currentRegionBoundariesInformation =
897          new RegionBoundariesInformation();
898      Path hbaseRoot = FSUtils.getRootDir(getConf());
899      for (RegionInfo regionInfo : regions) {
900        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
901        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
902        // For each region, get the start and stop key from the META and compare them to the
903        // same information from the Stores.
904        Path path = new Path(tableDir, regionInfo.getEncodedName());
905        FileSystem fs = path.getFileSystem(getConf());
906        FileStatus[] files = fs.listStatus(path);
907        // For all the column families in this region...
908        byte[] storeFirstKey = null;
909        byte[] storeLastKey = null;
910        for (FileStatus file : files) {
911          String fileName = file.getPath().toString();
912          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
913          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
914            FileStatus[] storeFiles = fs.listStatus(file.getPath());
915            // For all the stores in this column family.
916            for (FileStatus storeFile : storeFiles) {
917              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
918                CacheConfig.DISABLED, true, getConf());
919              if ((reader.getFirstKey() != null)
920                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
921                      ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
922                storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
923              }
924              if ((reader.getLastKey() != null)
925                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
926                      ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
927                storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
928              }
929              reader.close();
930            }
931          }
932        }
933        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
934        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
935        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
936        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
937        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
938          currentRegionBoundariesInformation.metaFirstKey = null;
939        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
940          currentRegionBoundariesInformation.metaLastKey = null;
941
942        // For a region to be correct, we need the META start key to be smaller or equal to the
943        // smallest start key from all the stores, and the start key from the next META entry to
944        // be bigger than the last key from all the current stores. First region start key is null;
945        // Last region end key is null; some regions can be empty and not have any store.
946
947        boolean valid = true;
948        // Checking start key.
949        if ((currentRegionBoundariesInformation.storesFirstKey != null)
950            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
951          valid = valid
952              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
953                currentRegionBoundariesInformation.metaFirstKey) >= 0;
954        }
955        // Checking stop key.
956        if ((currentRegionBoundariesInformation.storesLastKey != null)
957            && (currentRegionBoundariesInformation.metaLastKey != null)) {
958          valid = valid
959              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
960                currentRegionBoundariesInformation.metaLastKey) < 0;
961        }
962        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
964            tablesInfo.get(regionInfo.getTable()));
965          LOG.warn("Region's boundaries not aligned between stores and META for:");
966          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
967        }
968      }
969    } catch (IOException e) {
970      LOG.error(e.toString(), e);
971    }
972  }
973
974  /**
975   * Iterates through the list of all orphan/invalid regiondirs.
976   */
977  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
978    for (HbckInfo hi : orphanHdfsDirs) {
979      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
980      adoptHdfsOrphan(hi);
981    }
982  }
983
984  /**
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
   * these orphans by creating a new region and moving the column families,
   * recovered edits, and WALs into the new region dir.  We determine the region's
   * start key and end key by looking at all of the hfiles inside the column
   * families to identify the min and max keys. The resulting region will
   * likely violate table integrity but will be dealt with by merging
   * overlapping regions.
992   */
993  @SuppressWarnings("deprecation")
994  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
995    Path p = hi.getHdfsRegionDir();
996    FileSystem fs = p.getFileSystem(getConf());
997    FileStatus[] dirs = fs.listStatus(p);
998    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files are present in " +
          p + ". This dir could probably be deleted.");
      return;
1002    }
1003
1004    TableName tableName = hi.getTableName();
1005    TableInfo tableInfo = tablesInfo.get(tableName);
1006    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
1007    TableDescriptor template = tableInfo.getHTD();
1008
1009    // find min and max key values
1010    Pair<byte[],byte[]> orphanRegionRange = null;
1011    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
1013      // TODO Figure out what the special dirs are
1014      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
1015
1016      FileStatus[] hfiles = fs.listStatus(cf.getPath());
1017      for (FileStatus hfile : hfiles) {
1018        byte[] start, end;
1019        HFile.Reader hf = null;
1020        try {
1021          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
1022          hf.loadFileInfo();
1023          Optional<Cell> startKv = hf.getFirstKey();
1024          start = CellUtil.cloneRow(startKv.get());
1025          Optional<Cell> endKv = hf.getLastKey();
1026          end = CellUtil.cloneRow(endKv.get());
1027        } catch (IOException ioe) {
1028          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
1029          continue;
        } catch (NullPointerException npe) {
          LOG.warn("Orphan file " + hfile + " is possibly a corrupted HFile, skipping");
1032          continue;
1033        } finally {
1034          if (hf != null) {
1035            hf.close();
1036          }
1037        }
1038
1039        // expand the range to include the range of all hfiles
1040        if (orphanRegionRange == null) {
1041          // first range
1042          orphanRegionRange = new Pair<>(start, end);
1043        } else {
1044          // TODO add test
1045
1046          // expand range only if the hfile is wider.
1047          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1048            orphanRegionRange.setFirst(start);
1049          }
1050          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
1051            orphanRegionRange.setSecond(end);
1052          }
1053        }
1054      }
1055    }
1056    if (orphanRegionRange == null) {
1057      LOG.warn("No data in dir " + p + ", sidelining data");
1058      fixes++;
1059      sidelineRegionDir(fs, hi);
1060      return;
1061    }
    LOG.info("Min and max keys are: [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");
1064
1065    // create new region on hdfs. move data into place.
1066    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1067        .setStartKey(orphanRegionRange.getFirst())
1068        .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1]))
1069        .build();
1070    LOG.info("Creating new region : " + regionInfo);
1071    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1072    Path target = region.getRegionFileSystem().getRegionDir();
1073
1074    // rename all the data to new region
1075    mergeRegionDirs(target, hi);
1076    fixes++;
1077  }
1078
1079  /**
1080   * This method determines if there are table integrity errors in HDFS.  If
1081   * there are errors and the appropriate "fix" options are enabled, the method
1082   * will first correct orphan regions making them into legit regiondirs, and
1083   * then reload to merge potentially overlapping regions.
1084   *
1085   * @return number of table integrity errors found
1086   */
1087  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1088    // Determine what's on HDFS
1089    LOG.info("Loading HBase regioninfo from HDFS...");
1090    loadHdfsRegionDirs(); // populating regioninfo table.
1091
1092    int errs = errors.getErrorList().size();
1093    // First time just get suggestions.
1094    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1095    checkHdfsIntegrity(false, false);
1096
1097    if (errors.getErrorList().size() == errs) {
1098      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1099      return 0;
1100    }
1101
1102    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1103      adoptHdfsOrphans(orphanHdfsDirs);
1104      // TODO optimize by incrementally adding instead of reloading.
1105    }
1106
1107    // Make sure there are no holes now.
1108    if (shouldFixHdfsHoles()) {
1109      clearState(); // this also resets # fixes.
1110      loadHdfsRegionDirs();
1111      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1112      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1113    }
1114
1115    // Now we fix overlaps
1116    if (shouldFixHdfsOverlaps()) {
1117      // second pass we fix overlaps.
1118      clearState(); // this also resets # fixes.
1119      loadHdfsRegionDirs();
1120      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1121      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1122    }
1123
1124    return errors.getErrorList().size();
1125  }
1126
1127  /**
   * Scan all the store file names to find any lingering reference files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering reference file found will be sidelined.
   * <p>
   * A lingering reference file prevents a region from opening. It has to
   * be fixed before a cluster can start properly.
1134   */
1135  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1136    clearState();
1137    Configuration conf = getConf();
1138    Path hbaseRoot = FSUtils.getRootDir(conf);
1139    FileSystem fs = hbaseRoot.getFileSystem(conf);
1140    LOG.info("Computing mapping of all store files");
1141    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1142      new FSUtils.ReferenceFileFilter(fs), executor, errors);
1143    errors.print("");
1144    LOG.info("Validating mapping using HDFS state");
1145    for (Path path: allFiles.values()) {
1146      Path referredToFile = StoreFileInfo.getReferredToFile(path);
1147      if (fs.exists(referredToFile)) continue;  // good, expected
1148
1149      // Found a lingering reference file
1150      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1151        "Found lingering reference file " + path);
1152      if (!shouldFixReferenceFiles()) continue;
1153
1154      // Now, trying to fix it since requested
1155      boolean success = false;
1156      String pathStr = path.toString();
1157
1158      // A reference file path should be like
1159      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1160      // Up 5 directories to get the root folder.
1161      // So the file will be sidelined to a similar folder structure.
1162      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1163      for (int i = 0; index > 0 && i < 5; i++) {
1164        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1165      }
1166      if (index > 0) {
1167        Path rootDir = getSidelineDir();
1168        Path dst = new Path(rootDir, pathStr.substring(index + 1));
1169        fs.mkdirs(dst.getParent());
1170        LOG.info("Trying to sideline reference file "
1171          + path + " to " + dst);
1172        setShouldRerun();
1173
1174        success = fs.rename(path, dst);
1175        debugLsr(dst);
1176
1177      }
1178      if (!success) {
1179        LOG.error("Failed to sideline reference file " + path);
1180      }
1181    }
1182  }
1183
1184  /**
   * Scan all the store file names to find any lingering HFileLink files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering HFileLink file found will be sidelined.
1188   */
1189  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1190    Configuration conf = getConf();
1191    Path hbaseRoot = FSUtils.getRootDir(conf);
1192    FileSystem fs = hbaseRoot.getFileSystem(conf);
1193    LOG.info("Computing mapping of all link files");
1194    Map<String, Path> allFiles = FSUtils
1195        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1196    errors.print("");
1197
1198    LOG.info("Validating mapping using HDFS state");
1199    for (Path path : allFiles.values()) {
1200      // building HFileLink object to gather locations
1201      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1202      if (actualLink.exists(fs)) continue; // good, expected
1203
1204      // Found a lingering HFileLink
1205      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1206      if (!shouldFixHFileLinks()) continue;
1207
1208      // Now, trying to fix it since requested
1209      setShouldRerun();
1210
1211      // An HFileLink path should be like
1212      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1214      boolean success = sidelineFile(fs, hbaseRoot, path);
1215
1216      if (!success) {
1217        LOG.error("Failed to sideline HFileLink file " + path);
1218      }
1219
1220      // An HFileLink backreference path should be like
1221      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1223      Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1224              .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1225                  HFileLink.getReferencedRegionName(path.getName().toString()),
1226                  path.getParent().getName()),
1227          HFileLink.getReferencedHFileName(path.getName().toString()));
1228      success = sidelineFile(fs, hbaseRoot, backRefPath);
1229
1230      if (!success) {
1231        LOG.error("Failed to sideline HFileLink backreference file " + path);
1232      }
1233    }
1234  }
1235
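  /**
   * Moves the given file under the sideline directory, preserving its path relative to the
   * HBase root dir. Returns false if the path is not under the root dir or if the target's
   * parent directory could not be created.
   */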
1236  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1237    URI uri = hbaseRoot.toUri().relativize(path.toUri());
1238    if (uri.isAbsolute()) return false;
1239    String relativePath = uri.getPath();
1240    Path rootDir = getSidelineDir();
1241    Path dst = new Path(rootDir, relativePath);
1242    boolean pathCreated = fs.mkdirs(dst.getParent());
1243    if (!pathCreated) {
1244      LOG.error("Failed to create path: " + dst.getParent());
1245      return false;
1246    }
1247    LOG.info("Trying to sideline file " + path + " to " + dst);
1248    return fs.rename(path, dst);
1249  }
1250
1251  /**
1252   * TODO -- need to add tests for this.
1253   */
1254  private void reportEmptyMetaCells() {
1255    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1256      emptyRegionInfoQualifiers.size());
1257    if (details) {
1258      for (Result r: emptyRegionInfoQualifiers) {
1259        errors.print("  " + r);
1260      }
1261    }
1262  }
1263
1264  /**
1265   * TODO -- need to add tests for this.
1266   */
1267  private void reportTablesInFlux() {
1268    AtomicInteger numSkipped = new AtomicInteger(0);
1269    TableDescriptor[] allTables = getTables(numSkipped);
1270    errors.print("Number of Tables: " + allTables.length);
1271    if (details) {
1272      if (numSkipped.get() > 0) {
1273        errors.detail("Number of Tables in flux: " + numSkipped.get());
1274      }
1275      for (TableDescriptor td : allTables) {
1276        errors.detail("  Table: " + td.getTableName() + "\t" +
1277                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
1278                            (td.isMetaRegion() ? "META" : "    ") + "\t" +
1279                           " families: " + td.getColumnFamilyCount());
1280      }
1281    }
1282  }
1283
1284  public ErrorReporter getErrors() {
1285    return errors;
1286  }
1287
1288  /**
1289   * Read the .regioninfo file from the file system.  If there is no
1290   * .regioninfo, add it to the orphan hdfs region list.
1291   */
1292  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1293    Path regionDir = hbi.getHdfsRegionDir();
1294    if (regionDir == null) {
1295      if (hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1296        // Log warning only for default/primary replica with no region dir
1297        LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1298      }
1299      return;
1300    }
1301
1302    if (hbi.hdfsEntry.hri != null) {
1303      // already loaded data
1304      return;
1305    }
1306
1307    FileSystem fs = FileSystem.get(getConf());
1308    RegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1309    LOG.debug("RegionInfo read: " + hri.toString());
1310    hbi.hdfsEntry.hri = hri;
1311  }
1312
1313  /**
1314   * Exception thrown when an integrity repair operation fails in an
1315   * unresolvable way.
1316   */
1317  public static class RegionRepairException extends IOException {
1318    private static final long serialVersionUID = 1L;
1319    final IOException ioe;
1320    public RegionRepairException(String s, IOException ioe) {
1321      super(s);
1322      this.ioe = ioe;
1323    }
1324  }
1325
1326  /**
1327   * Populate hbi's from regionInfos loaded from file system.
1328   */
1329  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1330      throws IOException, InterruptedException {
1331    tablesInfo.clear(); // regenerating the data
1332    // generate region split structure
1333    Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1334
1335    // Parallelized read of .regioninfo files.
1336    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckInfos.size());
1337    List<Future<Void>> hbiFutures;
1338
1339    for (HbckInfo hbi : hbckInfos) {
1340      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1341      hbis.add(work);
1342    }
1343
1344    // Submit and wait for completion
1345    hbiFutures = executor.invokeAll(hbis);
1346
1347    for (int i = 0; i < hbiFutures.size(); i++) {
1348      WorkItemHdfsRegionInfo work = hbis.get(i);
1349      Future<Void> f = hbiFutures.get(i);
1350      try {
1351        f.get();
1352      } catch(ExecutionException e) {
1353        LOG.warn("Failed to read .regioninfo file for region " +
1354              work.hbi.getRegionNameAsString(), e.getCause());
1355      }
1356    }
1357
1358    Path hbaseRoot = FSUtils.getRootDir(getConf());
1359    FileSystem fs = hbaseRoot.getFileSystem(getConf());
1360    // serialized table info gathering.
1361    for (HbckInfo hbi: hbckInfos) {
1362
1363      if (hbi.getHdfsHRI() == null) {
1364        // was an orphan
1365        continue;
1366      }
1367
1368
1369      // get table name from hdfs, populate various HBaseFsck tables.
1370      TableName tableName = hbi.getTableName();
1371      if (tableName == null) {
1372        // There was an entry in hbase:meta but not in HDFS?
1373        LOG.warn("tableName was null for: " + hbi);
1374        continue;
1375      }
1376
1377      TableInfo modTInfo = tablesInfo.get(tableName);
1378      if (modTInfo == null) {
1379        // only executed once per table.
1380        modTInfo = new TableInfo(tableName);
1381        tablesInfo.put(tableName, modTInfo);
1382        try {
1383          TableDescriptor htd =
1384              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1385          modTInfo.htds.add(htd);
1386        } catch (IOException ioe) {
1387          if (!orphanTableDirs.containsKey(tableName)) {
1388            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1389            //should only report once for each table
1390            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1391                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1392            Set<String> columns = new HashSet<>();
1393            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1394          }
1395        }
1396      }
1397      if (!hbi.isSkipChecks()) {
1398        modTInfo.addRegionInfo(hbi);
1399      }
1400    }
1401
1402    loadTableInfosForTablesWithNoRegion();
1403    errors.print("");
1404
1405    return tablesInfo;
1406  }
1407
1408  /**
1409   * Gets the column family list by listing the column family directories under the region dir.
1410   * @param columns set to which the discovered column family names are added
1411   * @param hbi region whose HDFS directory is inspected
1412   * @return the given set, populated with the column families that were found
1413   * @throws IOException
1414   */
1415  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1416    Path regionDir = hbi.getHdfsRegionDir();
1417    FileSystem fs = regionDir.getFileSystem(getConf());
1418    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1419    for (FileStatus subdir : subDirs) {
1420      String columnfamily = subdir.getPath().getName();
1421      columns.add(columnfamily);
1422    }
1423    return columns;
1424  }
1425
1426  /**
1427   * Fabricates a .tableinfo file with the following contents:<br>
1428   * 1. the correct tablename <br>
1429   * 2. the correct colfamily list<br>
1430   * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1431   * @throws IOException
1432   */
1433  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1434      Set<String> columns) throws IOException {
1435    if (columns == null || columns.isEmpty()) return false;
1436    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
1437    for (String columnFamily : columns) {
1438      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnFamily));
1439    }
1440    fstd.createTableDescriptor(builder.build(), true);
1441    return true;
1442  }
1443
1444  /**
1445   * Fixes the empty REGIONINFO_QUALIFIER rows in hbase:meta.<br>
1446   * @throws IOException
1447   */
1448  public void fixEmptyMetaCells() throws IOException {
1449    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1450      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1451      for (Result region : emptyRegionInfoQualifiers) {
1452        deleteMetaRegion(region.getRow());
1453        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1454      }
1455      emptyRegionInfoQualifiers.clear();
1456    }
1457  }
1458
1459  /**
1460   * Fixes orphan tables by creating a .tableinfo file under each orphan table dir:<br>
1461   * 1. if a table descriptor is still available (cached), recover the .tableinfo from it <br>
1462   * 2. else create a default .tableinfo file with the following items<br>
1463   * &nbsp;2.1 the correct tablename <br>
1464   * &nbsp;2.2 the correct colfamily list<br>
1465   * &nbsp;2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1466   * @throws IOException
1467   */
1468  public void fixOrphanTables() throws IOException {
1469    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1470
1471      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1472      tmpList.addAll(orphanTableDirs.keySet());
1473      TableDescriptor[] htds = getTableDescriptors(tmpList);
1474      Iterator<Entry<TableName, Set<String>>> iter =
1475          orphanTableDirs.entrySet().iterator();
1476      int j = 0;
1477      int numFailedCase = 0;
1478      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
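      // htds holds the descriptors the cluster could still provide for the orphan tables, in the
      // same order as tmpList; walk it in lockstep with the orphan map so a recovered descriptor
      // is preferred over a fabricated default one.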
1479      while (iter.hasNext()) {
1480        Entry<TableName, Set<String>> entry =
1481            iter.next();
1482        TableName tableName = entry.getKey();
1483        LOG.info("Trying to fix orphan table error: " + tableName);
1484        if (j < htds.length) {
1485          if (tableName.equals(htds[j].getTableName())) {
1486            TableDescriptor htd = htds[j];
1487            LOG.info("fixing orphan table: " + tableName + " from cache");
1488            fstd.createTableDescriptor(htd, true);
1489            j++;
1490            iter.remove();
1491          }
1492        } else {
1493          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1494            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1495            LOG.warn("It is strongly recommended to modify the TableDescriptor if necessary for: " + tableName);
1496            iter.remove();
1497          } else {
1498            LOG.error("Unable to create default .tableinfo for " + tableName + " because column family information is missing");
1499            numFailedCase++;
1500          }
1501        }
1502        fixes++;
1503      }
1504
1505      if (orphanTableDirs.isEmpty()) {
1506        // all orphanTableDirs are luckily recovered
1507        // re-run doFsck after recovering the .tableinfo file
1508        setShouldRerun();
1509        LOG.warn("It is strongly recommended to manually re-run hbck after all orphan table dirs have been fixed");
1510      } else if (numFailedCase > 0) {
1511        LOG.error("Failed to fix " + numFailedCase
1512            + " OrphanTables with default .tableinfo files");
1513      }
1514
1515    }
1516    //cleanup the list
1517    orphanTableDirs.clear();
1518
1519  }
1520
1521  /**
1522   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1523   */
1524  private void logParallelMerge() {
1525    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1526      LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
1527          " false to run serially.");
1528    } else {
1529      LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
1530          " true to run in parallel.");
1531    }
1532  }
1533
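  /**
   * Checks each table's region chain as derived from HDFS and reports holes and overlaps,
   * optionally repairing them when fixHoles/fixOverlaps are enabled.
   */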
1534  private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1535      boolean fixOverlaps) throws IOException {
1536    LOG.info("Checking HBase region split map from HDFS data...");
1537    logParallelMerge();
1538    for (TableInfo tInfo : tablesInfo.values()) {
1539      TableIntegrityErrorHandler handler;
1540      if (fixHoles || fixOverlaps) {
1541        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1542          fixHoles, fixOverlaps);
1543      } else {
1544        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1545      }
1546      if (!tInfo.checkRegionChain(handler)) {
1547        // should dump info as well.
1548        errors.report("Found inconsistency in table " + tInfo.getName());
1549      }
1550    }
1551    return tablesInfo;
1552  }
1553
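  /**
   * Lazily builds (once per hbck run) the sideline directory path:
   * ${hbase.rootdir}/${HConstants.HBCK_SIDELINEDIR_NAME}/&lt;root dir name&gt;-&lt;hbck start time&gt;.
   */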
1554  private Path getSidelineDir() throws IOException {
1555    if (sidelineDir == null) {
1556      Path hbaseDir = FSUtils.getRootDir(getConf());
1557      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1558      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1559          + startMillis);
1560    }
1561    return sidelineDir;
1562  }
1563
1564  /**
1565   * Sideline a region dir (instead of deleting it)
1566   */
1567  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1568    return sidelineRegionDir(fs, null, hi);
1569  }
1570
1571  /**
1572   * Sideline a region dir (instead of deleting it)
1573   *
1574   * @param parentDir if specified, the region will be sidelined to folder like
1575   *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1576   *     similar regions sidelined, for example, those regions should be bulk loaded back later
1577   *     on. If NULL, it is ignored.
1578   */
1579  Path sidelineRegionDir(FileSystem fs,
1580      String parentDir, HbckInfo hi) throws IOException {
1581    TableName tableName = hi.getTableName();
1582    Path regionDir = hi.getHdfsRegionDir();
1583
1584    if (!fs.exists(regionDir)) {
1585      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1586      return null;
1587    }
1588
1589    Path rootDir = getSidelineDir();
1590    if (parentDir != null) {
1591      rootDir = new Path(rootDir, parentDir);
1592    }
1593    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1594    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1595    fs.mkdirs(sidelineRegionDir);
1596    boolean success = false;
1597    FileStatus[] cfs =  fs.listStatus(regionDir);
1598    if (cfs == null) {
1599      LOG.info("Region dir is empty: " + regionDir);
1600    } else {
1601      for (FileStatus cf : cfs) {
1602        Path src = cf.getPath();
1603        Path dst =  new Path(sidelineRegionDir, src.getName());
1604        if (fs.isFile(src)) {
1605          // simple file
1606          success = fs.rename(src, dst);
1607          if (!success) {
1608            String msg = "Unable to rename file " + src +  " to " + dst;
1609            LOG.error(msg);
1610            throw new IOException(msg);
1611          }
1612          continue;
1613        }
1614
1615        // is a directory.
1616        fs.mkdirs(dst);
1617
1618        LOG.info("Sidelining files from " + src + " into containing region " + dst);
1619        // FileSystem.rename is inconsistent with directories -- if the
1620        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1621        // it moves the src into the dst dir resulting in (foo/a/b).  If
1622        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1623        FileStatus[] hfiles = fs.listStatus(src);
1624        if (hfiles != null && hfiles.length > 0) {
1625          for (FileStatus hfile : hfiles) {
1626            success = fs.rename(hfile.getPath(), dst);
1627            if (!success) {
1628              String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1629              LOG.error(msg);
1630              throw new IOException(msg);
1631            }
1632          }
1633        }
1634        LOG.debug("Sideline directory contents:");
1635        debugLsr(sidelineRegionDir);
1636      }
1637    }
1638
1639    LOG.info("Removing old region dir: " + regionDir);
1640    success = fs.delete(regionDir, true);
1641    if (!success) {
1642      String msg = "Unable to delete dir " + regionDir;
1643      LOG.error(msg);
1644      throw new IOException(msg);
1645    }
1646    return sidelineRegionDir;
1647  }
1648
1649  /**
1650   * Load the table states from hbase:meta into the local tableStates map.
1651   * @throws IOException if the table states cannot be read from hbase:meta
1653   */
1654  private void loadTableStates()
1655  throws IOException {
1656    tableStates = MetaTableAccessor.getTableStates(connection);
1657    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
1658    // has no entry in the table states. HBCK doesn't work right with hbase2, but just do this
1659    // in the meantime.
1660    this.tableStates.put(TableName.META_TABLE_NAME,
1661        new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1662  }
1663
1664  /**
1665   * Check if the specified table is disabled or in the process of being disabled.
1666   * @param tableName table to check the status of
1667   */
1668  private boolean isTableDisabled(TableName tableName) {
1669    return tableStates.containsKey(tableName)
1670        && tableStates.get(tableName)
1671        .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1672  }
1673
1674  /**
1675   * Scan HDFS for all regions, recording their information into
1676   * regionInfoMap
1677   */
1678  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1679    Path rootDir = FSUtils.getRootDir(getConf());
1680    FileSystem fs = rootDir.getFileSystem(getConf());
1681
1682    // list all tables from HDFS
1683    List<FileStatus> tableDirs = Lists.newArrayList();
1684
1685    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1686
1687    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1688    for (Path path : paths) {
1689      TableName tableName = FSUtils.getTableName(path);
1690      if ((!checkMetaOnly &&
1691          isTableIncluded(tableName)) ||
1692          tableName.equals(TableName.META_TABLE_NAME)) {
1693        tableDirs.add(fs.getFileStatus(path));
1694      }
1695    }
1696
1697    // verify that version file exists
1698    if (!foundVersionFile) {
1699      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1700          "Version file does not exist in root dir " + rootDir);
1701      if (shouldFixVersionFile()) {
1702        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1703            + " file.");
1704        setShouldRerun();
1705        FSUtils.setVersion(fs, rootDir, getConf().getInt(
1706            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1707            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1708            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1709      }
1710    }
1711
1712    // Avoid multithreading at table-level because already multithreaded internally at
1713    // region-level.  Additionally multithreading at table-level can lead to deadlock
1714    // if there are many tables in the cluster.  Since there are a limited # of threads
1715    // in the executor's thread pool and if we multithread at the table-level by putting
1716    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1717    // executor tied up solely in waiting for the tables' region-level calls to complete.
1718    // If there are enough tables then there will be no actual threads in the pool left
1719    // for the region-level callables to be serviced.
1720    for (FileStatus tableDir : tableDirs) {
1721      LOG.debug("Loading region dirs from " + tableDir.getPath());
1722      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1723      try {
1724        item.call();
1725      } catch (ExecutionException e) {
1726        LOG.warn("Could not completely load table dir " +
1727            tableDir.getPath(), e.getCause());
1728      }
1729    }
1730    errors.print("");
1731  }
1732
1733  /**
1734   * Record the location of the hbase:meta region as found in ZooKeeper.
1735   */
1736  private boolean recordMetaRegion() throws IOException {
1737    List<HRegionLocation> locs;
1738    try (RegionLocator locator = connection.getRegionLocator(TableName.META_TABLE_NAME)) {
1739      locs = locator.getRegionLocations(HConstants.EMPTY_START_ROW, true);
1740    }
1741    if (locs == null || locs.isEmpty()) {
1742      errors.reportError(ERROR_CODE.NULL_META_REGION, "META region was not found in ZooKeeper");
1743      return false;
1744    }
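    // When hbase:meta has region replicas, multiple locations may be returned; record each of
    // them so they are all considered during consistency checks.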
1745    for (HRegionLocation metaLocation : locs) {
1746      // Check if Meta region is valid and existing
1747      if (metaLocation == null) {
1748        errors.reportError(ERROR_CODE.NULL_META_REGION, "META region location is null");
1749        return false;
1750      }
1751      if (metaLocation.getRegion() == null) {
1752        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null");
1753        return false;
1754      }
1755      if (metaLocation.getHostname() == null) {
1756        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location hostName is null");
1757        return false;
1758      }
1759      ServerName sn = metaLocation.getServerName();
1760      MetaEntry m =
1761        new MetaEntry(metaLocation.getRegion(), sn, EnvironmentEdgeManager.currentTime());
1762      HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName());
1763      if (hbckInfo == null) {
1764        regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckInfo(m));
1765      } else {
1766        hbckInfo.metaEntry = m;
1767      }
1768    }
1769    return true;
1770  }
1771
1772  private ZKWatcher createZooKeeperWatcher() throws IOException {
1773    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1774      @Override
1775      public void abort(String why, Throwable e) {
1776        LOG.error(why, e);
1777        System.exit(1);
1778      }
1779
1780      @Override
1781      public boolean isAborted() {
1782        return false;
1783      }
1784
1785    });
1786  }
1787
1788  /**
1789   * Contacts each regionserver and fetches metadata about regions.
1790   * @param regionServerList - the list of region servers to connect to
1791   * @throws IOException if a remote or network exception occurs
1792   */
1793  void processRegionServers(Collection<ServerName> regionServerList)
1794    throws IOException, InterruptedException {
1795
1796    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
1797    List<Future<Void>> workFutures;
1798
1799    // loop to contact each region server in parallel
1800    for (ServerName rsinfo: regionServerList) {
1801      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1802    }
1803
1804    workFutures = executor.invokeAll(workItems);
1805
1806    for (int i = 0; i < workFutures.size(); i++) {
1807      WorkItemRegion item = workItems.get(i);
1808      Future<Void> f = workFutures.get(i);
1809      try {
1810        f.get();
1811      } catch(ExecutionException e) {
1812        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1813            e.getCause());
1814      }
1815    }
1816  }
1817
1818  /**
1819   * Check consistency of all regions that have been found in previous phases.
1820   */
1821  private void checkAndFixConsistency()
1822  throws IOException, KeeperException, InterruptedException {
1823    // Divide the checks in two phases. One for default/primary replicas and another
1824    // for the non-primary ones. Keeps code cleaner this way.
1825
1826    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
1827    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1828      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1829        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1830      }
1831    }
1832    checkRegionConsistencyConcurrently(workItems);
1833
1834    boolean prevHdfsCheck = shouldCheckHdfs();
1835    setCheckHdfs(false); //replicas don't have any hdfs data
1836    // Run a pass over the replicas and fix any assignment issues that exist on the currently
1837    // deployed/undeployed replicas.
1838    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
1839    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1840      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
1841        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1842      }
1843    }
1844    checkRegionConsistencyConcurrently(replicaWorkItems);
1845    setCheckHdfs(prevHdfsCheck);
1846
1847    // If some regions are skipped during the checkRegionConsistencyConcurrently() phase, we
1848    // might not get an accurate state of hbase if we continue. The config here allows users to
1849    // tune the tolerated number of skipped regions.
1850    // TODO: evaluate the consequence to continue the hbck operation without config.
1851    int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1852    int numOfSkippedRegions = skippedRegions.size();
1853    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1854      throw new IOException(numOfSkippedRegions
1855        + " region(s) could not be checked or repaired.  See logs for detail.");
1856    }
1857
1858    if (shouldCheckHdfs()) {
1859      checkAndFixTableStates();
1860    }
1861  }
1862
1863  /**
1864   * Check consistency of all regions using multiple threads concurrently.
1865   */
1866  private void checkRegionConsistencyConcurrently(
1867    final List<CheckRegionConsistencyWorkItem> workItems)
1868    throws IOException, KeeperException, InterruptedException {
1869    if (workItems.isEmpty()) {
1870      return;  // nothing to check
1871    }
1872
1873    List<Future<Void>> workFutures = executor.invokeAll(workItems);
1874    for(Future<Void> f: workFutures) {
1875      try {
1876        f.get();
1877      } catch(ExecutionException e1) {
1878        LOG.warn("Could not check region consistency " , e1.getCause());
1879        if (e1.getCause() instanceof IOException) {
1880          throw (IOException)e1.getCause();
1881        } else if (e1.getCause() instanceof KeeperException) {
1882          throw (KeeperException)e1.getCause();
1883        } else if (e1.getCause() instanceof InterruptedException) {
1884          throw (InterruptedException)e1.getCause();
1885        } else {
1886          throw new IOException(e1.getCause());
1887        }
1888      }
1889    }
1890  }
1891
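  /**
   * Callable wrapper around checkRegionConsistency() so region checks can run concurrently on
   * the shared executor; failures on non-meta regions are recorded as skipped instead of
   * aborting the whole run.
   */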
1892  class CheckRegionConsistencyWorkItem implements Callable<Void> {
1893    private final String key;
1894    private final HbckInfo hbi;
1895
1896    CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
1897      this.key = key;
1898      this.hbi = hbi;
1899    }
1900
1901    @Override
1902    public synchronized Void call() throws Exception {
1903      try {
1904        checkRegionConsistency(key, hbi);
1905      } catch (Exception e) {
1906        // If the region is a non-META region, skip it and log a warning/error message; if
1907        // it is the META region, we should not continue.
1908        LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
1909          + "'.", e);
1910        if (hbi.getHdfsHRI().isMetaRegion()) {
1911          throw e;
1912        }
1913        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1914        addSkippedRegion(hbi);
1915      }
1916      return null;
1917    }
1918  }
1919
1920  private void addSkippedRegion(final HbckInfo hbi) {
1921    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1922    if (skippedRegionNames == null) {
1923      skippedRegionNames = new HashSet<>();
1924    }
1925    skippedRegionNames.add(hbi.getRegionNameAsString());
1926    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1927  }
1928
1929  /**
1930   * Check and fix table states, assumes full info available:
1931   * - tableInfos
1932   * - empty tables loaded
1933   */
1934  private void checkAndFixTableStates() throws IOException {
1935    // first check dangling states
1936    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1937      TableName tableName = entry.getKey();
1938      TableState tableState = entry.getValue();
1939      TableInfo tableInfo = tablesInfo.get(tableName);
1940      if (isTableIncluded(tableName)
1941          && !tableName.isSystemTable()
1942          && tableInfo == null) {
1943        if (fixMeta) {
1944          MetaTableAccessor.deleteTableState(connection, tableName);
1945          TableState state = MetaTableAccessor.getTableState(connection, tableName);
1946          if (state != null) {
1947            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1948                tableName + " unable to delete dangling table state " + tableState);
1949          }
1950        } else if (!checkMetaOnly) {
1951          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
1952          // true, tableInfo will be null as tablesInfo is not populated for all tables from hdfs
1953          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1954              tableName + " has dangling table state " + tableState);
1955        }
1956      }
1957    }
1958    // check that all tables have states
1959    for (TableName tableName : tablesInfo.keySet()) {
1960      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1961        if (fixMeta) {
1962          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1963          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1964          if (newState == null) {
1965            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1966                "Unable to change state for table " + tableName + " in meta ");
1967          }
1968        } else {
1969          errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1970              tableName + " has no state in meta ");
1971        }
1972      }
1973    }
1974  }
1975
1976  private void preCheckPermission() throws IOException, AccessDeniedException {
1977    if (shouldIgnorePreCheckPermission()) {
1978      return;
1979    }
1980
1981    Path hbaseDir = FSUtils.getRootDir(getConf());
1982    FileSystem fs = hbaseDir.getFileSystem(getConf());
1983    UserProvider userProvider = UserProvider.instantiate(getConf());
1984    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1985    FileStatus[] files = fs.listStatus(hbaseDir);
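    // hbck may need to move or rewrite files under the HBase root dir, so verify the current
    // user has write access to every top-level entry before attempting any repair.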
1986    for (FileStatus file : files) {
1987      try {
1988        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1989      } catch (AccessDeniedException ace) {
1990        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1991        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1992          + " does not have write perms to " + file.getPath()
1993          + ". Please rerun hbck as hdfs user " + file.getOwner());
1994        throw ace;
1995      }
1996    }
1997  }
1998
1999  /**
2000   * Deletes region from meta table
2001   */
2002  private void deleteMetaRegion(HbckInfo hi) throws IOException {
2003    deleteMetaRegion(hi.metaEntry.getRegionName());
2004  }
2005
2006  /**
2007   * Deletes region from meta table
2008   */
2009  private void deleteMetaRegion(byte[] metaKey) throws IOException {
2010    Delete d = new Delete(metaKey);
2011    meta.delete(d);
2012    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2013  }
2014
2015  /**
2016   * Reset the split parent region info in meta table
2017   */
2018  private void resetSplitParent(HbckInfo hi) throws IOException {
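    // Clear the daughter pointers (SPLITA/SPLITB) and rewrite the region info with the split
    // and offline flags cleared, applying both changes atomically as a single row mutation.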
2019    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
2020    Delete d = new Delete(hi.metaEntry.getRegionName());
2021    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2022    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2023    mutations.add(d);
2024
2025    RegionInfo hri = RegionInfoBuilder.newBuilder(hi.metaEntry)
2026        .setOffline(false)
2027        .setSplit(false)
2028        .build();
2029    Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
2030    mutations.add(p);
2031
2032    meta.mutateRow(mutations);
2033    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2034  }
2035
2036  /**
2037   * This is a backwards-compatibility wrapper for permanently offlining a region
2038   * that should not be alive.  If the region server does not support the
2039   * "offline" method, it will use the closest unassign method instead.  This
2040   * will basically work until one attempts to disable or delete the affected
2041   * table.  The problem has to do with in-memory only master state, so
2042   * restarting the HMaster or failing over to another should fix this.
2043   */
2044  private void offline(byte[] regionName) throws IOException {
2045    String regionString = Bytes.toStringBinary(regionName);
2046    if (!rsSupportsOffline) {
2047      LOG.warn("Using unassign region " + regionString
2048          + " instead of using offline method, you should"
2049          + " restart HMaster after these repairs");
2050      admin.unassign(regionName, true);
2051      return;
2052    }
2053
2054    // the first time through we assume the region servers support #offline.
2055    try {
2056      LOG.info("Offlining region " + regionString);
2057      admin.offline(regionName);
2058    } catch (IOException ioe) {
2059      String notFoundMsg = "java.lang.NoSuchMethodException: " +
2060        "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2061      if (ioe.getMessage().contains(notFoundMsg)) {
2062        LOG.warn("Using unassign region " + regionString
2063            + " instead of using offline method, you should"
2064            + " restart HMaster after these repairs");
2065        rsSupportsOffline = false; // in the future just use unassign
2066        admin.unassign(regionName, true);
2067        return;
2068      }
2069      throw ioe;
2070    }
2071  }
2072
2073  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2074    undeployRegionsForHbi(hi);
2075    // undeploy replicas of the region (but only if the method is invoked for the primary)
2076    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2077      return;
2078    }
2079    int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2080    for (int i = 1; i < numReplicas; i++) {
2081      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2082      RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2083          hi.getPrimaryHRIForDeployedReplica(), i);
2084      HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2085      if (h != null) {
2086        undeployRegionsForHbi(h);
2087        //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2088        //in consistency checks
2089        h.setSkipChecks(true);
2090      }
2091    }
2092  }
2093
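  /**
   * Closes every deployed instance of the given region recorded in its deployedEntries and then
   * attempts to offline/unassign it via the master.
   */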
2094  private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2095    for (OnlineEntry rse : hi.deployedEntries) {
2096      LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2097      try {
2098        HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2099        offline(rse.hri.getRegionName());
2100      } catch (IOException ioe) {
2101        LOG.warn("Got exception when attempting to offline region "
2102            + Bytes.toString(rse.hri.getRegionName()), ioe);
2103      }
2104    }
2105  }
2106
2107  /**
2108   * Attempts to undeploy a region from a region server based on information in
2109   * META.  Any operations that modify the file system should make sure that
2110   * its corresponding region is not deployed to prevent data races.
2111   *
2112   * A separate call is required to update the master in-memory region state
2113   * kept in the AssignmentManager.  Because disable uses this state instead of
2114   * that found in META, we can't seem to cleanly disable/delete tables that
2115   * have been hbck fixed.  When used on a version of HBase that does not have
2116   * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2117   * restart or failover may be required.
2118   */
2119  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2120    if (hi.metaEntry == null && hi.hdfsEntry == null) {
2121      undeployRegions(hi);
2122      return;
2123    }
2124
2125    // get assignment info and hregioninfo from meta.
2126    Get get = new Get(hi.getRegionName());
2127    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2128    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2129    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2130    // also get the locations of the replicas to close if the primary region is being closed
2131    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2132      int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2133      for (int i = 0; i < numReplicas; i++) {
2134        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2135        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2136      }
2137    }
2138    Result r = meta.get(get);
2139    RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2140    if (rl == null) {
2141      LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2142          " since meta does not have a handle to reach it");
2143      return;
2144    }
2145    for (HRegionLocation h : rl.getRegionLocations()) {
2146      ServerName serverName = h.getServerName();
2147      if (serverName == null) {
2148        errors.reportError("Unable to close region "
2149            + hi.getRegionNameAsString() + " because meta does not "
2150            + "have a handle to reach it.");
2151        continue;
2152      }
2153      RegionInfo hri = h.getRegion();
2154      if (hri == null) {
2155        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2156            + " because hbase:meta had invalid or missing "
2157            + HConstants.CATALOG_FAMILY_STR + ":"
2158            + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2159            + " qualifier value.");
2160        continue;
2161      }
2162      // close the region -- close files and remove assignment
2163      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2164    }
2165  }
2166
2167  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2168    KeeperException, InterruptedException {
2169    // If we are trying to fix the errors
2170    if (shouldFixAssignments()) {
2171      errors.print(msg);
2172      undeployRegions(hbi);
2173      setShouldRerun();
2174      RegionInfo hri = hbi.getHdfsHRI();
2175      if (hri == null) {
2176        hri = hbi.metaEntry;
2177      }
2178      HBaseFsckRepair.fixUnassigned(admin, hri);
2179      HBaseFsckRepair.waitUntilAssigned(admin, hri);
2180
2181      // also assign replicas if needed (do it only when this call operates on a primary replica)
2182      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2183      int replicationCount = admin.getDescriptor(hri.getTable()).getRegionReplication();
2184      for (int i = 1; i < replicationCount; i++) {
2185        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2186        HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2187        if (h != null) {
2188          undeployRegions(h);
2189          //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2190          //in consistency checks
2191          h.setSkipChecks(true);
2192        }
2193        HBaseFsckRepair.fixUnassigned(admin, hri);
2194        HBaseFsckRepair.waitUntilAssigned(admin, hri);
2195      }
2196
2197    }
2198  }
2199
2200  /**
2201   * Check a single region for consistency and correct deployment.
2202   */
2203  private void checkRegionConsistency(final String key, final HbckInfo hbi)
2204  throws IOException, KeeperException, InterruptedException {
2205
2206    if (hbi.isSkipChecks()) return;
2207    String descriptiveName = hbi.toString();
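    // Derive the region's observed state from three sources: hbase:meta, HDFS, and the region
    // servers it is deployed on. The combination of the flags below selects the repair action.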
2208    boolean inMeta = hbi.metaEntry != null;
2209    // In case not checking HDFS, assume the region is on HDFS
2210    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2211    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2212    boolean isDeployed = !hbi.deployedOn.isEmpty();
2213    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2214    boolean deploymentMatchesMeta =
2215      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2216      hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2217    boolean splitParent =
2218        inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2219    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2220    boolean recentlyModified = inHdfs &&
2221      hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2222
2223    // ========== First the healthy cases =============
2224    if (hbi.containsOnlyHdfsEdits()) {
2225      return;
2226    }
2227    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2228      return;
2229    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2230      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2231        "table that is not deployed");
2232      return;
2233    } else if (recentlyModified) {
2234      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2235      return;
2236    }
2237    // ========== Cases where the region is not in hbase:meta =============
2238    else if (!inMeta && !inHdfs && !isDeployed) {
2239      // We shouldn't have record of this region at all then!
2240      assert false : "Entry for region with no data";
2241    } else if (!inMeta && !inHdfs && isDeployed) {
2242      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2243          + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2244          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2245      if (shouldFixAssignments()) {
2246        undeployRegions(hbi);
2247      }
2248
2249    } else if (!inMeta && inHdfs && !isDeployed) {
2250      if (hbi.isMerged()) {
2251        // This region has already been merged, the remaining hdfs file will be
2252        // cleaned by CatalogJanitor later
2253        hbi.setSkipChecks(true);
2254        LOG.info("Region " + descriptiveName
2255            + " was merged recently; its file(s) will be cleaned up by the CatalogJanitor later");
2256        return;
2257      }
2258      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2259          + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2260          "or deployed on any region server");
2261      // restore region consistency of an adopted orphan
2262      if (shouldFixMeta()) {
2263        if (!hbi.isHdfsRegioninfoPresent()) {
2264          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2265              +  " in table integrity repair phase if -fixHdfsOrphans was" +
2266              " used.");
2267          return;
2268        }
2269
2270        RegionInfo hri = hbi.getHdfsHRI();
2271        TableInfo tableInfo = tablesInfo.get(hri.getTable());
2272
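        // If a region already in hbase:meta covers this orphan's key range and one of the
        // orphan's reference files points back to that region, the orphan is leftover from a
        // failed split and is deleted from HDFS instead of being re-added to hbase:meta.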
2273        for (RegionInfo region : tableInfo.getRegionsFromMeta()) {
2274          if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2275              && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2276                hri.getEndKey()) >= 0)
2277              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2278            if(region.isSplit() || region.isOffline()) continue;
2279            Path regionDir = hbi.getHdfsRegionDir();
2280            FileSystem fs = regionDir.getFileSystem(getConf());
2281            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2282            for (Path familyDir : familyDirs) {
2283              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2284              for (Path referenceFilePath : referenceFilePaths) {
2285                Path parentRegionDir =
2286                    StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2287                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2288                  LOG.warn(hri + " start and stop keys are in the range of " + region
2289                      + ". The region might not have been cleaned up from hdfs when the split of region "
2290                      + region + " failed. Hence deleting it from hdfs.");
2291                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2292                    regionDir.getParent(), hri);
2293                  return;
2294                }
2295              }
2296            }
2297          }
2298        }
2299        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2300        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2301        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2302            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2303              .getLiveServerMetrics().keySet(), numReplicas);
2304
2305        tryAssignmentRepair(hbi, "Trying to reassign region...");
2306      }
2307
2308    } else if (!inMeta && inHdfs && isDeployed) {
2309      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2310          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2311      debugLsr(hbi.getHdfsRegionDir());
2312      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2313        // for replicas, this means that we should undeploy the region (we would have
2314        // gone over the primaries and fixed meta holes in first phase under
2315        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2316        // this stage unless it is an unwanted replica)
2317        if (shouldFixAssignments()) {
2318          undeployRegionsForHbi(hbi);
2319        }
2320      }
2321      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2322        if (!hbi.isHdfsRegioninfoPresent()) {
2323          LOG.error("This should have been repaired in table integrity repair phase");
2324          return;
2325        }
2326
2327        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2328        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2329        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2330            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2331              .getLiveServerMetrics().keySet(), numReplicas);
2332        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2333      }
2334
2335    // ========== Cases where the region is in hbase:meta =============
2336    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2337      // check whether this is an actual error, or just transient state where parent
2338      // is not cleaned
2339      if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2340        // check that split daughters are there
2341        HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2342        HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2343        if (infoA != null && infoB != null) {
2344          // we already processed or will process daughters. Move on, nothing to see here.
2345          hbi.setSkipChecks(true);
2346          return;
2347        }
2348      }
2349
2350      // For a replica region, we need to do a similar check. If the replica is not split
2351      // successfully, the error is going to be reported against the primary daughter region.
2352      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2353        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2354            + "and not deployed on any region server. This may be transient.");
2355        hbi.setSkipChecks(true);
2356        return;
2357      }
2358
2359      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2360          + descriptiveName + " is a split parent in META, in HDFS, "
2361          + "and not deployed on any region server. This could be transient, "
2362          + "consider running the catalog janitor first!");
2363      if (shouldFixSplitParents()) {
2364        setShouldRerun();
2365        resetSplitParent(hbi);
2366      }
2367    } else if (inMeta && !inHdfs && !isDeployed) {
2368      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2369          + descriptiveName + " found in META, but not in HDFS "
2370          + "or deployed on any region server.");
2371      if (shouldFixMeta()) {
2372        deleteMetaRegion(hbi);
2373      }
2374    } else if (inMeta && !inHdfs && isDeployed) {
2375      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2376          + " found in META, but not in HDFS, " +
2377          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2378      // We treat HDFS as ground truth.  Any information in meta is transient
2379      // and equivalent data can be regenerated.  So, let's unassign and remove
2380      // these problems from META.
2381      if (shouldFixAssignments()) {
2382        errors.print("Trying to fix unassigned region...");
2383        undeployRegions(hbi);
2384      }
2385      if (shouldFixMeta()) {
2386        // wait for it to complete
2387        deleteMetaRegion(hbi);
2388      }
2389    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2390      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2391          + " not deployed on any region server.");
2392      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2393    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2394      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2395          "Region " + descriptiveName + " should not be deployed according " +
2396          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2397      if (shouldFixAssignments()) {
2398        errors.print("Trying to close the region " + descriptiveName);
2399        setShouldRerun();
2400        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2401      }
2402    } else if (inMeta && inHdfs && isMultiplyDeployed) {
2403      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2404          + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2405          + " but is multiply assigned to region servers " +
2406          Joiner.on(", ").join(hbi.deployedOn));
2407      // If we are trying to fix the errors
2408      if (shouldFixAssignments()) {
2409        errors.print("Trying to fix assignment error...");
2410        setShouldRerun();
2411        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2412      }
2413    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2414      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2415          + descriptiveName + " listed in hbase:meta on region server " +
2416          hbi.metaEntry.regionServer + " but found on region server " +
2417          hbi.deployedOn.get(0));
2418      // If we are trying to fix the errors
2419      if (shouldFixAssignments()) {
2420        errors.print("Trying to fix assignment error...");
2421        setShouldRerun();
2422        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2423        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2424      }
2425    } else {
2426      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2427          " is in an unforeseen state:" +
2428          " inMeta=" + inMeta +
2429          " inHdfs=" + inHdfs +
2430          " isDeployed=" + isDeployed +
2431          " isMultiplyDeployed=" + isMultiplyDeployed +
2432          " deploymentMatchesMeta=" + deploymentMatchesMeta +
2433          " shouldBeDeployed=" + shouldBeDeployed);
2434    }
2435  }
2436
2437  /**
2438   * Checks tables integrity. Goes over all regions and scans the tables.
2439   * Collects all the pieces for each table and checks if there are missing,
2440   * repeated or overlapping ones.
2441   * @throws IOException
2442   */
2443  SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2444    tablesInfo = new TreeMap<>();
2445    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2446    for (HbckInfo hbi : regionInfoMap.values()) {
2447      // Check only valid, working regions
2448      if (hbi.metaEntry == null) {
2449        // this assumes that consistency check has run loadMetaEntry
2450        Path p = hbi.getHdfsRegionDir();
2451        if (p == null) {
2452          errors.report("No regioninfo in Meta or HDFS. " + hbi);
2453        }
2454
2455        // TODO test.
2456        continue;
2457      }
2458      if (hbi.metaEntry.regionServer == null) {
2459        errors.detail("Skipping region because no region server: " + hbi);
2460        continue;
2461      }
2462      if (hbi.metaEntry.isOffline()) {
2463        errors.detail("Skipping region because it is offline: " + hbi);
2464        continue;
2465      }
2466      if (hbi.containsOnlyHdfsEdits()) {
2467        errors.detail("Skipping region because it only contains edits: " + hbi);
2468        continue;
2469      }
2470
2471      // Missing regionDir or over-deployment is checked elsewhere. Include
2472      // these cases in modTInfo, so we can evaluate those regions as part of
2473      // the region chain in META
2474      //if (hbi.foundRegionDir == null) continue;
2475      //if (hbi.deployedOn.size() != 1) continue;
2476      if (hbi.deployedOn.isEmpty()) continue;
2477
2478      // We should be safe here
2479      TableName tableName = hbi.metaEntry.getTable();
2480      TableInfo modTInfo = tablesInfo.get(tableName);
2481      if (modTInfo == null) {
2482        modTInfo = new TableInfo(tableName);
2483      }
2484      for (ServerName server : hbi.deployedOn) {
2485        modTInfo.addServer(server);
2486      }
2487
2488      if (!hbi.isSkipChecks()) {
2489        modTInfo.addRegionInfo(hbi);
2490      }
2491
2492      tablesInfo.put(tableName, modTInfo);
2493    }
2494
2495    loadTableInfosForTablesWithNoRegion();
2496
2497    logParallelMerge();
2498    for (TableInfo tInfo : tablesInfo.values()) {
2499      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2500      if (!tInfo.checkRegionChain(handler)) {
2501        errors.report("Found inconsistency in table " + tInfo.getName());
2502      }
2503    }
2504    return tablesInfo;
2505  }
2506
2507  /** Loads table infos for tables that may not have been included because there are no
2508   * regions reported for the table, but the table dir exists in hdfs.
2509   */
2510  private void loadTableInfosForTablesWithNoRegion() throws IOException {
2511    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2512    for (TableDescriptor htd : allTables.values()) {
2513      if (checkMetaOnly && !htd.isMetaTable()) {
2514        continue;
2515      }
2516
2517      TableName tableName = htd.getTableName();
2518      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2519        TableInfo tableInfo = new TableInfo(tableName);
2520        tableInfo.htds.add(htd);
2521        tablesInfo.put(htd.getTableName(), tableInfo);
2522      }
2523    }
2524  }
2525
2526  /**
2527   * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2528   * @return number of file move fixes done to merge regions.
2529   */
2530  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2531    int fileMoves = 0;
2532    String thread = Thread.currentThread().getName();
2533    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2534    debugLsr(contained.getHdfsRegionDir());
2535
2536    // rename the contained into the container.
2537    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2538    FileStatus[] dirs = null;
2539    try {
2540      dirs = fs.listStatus(contained.getHdfsRegionDir());
2541    } catch (FileNotFoundException fnfe) {
2542      // region we are attempting to merge in is not present!  Since this is a merge, there is
2543      // no harm skipping this region if it does not exist.
2544      if (!fs.exists(contained.getHdfsRegionDir())) {
2545        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2546            + " is missing. Assuming already sidelined or moved.");
2547      } else {
2548        sidelineRegionDir(fs, contained);
2549      }
2550      return fileMoves;
2551    }
2552
2553    if (dirs == null) {
2554      if (!fs.exists(contained.getHdfsRegionDir())) {
2555        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2556            + " already sidelined.");
2557      } else {
2558        sidelineRegionDir(fs, contained);
2559      }
2560      return fileMoves;
2561    }
2562
2563    for (FileStatus cf : dirs) {
2564      Path src = cf.getPath();
2565      Path dst =  new Path(targetRegionDir, src.getName());
2566
2567      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2568        // do not copy the old .regioninfo file.
2569        continue;
2570      }
2571
2572      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2573        // do not copy the .oldlogs files
2574        continue;
2575      }
2576
2577      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
      // FileSystem.rename is inconsistent with directories -- if the
      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
      // it moves the src into the dst dir resulting in (foo/a/b).  If
      // the dst does not exist, and the src is a dir, src becomes dst (foo/b).
2582      for (FileStatus hfile : fs.listStatus(src)) {
2583        boolean success = fs.rename(hfile.getPath(), dst);
2584        if (success) {
2585          fileMoves++;
2586        }
2587      }
2588      LOG.debug("[" + thread + "] Sideline directory contents:");
2589      debugLsr(targetRegionDir);
2590    }
2591
    // After moving the data files, sideline what is left of the contained region dir.
2593    sidelineRegionDir(fs, contained);
2594    LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2595        getSidelineDir());
2596    debugLsr(contained.getHdfsRegionDir());
2597
2598    return fileMoves;
2599  }
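  // Illustrative sketch of the effect of mergeRegionDirs, using hypothetical paths and a
  // single column family 'cf':
  //   before: <table>/<contained-region>/cf/hfile1    <table>/<target-region>/cf/hfile0
  //   after:  both hfiles sit under <table>/<target-region>/cf/ and the contained region
  //           dir is sidelined under getSidelineDir().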
2600
2601
2602  static class WorkItemOverlapMerge implements Callable<Void> {
2603    private TableIntegrityErrorHandler handler;
2604    Collection<HbckInfo> overlapgroup;
2605
2606    WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2607      this.handler = handler;
2608      this.overlapgroup = overlapgroup;
2609    }
2610
2611    @Override
2612    public Void call() throws Exception {
2613      handler.handleOverlapGroup(overlapgroup);
2614      return null;
2615    }
2616  }
2617
2618  /**
2619   * Maintain information about a particular table.
2620   */
2621  public class TableInfo {
2622    TableName tableName;
2623    TreeSet <ServerName> deployedOn;
2624
2625    // backwards regions
2626    final List<HbckInfo> backwards = new ArrayList<>();
2627
2628    // sidelined big overlapped regions
2629    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<>();
2630
2631    // region split calculator
2632    final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<>(cmp);
2633
    // Set of distinct TableDescriptors found.  Ideally there is only one!
2635    final Set<TableDescriptor> htds = new HashSet<>();
2636
2637    // key = start split, values = set of splits in problem group
2638    final Multimap<byte[], HbckInfo> overlapGroups =
2639      TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2640
2641    // list of regions derived from meta entries.
2642    private ImmutableList<RegionInfo> regionsFromMeta = null;
2643
2644    TableInfo(TableName name) {
2645      this.tableName = name;
2646      deployedOn = new TreeSet <>();
2647    }
2648
2649    /**
     * @return descriptor common to all regions.  null if there are none or multiple!
2651     */
2652    private TableDescriptor getHTD() {
2653      if (htds.size() == 1) {
        return htds.iterator().next();
2655      } else {
2656        LOG.error("None/Multiple table descriptors found for table '"
2657          + tableName + "' regions: " + htds);
2658      }
2659      return null;
2660    }
2661
2662    public void addRegionInfo(HbckInfo hir) {
2663      if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2664        // end key is absolute end key, just add it.
2665        // ignore replicas other than primary for these checks
2666        if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2667        return;
2668      }
2669
2670      // if not the absolute end key, check for cycle
2671      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2672        errors.reportError(
2673            ERROR_CODE.REGION_CYCLE,
2674            String.format("The endkey for this region comes before the "
2675                + "startkey, startkey=%s, endkey=%s",
2676                Bytes.toStringBinary(hir.getStartKey()),
2677                Bytes.toStringBinary(hir.getEndKey())), this, hir);
2678        backwards.add(hir);
2679        return;
2680      }
2681
2682      // main case, add to split calculator
2683      // ignore replicas other than primary for these checks
2684      if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2685    }
2686
2687    public void addServer(ServerName server) {
2688      this.deployedOn.add(server);
2689    }
2690
2691    public TableName getName() {
2692      return tableName;
2693    }
2694
2695    public int getNumRegions() {
2696      return sc.getStarts().size() + backwards.size();
2697    }
2698
2699    public synchronized ImmutableList<RegionInfo> getRegionsFromMeta() {
2700      // lazy loaded, synchronized to ensure a single load
2701      if (regionsFromMeta == null) {
2702        List<RegionInfo> regions = new ArrayList<>();
2703        for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2704          if (tableName.equals(h.getTableName())) {
2705            if (h.metaEntry != null) {
2706              regions.add(h.metaEntry);
2707            }
2708          }
2709        }
2710        regionsFromMeta = Ordering.from(RegionInfo.COMPARATOR).immutableSortedCopy(regions);
2711      }
2712
2713      return regionsFromMeta;
2714    }
2715
2716    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2717      ErrorReporter errors;
2718
2719      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2720        this.errors = errors;
2721        setTableInfo(ti);
2722      }
2723
2724      @Override
2725      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2726        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2727            "First region should start with an empty key.  You need to "
2728            + " create a new region and regioninfo in HDFS to plug the hole.",
2729            getTableInfo(), hi);
2730      }
2731
2732      @Override
2733      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2734        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2735            "Last region should end with an empty key. You need to "
2736                + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2737      }
2738
2739      @Override
2740      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2741        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2742            "Region has the same start and end key.", getTableInfo(), hi);
2743      }
2744
2745      @Override
2746      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2747        byte[] key = r1.getStartKey();
2748        // dup start key
2749        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2750            "Multiple regions have the same startkey: "
2751            + Bytes.toStringBinary(key), getTableInfo(), r1);
2752        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2753            "Multiple regions have the same startkey: "
2754            + Bytes.toStringBinary(key), getTableInfo(), r2);
2755      }
2756
2757      @Override
2758      public void handleSplit(HbckInfo r1, HbckInfo r2) throws IOException{
2759        byte[] key = r1.getStartKey();
2760        // dup start key
2761        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
2762          "Multiple regions have the same regionID: "
2763            + Bytes.toStringBinary(key), getTableInfo(), r1);
2764        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
2765          "Multiple regions have the same regionID: "
2766            + Bytes.toStringBinary(key), getTableInfo(), r2);
2767      }
2768
2769      @Override
2770      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2771        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2772            "There is an overlap in the region chain.",
2773            getTableInfo(), hi1, hi2);
2774      }
2775
2776      @Override
2777      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2778        errors.reportError(
2779            ERROR_CODE.HOLE_IN_REGION_CHAIN,
2780            "There is a hole in the region chain between "
2781                + Bytes.toStringBinary(holeStart) + " and "
2782                + Bytes.toStringBinary(holeStop)
2783                + ".  You need to create a new .regioninfo and region "
2784                + "dir in hdfs to plug the hole.");
2785      }
2786    }
2787
2788    /**
2789     * This handler fixes integrity errors from hdfs information.  There are
2790     * basically three classes of integrity problems 1) holes, 2) overlaps, and
2791     * 3) invalid regions.
2792     *
2793     * This class overrides methods that fix holes and the overlap group case.
2794     * Individual cases of particular overlaps are handled by the general
2795     * overlap group merge repair case.
2796     *
2797     * If hbase is online, this forces regions offline before doing merge
2798     * operations.
2799     */
2800    private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2801      Configuration conf;
2802
2803      boolean fixOverlaps = true;
2804
2805      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2806          boolean fixHoles, boolean fixOverlaps) {
2807        super(ti, errors);
2808        this.conf = conf;
2809        this.fixOverlaps = fixOverlaps;
2810        // TODO properly use fixHoles
2811      }
2812
2813      /**
2814       * This is a special case hole -- when the first region of a table is
       * missing from META, HBase doesn't acknowledge the existence of the
2816       * table.
2817       */
2818      @Override
2819      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2820        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2821            "First region should start with an empty key.  Creating a new " +
2822            "region and regioninfo in HDFS to plug the hole.",
2823            getTableInfo(), next);
2824        TableDescriptor htd = getTableInfo().getHTD();
2825        // from special EMPTY_START_ROW to next region's startKey
2826        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
2827            .setStartKey(HConstants.EMPTY_START_ROW)
2828            .setEndKey(next.getStartKey())
2829            .build();
2830
2831        // TODO test
2832        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2833        LOG.info("Table region start key was not empty.  Created new empty region: "
2834            + newRegion + " " +region);
2835        fixes++;
2836      }
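      // Worked example with hypothetical keys: if the first region found for the table
      // covers ['b', 'z'), this handler creates an empty region ['', 'b') so that the
      // chain starts at the empty start row again.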
2837
2838      @Override
2839      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2840        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2841            "Last region should end with an empty key.  Creating a new "
2842                + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2843        TableDescriptor htd = getTableInfo().getHTD();
2844        // from curEndKey to EMPTY_START_ROW
2845        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
2846            .setStartKey(curEndKey)
2847            .setEndKey(HConstants.EMPTY_START_ROW)
2848            .build();
2849
2850        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2851        LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2852            + " " + region);
2853        fixes++;
2854      }
2855
2856      /**
2857       * There is a hole in the hdfs regions that violates the table integrity
2858       * rules.  Create a new empty region that patches the hole.
2859       */
2860      @Override
      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey)
          throws IOException {
2862        errors.reportError(
2863            ERROR_CODE.HOLE_IN_REGION_CHAIN,
2864            "There is a hole in the region chain between "
2865                + Bytes.toStringBinary(holeStartKey) + " and "
2866                + Bytes.toStringBinary(holeStopKey)
2867                + ".  Creating a new regioninfo and region "
2868                + "dir in hdfs to plug the hole.");
2869        TableDescriptor htd = getTableInfo().getHTD();
2870        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
2871            .setStartKey(holeStartKey)
2872            .setEndKey(holeStopKey)
2873            .build();
2874        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2875        LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2876        fixes++;
2877      }
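      // Worked example with hypothetical keys: regions ['', 'b') and ['c', '') leave the
      // hole ['b', 'c'); this handler creates an empty region ['b', 'c') in HDFS to close it.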
2878
2879      /**
2880       * This takes set of overlapping regions and merges them into a single
2881       * region.  This covers cases like degenerate regions, shared start key,
2882       * general overlaps, duplicate ranges, and partial overlapping regions.
2883       *
2884       * Cases:
2885       * - Clean regions that overlap
       * - Only .oldlogs regions (can't find start/stop range, or figure it out)
2887       *
       * This is basically threadsafe, except for the 'fixes' counter increment in mergeOverlaps.
2889       */
2890      @Override
2891      public void handleOverlapGroup(Collection<HbckInfo> overlap)
2892          throws IOException {
2893        Preconditions.checkNotNull(overlap);
        Preconditions.checkArgument(overlap.size() > 0);
2895
2896        if (!this.fixOverlaps) {
2897          LOG.warn("Not attempting to repair overlaps.");
2898          return;
2899        }
2900
2901        if (overlap.size() > maxMerge) {
2902          LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2903            "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2904          if (sidelineBigOverlaps) {
            // only sideline big overlapped groups that exceed the max number of regions to merge
2906            sidelineBigOverlaps(overlap);
2907          }
2908          return;
2909        }
2910        if (shouldRemoveParents()) {
2911          removeParentsAndFixSplits(overlap);
2912        }
2913        mergeOverlaps(overlap);
2914      }
2915
2916      void removeParentsAndFixSplits(Collection<HbckInfo> overlap) throws IOException {
2917        Pair<byte[], byte[]> range = null;
2918        HbckInfo parent = null;
2919        HbckInfo daughterA = null;
2920        HbckInfo daughterB = null;
        Collection<HbckInfo> daughters = new ArrayList<>(overlap);
2922
2923        String thread = Thread.currentThread().getName();
        LOG.info("== [" + thread + "] Attempting to fix splits in overlap state.");
2925
        // we can only handle a single split per group at a time
2927        if (overlap.size() > 3) {
2928          LOG.info("Too many overlaps were found on this group, falling back to regular merge.");
2929          return;
2930        }
2931
2932        for (HbckInfo hi : overlap) {
2933          if (range == null) {
            range = new Pair<>(hi.getStartKey(), hi.getEndKey());
2935          } else {
2936            if (RegionSplitCalculator.BYTES_COMPARATOR
2937              .compare(hi.getStartKey(), range.getFirst()) < 0) {
2938              range.setFirst(hi.getStartKey());
2939            }
2940            if (RegionSplitCalculator.BYTES_COMPARATOR
2941              .compare(hi.getEndKey(), range.getSecond()) > 0) {
2942              range.setSecond(hi.getEndKey());
2943            }
2944          }
2945        }
2946
2947        LOG.info("This group range is [" + Bytes.toStringBinary(range.getFirst()) + ", "
2948          + Bytes.toStringBinary(range.getSecond()) + "]");
2949
2950        // attempt to find a possible parent for the edge case of a split
2951        for (HbckInfo hi : overlap) {
2952          if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0
2953            && Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
2954            LOG.info("This is a parent for this group: " + hi.toString());
2955            parent = hi;
2956          }
2957        }
2958
        // We should have found a parent; without one there is no split to fix here,
        // so fall back to the regular merge.
        if (parent == null) {
          return;
        }
        // Remove the parent region from the daughters collection
        daughters.remove(parent);

        // Let's verify that the daughters share the regionID at split time and that they
        // were created after the parent
        for (HbckInfo hi : daughters) {
          if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0
              && parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
            daughterA = hi;
          }
          if (Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0
              && parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
            daughterB = hi;
          }
        }

        // Both daughters must be present and must share the same regionID
        if (daughterA == null || daughterB == null
            || daughterA.getHdfsHRI().getRegionId() != daughterB.getHdfsHRI().getRegionId()) {
          return;
        }
2982
2983        FileSystem fs = FileSystem.get(conf);
2984        LOG.info("Found parent: " + parent.getRegionNameAsString());
2985        LOG.info("Found potential daughter a: " + daughterA.getRegionNameAsString());
2986        LOG.info("Found potential daughter b: " + daughterB.getRegionNameAsString());
2987        LOG.info("Trying to fix parent in overlap by removing the parent.");
2988        try {
2989          closeRegion(parent);
2990        } catch (IOException ioe) {
2991          LOG.warn("Parent region could not be closed, continuing with regular merge...", ioe);
2992          return;
2993        } catch (InterruptedException ie) {
2994          LOG.warn("Parent region could not be closed, continuing with regular merge...", ie);
2995          return;
2996        }
2997
2998        try {
2999          offline(parent.getRegionName());
3000        } catch (IOException ioe) {
3001          LOG.warn("Unable to offline parent region: " + parent.getRegionNameAsString()
3002            + ".  Just continuing with regular merge... ", ioe);
3003          return;
3004        }
3005
3006        try {
3007          HBaseFsckRepair.removeParentInMeta(conf, parent.getHdfsHRI());
3008        } catch (IOException ioe) {
3009          LOG.warn("Unable to remove parent region in META: " + parent.getRegionNameAsString()
3010            + ".  Just continuing with regular merge... ", ioe);
3011          return;
3012        }
3013
3014        sidelineRegionDir(fs, parent);
3015        LOG.info("[" + thread + "] Sidelined parent region dir "+ parent.getHdfsRegionDir() + " into " +
3016          getSidelineDir());
3017        debugLsr(parent.getHdfsRegionDir());
3018
3019        // Make sure we don't have the parents and daughters around
3020        overlap.remove(parent);
3021        overlap.remove(daughterA);
3022        overlap.remove(daughterB);
3023
3024        LOG.info("Done fixing split.");
3025
3026      }
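      // Worked example with hypothetical keys and regionIds: an overlap group holding a
      // parent ['a', 'c') with regionId 10 and daughters ['a', 'b') and ['b', 'c') that both
      // carry regionId 20 is resolved by closing, offlining and sidelining the parent and
      // keeping the two daughters, rather than merging all three back into a single region.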
3027
3028      void mergeOverlaps(Collection<HbckInfo> overlap)
3029          throws IOException {
3030        String thread = Thread.currentThread().getName();
3031        LOG.info("== [" + thread + "] Merging regions into one region: "
3032          + Joiner.on(",").join(overlap));
3033        // get the min / max range and close all concerned regions
3034        Pair<byte[], byte[]> range = null;
3035        for (HbckInfo hi : overlap) {
3036          if (range == null) {
3037            range = new Pair<>(hi.getStartKey(), hi.getEndKey());
3038          } else {
3039            if (RegionSplitCalculator.BYTES_COMPARATOR
3040                .compare(hi.getStartKey(), range.getFirst()) < 0) {
3041              range.setFirst(hi.getStartKey());
3042            }
3043            if (RegionSplitCalculator.BYTES_COMPARATOR
3044                .compare(hi.getEndKey(), range.getSecond()) > 0) {
3045              range.setSecond(hi.getEndKey());
3046            }
3047          }
3048          // need to close files so delete can happen.
3049          LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
3050          LOG.debug("[" + thread + "] Contained region dir before close");
3051          debugLsr(hi.getHdfsRegionDir());
3052          try {
3053            LOG.info("[" + thread + "] Closing region: " + hi);
3054            closeRegion(hi);
3055          } catch (IOException ioe) {
3056            LOG.warn("[" + thread + "] Was unable to close region " + hi
3057              + ".  Just continuing... ", ioe);
3058          } catch (InterruptedException e) {
3059            LOG.warn("[" + thread + "] Was unable to close region " + hi
3060              + ".  Just continuing... ", e);
3061          }
3062
3063          try {
3064            LOG.info("[" + thread + "] Offlining region: " + hi);
3065            offline(hi.getRegionName());
3066          } catch (IOException ioe) {
3067            LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
3068              + ".  Just continuing... ", ioe);
3069          }
3070        }
3071
3072        // create new empty container region.
3073        TableDescriptor htd = getTableInfo().getHTD();
3074        // from start key to end Key
3075        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3076            .setStartKey(range.getFirst())
3077            .setEndKey(range.getSecond())
3078            .build();
3079        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3080        LOG.info("[" + thread + "] Created new empty container region: " +
3081            newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
3082        debugLsr(region.getRegionFileSystem().getRegionDir());
3083
3084        // all target regions are closed, should be able to safely cleanup.
        boolean didFix = false;
3086        Path target = region.getRegionFileSystem().getRegionDir();
3087        for (HbckInfo contained : overlap) {
          LOG.info("[" + thread + "] Merging " + contained + " into " + target);
3089          int merges = mergeRegionDirs(target, contained);
3090          if (merges > 0) {
3091            didFix = true;
3092          }
3093        }
3094        if (didFix) {
3095          fixes++;
3096        }
3097      }
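      // Worked example with hypothetical keys: overlapping regions ['a', 'c') and ['b', 'd')
      // are closed and offlined, a new empty container region ['a', 'd') is created in HDFS,
      // their files are moved into it via mergeRegionDirs, and the old region dirs are
      // sidelined.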
3098
      /**
       * Sideline some regions in a big overlap group so that the group
       * has fewer regions and is easier to merge later on.
       *
       * @param bigOverlap the overlap group with more regions than maxMerge
       * @throws IOException if sidelining a region dir fails
       */
3106      void sidelineBigOverlaps(
3107          Collection<HbckInfo> bigOverlap) throws IOException {
3108        int overlapsToSideline = bigOverlap.size() - maxMerge;
3109        if (overlapsToSideline > maxOverlapsToSideline) {
3110          overlapsToSideline = maxOverlapsToSideline;
3111        }
3112        List<HbckInfo> regionsToSideline =
3113          RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
3114        FileSystem fs = FileSystem.get(conf);
3115        for (HbckInfo regionToSideline: regionsToSideline) {
3116          try {
3117            LOG.info("Closing region: " + regionToSideline);
3118            closeRegion(regionToSideline);
3119          } catch (IOException ioe) {
3120            LOG.warn("Was unable to close region " + regionToSideline
3121              + ".  Just continuing... ", ioe);
3122          } catch (InterruptedException e) {
3123            LOG.warn("Was unable to close region " + regionToSideline
3124              + ".  Just continuing... ", e);
3125          }
3126
3127          try {
3128            LOG.info("Offlining region: " + regionToSideline);
3129            offline(regionToSideline.getRegionName());
3130          } catch (IOException ioe) {
3131            LOG.warn("Unable to offline region from master: " + regionToSideline
3132              + ".  Just continuing... ", ioe);
3133          }
3134
3135          LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
3136          Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
3137          if (sidelineRegionDir != null) {
3138            sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3139            LOG.info("After sidelined big overlapped region: "
3140              + regionToSideline.getRegionNameAsString()
3141              + " to " + sidelineRegionDir.toString());
3142            fixes++;
3143          }
3144        }
3145      }
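      // Worked arithmetic example with hypothetical settings: with maxMerge = 5 and
      // maxOverlapsToSideline = 2, a group of 12 overlapping regions sidelines
      // min(12 - 5, 2) = 2 of its biggest ranges, leaving 10 regions for a later run.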
3146    }
3147
3148    /**
3149     * Check the region chain (from META) of this table.  We are looking for
3150     * holes, overlaps, and cycles.
3151     * @return false if there are errors
3152     * @throws IOException
3153     */
3154    public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
      // When a table is disabled there is no need to check the region chain. If some of its
      // regions were accidentally deployed, the code below might report issues such as a
      // missing first or last region or a hole in the chain, and might try to fix them,
      // which is unwanted.
3158      if (isTableDisabled(this.tableName)) {
3159        return true;
3160      }
3161      int originalErrorsCount = errors.getErrorList().size();
3162      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
3163      SortedSet<byte[]> splits = sc.getSplits();
3164
3165      byte[] prevKey = null;
3166      byte[] problemKey = null;
3167
3168      if (splits.isEmpty()) {
3169        // no region for this table
3170        handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
3171      }
3172
3173      for (byte[] key : splits) {
3174        Collection<HbckInfo> ranges = regions.get(key);
3175        if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
3176          for (HbckInfo rng : ranges) {
3177            handler.handleRegionStartKeyNotEmpty(rng);
3178          }
3179        }
3180
3181        // check for degenerate ranges
3182        for (HbckInfo rng : ranges) {
3183          // special endkey case converts '' to null
3184          byte[] endKey = rng.getEndKey();
3185          endKey = (endKey.length == 0) ? null : endKey;
3186          if (Bytes.equals(rng.getStartKey(),endKey)) {
3187            handler.handleDegenerateRegion(rng);
3188          }
3189        }
3190
3191        if (ranges.size() == 1) {
3192          // this split key is ok -- no overlap, not a hole.
3193          if (problemKey != null) {
3194            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3195          }
3196          problemKey = null; // fell through, no more problem.
3197        } else if (ranges.size() > 1) {
          // Start a new problem key group at this key; if we already have a problem key,
          // just keep using it.
3200          if (problemKey == null) {
3201            // only for overlap regions.
3202            LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
3203            problemKey = key;
3204          }
3205          overlapGroups.putAll(problemKey, ranges);
3206
3207          // record errors
3208          ArrayList<HbckInfo> subRange = new ArrayList<>(ranges);
          // this is dumb and O(n^2), but it shouldn't happen often
3210          for (HbckInfo r1 : ranges) {
3211            if (r1.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
3212            subRange.remove(r1);
3213            for (HbckInfo r2 : subRange) {
3214              if (r2.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
3215              // general case of same start key
3216              if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3217                handler.handleDuplicateStartKeys(r1,r2);
3218              } else if (Bytes.compareTo(r1.getEndKey(), r2.getStartKey())==0 &&
3219                r1.getHdfsHRI().getRegionId() == r2.getHdfsHRI().getRegionId()) {
3220                LOG.info("this is a split, log to splits");
3221                handler.handleSplit(r1, r2);
3222              } else {
3223                // overlap
3224                handler.handleOverlapInRegionChain(r1, r2);
3225              }
3226            }
3227          }
3228
3229        } else if (ranges.isEmpty()) {
3230          if (problemKey != null) {
3231            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3232          }
3233          problemKey = null;
3234
3235          byte[] holeStopKey = sc.getSplits().higher(key);
3236          // if higher key is null we reached the top.
3237          if (holeStopKey != null) {
3238            // hole
3239            handler.handleHoleInRegionChain(key, holeStopKey);
3240          }
3241        }
3242        prevKey = key;
3243      }
3244
      // When the last region of a table is proper and has an empty end key, 'prevKey'
      // will be null.
3247      if (prevKey != null) {
3248        handler.handleRegionEndKeyNotEmpty(prevKey);
3249      }
3250
3251      // TODO fold this into the TableIntegrityHandler
3252      if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3253        boolean ok = handleOverlapsParallel(handler, prevKey);
3254        if (!ok) {
3255          return false;
3256        }
3257      } else {
3258        for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3259          handler.handleOverlapGroup(overlap);
3260        }
3261      }
3262
3263      if (details) {
3264        // do full region split map dump
3265        errors.print("---- Table '"  +  this.tableName
3266            + "': region split map");
3267        dump(splits, regions);
3268        errors.print("---- Table '"  +  this.tableName
3269            + "': overlap groups");
3270        dumpOverlapProblems(overlapGroups);
3271        errors.print("There are " + overlapGroups.keySet().size()
3272            + " overlap groups with " + overlapGroups.size()
3273            + " overlapping regions");
3274      }
3275      if (!sidelinedRegions.isEmpty()) {
3276        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3277        errors.print("---- Table '"  +  this.tableName
3278            + "': sidelined big overlapped regions");
3279        dumpSidelinedRegions(sidelinedRegions);
3280      }
3281      return errors.getErrorList().size() == originalErrorsCount;
3282    }
3283
3284    private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3285        throws IOException {
      // We parallelize the overlap handler for the case where we have lots of groups to fix.
      // We can safely assume each group is independent.
3288      List<WorkItemOverlapMerge> merges = new ArrayList<>(overlapGroups.size());
3289      List<Future<Void>> rets;
3290      for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3292        merges.add(new WorkItemOverlapMerge(overlap, handler));
3293      }
3294      try {
3295        rets = executor.invokeAll(merges);
3296      } catch (InterruptedException e) {
3297        LOG.error("Overlap merges were interrupted", e);
3298        return false;
3299      }
      for (int i = 0; i < merges.size(); i++) {
        WorkItemOverlapMerge work = merges.get(i);
        Future<Void> f = rets.get(i);
        try {
          f.get();
        } catch (ExecutionException e) {
          LOG.warn("Failed to merge overlap group " + work, e.getCause());
3307        } catch (InterruptedException e) {
3308          LOG.error("Waiting for overlap merges was interrupted", e);
3309          return false;
3310        }
3311      }
3312      return true;
3313    }
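    // The parallel path above is controlled by the "hbasefsck.overlap.merge.parallel"
    // configuration key (default true). A minimal sketch of disabling it programmatically:
    //   Configuration conf = HBaseConfiguration.create();
    //   conf.setBoolean("hbasefsck.overlap.merge.parallel", false);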
3314
    /**
     * Dumps the region split map in a human-readable way for debugging.
     *
     * @param splits the split points of the table
     * @param regions multimap from split point to the regions covering it
     */
3321    void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3322      // we display this way because the last end key should be displayed as well.
3323      StringBuilder sb = new StringBuilder();
3324      for (byte[] k : splits) {
3325        sb.setLength(0); // clear out existing buffer, if any.
3326        sb.append(Bytes.toStringBinary(k) + ":\t");
3327        for (HbckInfo r : regions.get(k)) {
3328          sb.append("[ "+ r.toString() + ", "
3329              + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3330        }
3331        errors.print(sb.toString());
3332      }
3333    }
3334  }
3335
3336  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3337    // we display this way because the last end key should be displayed as
3338    // well.
3339    for (byte[] k : regions.keySet()) {
3340      errors.print(Bytes.toStringBinary(k) + ":");
3341      for (HbckInfo r : regions.get(k)) {
3342        errors.print("[ " + r.toString() + ", "
3343            + Bytes.toStringBinary(r.getEndKey()) + "]");
3344      }
3345      errors.print("----");
3346    }
3347  }
3348
3349  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3350    for (Map.Entry<Path, HbckInfo> entry : regions.entrySet()) {
3351      TableName tableName = entry.getValue().getTableName();
3352      Path path = entry.getKey();
3353      errors.print("This sidelined region dir should be bulk loaded: " + path.toString());
3354      errors.print("Bulk load command looks like: " + BulkLoadHFilesTool.NAME + " " +
3355        path.toUri().getPath() + " " + tableName);
3356    }
3357  }
3358
3359  public Multimap<byte[], HbckInfo> getOverlapGroups(
3360      TableName table) {
3361    TableInfo ti = tablesInfo.get(table);
3362    return ti.overlapGroups;
3363  }
3364
  /**
   * Return descriptors of user-space tables whose metadata has not been
   * modified in the last few milliseconds specified by timelag:
   * if none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
   * SPLITA_QUALIFIER or SPLITB_QUALIFIER columns has changed in the last
   * milliseconds specified by timelag, then the table is a candidate to be returned.
   * @return tables that have not been modified recently
   */
3374  TableDescriptor[] getTables(AtomicInteger numSkipped) {
3375    List<TableName> tableNames = new ArrayList<>();
3376    long now = EnvironmentEdgeManager.currentTime();
3377
3378    for (HbckInfo hbi : regionInfoMap.values()) {
3379      MetaEntry info = hbi.metaEntry;
3380
      // if the start key is empty, then we have found the first region of a table.
3382      // pick only those tables that were not modified in the last few milliseconds.
3383      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3384        if (info.modTime + timelag < now) {
3385          tableNames.add(info.getTable());
3386        } else {
3387          numSkipped.incrementAndGet(); // one more in-flux table
3388        }
3389      }
3390    }
3391    return getTableDescriptors(tableNames);
3392  }
3393
  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
    LOG.info("getTableDescriptors == tableNames => " + tableNames);
3396    try (Connection conn = ConnectionFactory.createConnection(getConf());
3397        Admin admin = conn.getAdmin()) {
3398      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
3399      return tds.toArray(new TableDescriptor[tds.size()]);
3400    } catch (IOException e) {
3401      LOG.debug("Exception getting table descriptors", e);
3402    }
3403    return new TableDescriptor[0];
3404  }
3405
3406  /**
   * Gets the entry in regionInfoMap corresponding to the given encoded
3408   * region name. If the region has not been seen yet, a new entry is added
3409   * and returned.
3410   */
3411  private synchronized HbckInfo getOrCreateInfo(String name) {
3412    HbckInfo hbi = regionInfoMap.get(name);
3413    if (hbi == null) {
3414      hbi = new HbckInfo(null);
3415      regionInfoMap.put(name, hbi);
3416    }
3417    return hbi;
3418  }
3419
3420  private void checkAndFixReplication() throws ReplicationException {
3421    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
3422    checker.checkUnDeletedQueues();
3423
3424    if (checker.hasUnDeletedQueues() && this.fixReplication) {
3425      checker.fixUnDeletedQueues();
3426      setShouldRerun();
3427    }
3428  }
3429
  /**
   * Check values in regionInfo for hbase:meta.
   * Check if zero or more than one region with hbase:meta is found.
   * If there are inconsistencies (i.e. zero regions or more than one region
   * claim to be holding hbase:meta), try to fix that and report an error.
   * @throws IOException from HBaseFsckRepair functions
   * @throws KeeperException if a ZooKeeper operation fails
   * @throws InterruptedException if interrupted while waiting for a fix
   */
3439  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3440    Map<Integer, HbckInfo> metaRegions = new HashMap<>();
3441    for (HbckInfo value : regionInfoMap.values()) {
3442      if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3443        metaRegions.put(value.getReplicaId(), value);
3444      }
3445    }
3446    int metaReplication = admin.getDescriptor(TableName.META_TABLE_NAME)
3447        .getRegionReplication();
3448    boolean noProblem = true;
    // There will always be entries in regionInfoMap corresponding to hbase:meta and its replicas.
3450    // Check the deployed servers. It should be exactly one server for each replica.
3451    for (int i = 0; i < metaReplication; i++) {
3452      HbckInfo metaHbckInfo = metaRegions.remove(i);
3453      List<ServerName> servers = new ArrayList<>();
3454      if (metaHbckInfo != null) {
3455        servers = metaHbckInfo.deployedOn;
3456      }
3457      if (servers.size() != 1) {
3458        noProblem = false;
3459        if (servers.isEmpty()) {
3460          assignMetaReplica(i);
3461        } else if (servers.size() > 1) {
          errors.reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
              metaHbckInfo.getReplicaId() + " is found on more than one region server.");
3465          if (shouldFixAssignments()) {
3466            errors.print("Trying to fix a problem with hbase:meta, replicaId " +
                         metaHbckInfo.getReplicaId() + "..");
3468            setShouldRerun();
            // try to fix it (treat it as a dupe assignment)
3470            HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3471          }
3472        }
3473      }
3474    }
3475    // unassign whatever is remaining in metaRegions. They are excess replicas.
3476    for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3477      noProblem = false;
3478      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3479          "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3480          ", deployed " + metaRegions.size());
3481      if (shouldFixAssignments()) {
3482        errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3483            " of hbase:meta..");
3484        setShouldRerun();
3485        unassignMetaReplica(entry.getValue());
3486      }
3487    }
3488    // if noProblem is false, rerun hbck with hopefully fixed META
3489    // if noProblem is true, no errors, so continue normally
3490    return noProblem;
3491  }
3492
3493  private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3494  KeeperException {
3495    undeployRegions(hi);
3496    ZKUtil.deleteNode(zkw, zkw.getZNodePaths().getZNodeForReplica(hi.metaEntry.getReplicaId()));
3497  }
3498
3499  private void assignMetaReplica(int replicaId)
3500      throws IOException, KeeperException, InterruptedException {
    errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
        replicaId + " is not found on any region server.");
3503    if (shouldFixAssignments()) {
3504      errors.print("Trying to fix a problem with hbase:meta..");
3505      setShouldRerun();
3506      // try to fix it (treat it as unassigned region)
3507      RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3508          RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
3509      HBaseFsckRepair.fixUnassigned(admin, h);
3510      HBaseFsckRepair.waitUntilAssigned(admin, h);
3511    }
3512  }
3513
3514  /**
3515   * Scan hbase:meta, adding all regions found to the regionInfo map.
3516   * @throws IOException if an error is encountered
3517   */
3518  boolean loadMetaEntries() throws IOException {
3519    MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3520      int countRecord = 1;
3521
      // comparator used to find the Cell with the latest modification timestamp
3523      final Comparator<Cell> comp = new Comparator<Cell>() {
3524        @Override
3525        public int compare(Cell k1, Cell k2) {
3526          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3527        }
3528      };
3529
3530      @Override
3531      public boolean visit(Result result) throws IOException {
3532        try {
3533
3534          // record the latest modification of this META record
3535          long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3536          RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3537          if (rl == null) {
3538            emptyRegionInfoQualifiers.add(result);
3539            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3540              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3541            return true;
3542          }
3543          ServerName sn = null;
3544          if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null ||
3545              rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion() == null) {
3546            emptyRegionInfoQualifiers.add(result);
3547            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3548              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3549            return true;
3550          }
3551          RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion();
3552          if (!(isTableIncluded(hri.getTable())
3553              || hri.isMetaRegion())) {
3554            return true;
3555          }
3556          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
3557          for (HRegionLocation h : rl.getRegionLocations()) {
3558            if (h == null || h.getRegion() == null) {
3559              continue;
3560            }
3561            sn = h.getServerName();
3562            hri = h.getRegion();
3563
3564            MetaEntry m = null;
3565            if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
3566              m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3567            } else {
3568              m = new MetaEntry(hri, sn, ts, null, null);
3569            }
3570            HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3571            if (previous == null) {
3572              regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3573            } else if (previous.metaEntry == null) {
3574              previous.metaEntry = m;
3575            } else {
3576              throw new IOException("Two entries in hbase:meta are same " + previous);
3577            }
3578          }
3579          PairOfSameType<RegionInfo> mergeRegions = MetaTableAccessor.getMergeRegions(result);
3580          for (RegionInfo mergeRegion : new RegionInfo[] {
3581              mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3582            if (mergeRegion != null) {
              // This region has already been merged
3584              HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3585              hbInfo.setMerged(true);
3586            }
3587          }
3588
3589          // show proof of progress to the user, once for every 100 records.
3590          if (countRecord % 100 == 0) {
3591            errors.progress();
3592          }
3593          countRecord++;
3594          return true;
3595        } catch (RuntimeException e) {
3596          LOG.error("Result=" + result);
3597          throw e;
3598        }
3599      }
3600    };
3601    if (!checkMetaOnly) {
3602      // Scan hbase:meta to pick up user regions
3603      MetaTableAccessor.fullScanRegions(connection, visitor);
3604    }
3605
3606    errors.print("");
3607    return true;
3608  }
3609
3610  /**
3611   * Stores the regioninfo entries scanned from META
3612   */
3613  static class MetaEntry extends HRegionInfo {
3614    ServerName regionServer;   // server hosting this region
    long modTime;          // timestamp of most recent modification of this region's metadata
    RegionInfo splitA, splitB; // split daughters
3617
3618    public MetaEntry(RegionInfo rinfo, ServerName regionServer, long modTime) {
3619      this(rinfo, regionServer, modTime, null, null);
3620    }
3621
3622    public MetaEntry(RegionInfo rinfo, ServerName regionServer, long modTime,
3623        RegionInfo splitA, RegionInfo splitB) {
3624      super(rinfo);
3625      this.regionServer = regionServer;
3626      this.modTime = modTime;
3627      this.splitA = splitA;
3628      this.splitB = splitB;
3629    }
3630
3631    @Override
3632    public boolean equals(Object o) {
3633      boolean superEq = super.equals(o);
3634      if (!superEq) {
3635        return superEq;
3636      }
3637
3638      MetaEntry me = (MetaEntry) o;
3639      if (!regionServer.equals(me.regionServer)) {
3640        return false;
3641      }
3642      return (modTime == me.modTime);
3643    }
3644
3645    @Override
3646    public int hashCode() {
3647      int hash = Arrays.hashCode(getRegionName());
3648      hash = (int) (hash ^ getRegionId());
3649      hash ^= Arrays.hashCode(getStartKey());
3650      hash ^= Arrays.hashCode(getEndKey());
3651      hash ^= Boolean.valueOf(isOffline()).hashCode();
3652      hash ^= getTable().hashCode();
3653      if (regionServer != null) {
3654        hash ^= regionServer.hashCode();
3655      }
3656      hash = (int) (hash ^ modTime);
3657      return hash;
3658    }
3659  }
3660
3661  /**
3662   * Stores the regioninfo entries from HDFS
3663   */
3664  static class HdfsEntry {
3665    RegionInfo hri;
3666    Path hdfsRegionDir = null;
3667    long hdfsRegionDirModTime  = 0;
3668    boolean hdfsRegioninfoFilePresent = false;
3669    boolean hdfsOnlyEdits = false;
3670  }
3671
3672  /**
   * Stores the regioninfo retrieved from online region servers.
3674   */
3675  static class OnlineEntry {
3676    RegionInfo hri;
3677    ServerName hsa;
3678
3679    @Override
3680    public String toString() {
3681      return hsa.toString() + ";" + hri.getRegionNameAsString();
3682    }
3683  }
3684
3685  /**
3686   * Maintain information about a particular region.  It gathers information
3687   * from three places -- HDFS, META, and region servers.
3688   */
3689  public static class HbckInfo implements KeyRange {
3690    private MetaEntry metaEntry = null; // info in META
3691    private HdfsEntry hdfsEntry = null; // info in HDFS
3692    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3693    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3694    private boolean skipChecks = false; // whether to skip further checks to this region info.
    // whether this region has already been merged into another one
    private boolean isMerged = false;
3696    private int deployedReplicaId = RegionInfo.DEFAULT_REPLICA_ID;
3697    private RegionInfo primaryHRIForDeployedReplica = null;
3698
3699    HbckInfo(MetaEntry metaEntry) {
3700      this.metaEntry = metaEntry;
3701    }
3702
3703    public synchronized int getReplicaId() {
3704      return metaEntry != null? metaEntry.getReplicaId(): deployedReplicaId;
3705    }
3706
3707    public synchronized void addServer(RegionInfo hri, ServerName server) {
      OnlineEntry rse = new OnlineEntry();
3709      rse.hri = hri;
3710      rse.hsa = server;
3711      this.deployedEntries.add(rse);
3712      this.deployedOn.add(server);
3713      // save the replicaId that we see deployed in the cluster
3714      this.deployedReplicaId = hri.getReplicaId();
3715      this.primaryHRIForDeployedReplica =
3716          RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3717    }
3718
3719    @Override
3720    public synchronized String toString() {
3721      StringBuilder sb = new StringBuilder();
3722      sb.append("{ meta => ");
3723      sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3724      sb.append( ", hdfs => " + getHdfsRegionDir());
3725      sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3726      sb.append( ", replicaId => " + getReplicaId());
3727      sb.append(" }");
3728      return sb.toString();
3729    }
3730
3731    @Override
3732    public byte[] getStartKey() {
3733      if (this.metaEntry != null) {
3734        return this.metaEntry.getStartKey();
3735      } else if (this.hdfsEntry != null) {
3736        return this.hdfsEntry.hri.getStartKey();
3737      } else {
3738        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3739        return null;
3740      }
3741    }
3742
3743    @Override
3744    public byte[] getEndKey() {
3745      if (this.metaEntry != null) {
3746        return this.metaEntry.getEndKey();
3747      } else if (this.hdfsEntry != null) {
3748        return this.hdfsEntry.hri.getEndKey();
3749      } else {
3750        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3751        return null;
3752      }
3753    }
3754
3755    public TableName getTableName() {
3756      if (this.metaEntry != null) {
3757        return this.metaEntry.getTable();
3758      } else if (this.hdfsEntry != null) {
3759        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3760        // so we get the name from the Path
3761        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3762        return FSUtils.getTableName(tableDir);
3763      } else {
3764        // return the info from the first online/deployed hri
3765        for (OnlineEntry e : deployedEntries) {
3766          return e.hri.getTable();
3767        }
3768        return null;
3769      }
3770    }
3771
3772    public String getRegionNameAsString() {
3773      if (metaEntry != null) {
3774        return metaEntry.getRegionNameAsString();
3775      } else if (hdfsEntry != null) {
3776        if (hdfsEntry.hri != null) {
3777          return hdfsEntry.hri.getRegionNameAsString();
3778        }
3779      } else {
3780        // return the info from the first online/deployed hri
3781        for (OnlineEntry e : deployedEntries) {
3782          return e.hri.getRegionNameAsString();
3783        }
3784      }
3785      return null;
3786    }
3787
3788    public byte[] getRegionName() {
3789      if (metaEntry != null) {
3790        return metaEntry.getRegionName();
3791      } else if (hdfsEntry != null) {
3792        return hdfsEntry.hri.getRegionName();
3793      } else {
3794        // return the info from the first online/deployed hri
3795        for (OnlineEntry e : deployedEntries) {
3796          return e.hri.getRegionName();
3797        }
3798        return null;
3799      }
3800    }
3801
3802    public RegionInfo getPrimaryHRIForDeployedReplica() {
3803      return primaryHRIForDeployedReplica;
3804    }
3805
3806    Path getHdfsRegionDir() {
3807      if (hdfsEntry == null) {
3808        return null;
3809      }
3810      return hdfsEntry.hdfsRegionDir;
3811    }
3812
3813    boolean containsOnlyHdfsEdits() {
3814      if (hdfsEntry == null) {
3815        return false;
3816      }
3817      return hdfsEntry.hdfsOnlyEdits;
3818    }
3819
3820    boolean isHdfsRegioninfoPresent() {
3821      if (hdfsEntry == null) {
3822        return false;
3823      }
3824      return hdfsEntry.hdfsRegioninfoFilePresent;
3825    }
3826
3827    long getModTime() {
3828      if (hdfsEntry == null) {
3829        return 0;
3830      }
3831      return hdfsEntry.hdfsRegionDirModTime;
3832    }
3833
3834    RegionInfo getHdfsHRI() {
3835      if (hdfsEntry == null) {
3836        return null;
3837      }
3838      return hdfsEntry.hri;
3839    }
3840
3841    public void setSkipChecks(boolean skipChecks) {
3842      this.skipChecks = skipChecks;
3843    }
3844
3845    public boolean isSkipChecks() {
3846      return skipChecks;
3847    }
3848
3849    public void setMerged(boolean isMerged) {
3850      this.isMerged = isMerged;
3851    }
3852
3853    public boolean isMerged() {
3854      return this.isMerged;
3855    }
3856  }
3857
3858  final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3859    @Override
3860    public int compare(HbckInfo l, HbckInfo r) {
3861      if (l == r) {
3862        // same instance
3863        return 0;
3864      }
3865
3866      int tableCompare = l.getTableName().compareTo(r.getTableName());
3867      if (tableCompare != 0) {
3868        return tableCompare;
3869      }
3870
3871      int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3872          l.getStartKey(), r.getStartKey());
3873      if (startComparison != 0) {
3874        return startComparison;
3875      }
3876
3877      // Special case for absolute endkey
3878      byte[] endKey = r.getEndKey();
3879      endKey = (endKey.length == 0) ? null : endKey;
3880      byte[] endKey2 = l.getEndKey();
3881      endKey2 = (endKey2.length == 0) ? null : endKey2;
3882      int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3883          endKey2,  endKey);
3884
3885      if (endComparison != 0) {
3886        return endComparison;
3887      }
3888
3889      // use regionId as tiebreaker.
      // A null hdfsEntry is considered after all possible values, so treat it as bigger.
3891      if (l.hdfsEntry == null && r.hdfsEntry == null) {
3892        return 0;
3893      }
3894      if (l.hdfsEntry == null && r.hdfsEntry != null) {
3895        return 1;
3896      }
3897      // l.hdfsEntry must not be null
3898      if (r.hdfsEntry == null) {
3899        return -1;
3900      }
3901      // both l.hdfsEntry and r.hdfsEntry must not be null.
3902      return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
3903    }
3904  };
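  // Illustrative ordering under this comparator, with hypothetical regions of one table:
  // [a, b) sorts before [a, c), which sorts before [a, ''), because the empty end key is
  // converted to null above and RegionSplitCalculator.BYTES_COMPARATOR is assumed to order
  // null after any non-null key; fully equal ranges fall back to the hdfsEntry regionId.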
3905
3906  /**
3907   * Prints summary of all tables found on the system.
3908   */
3909  private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3910    StringBuilder sb = new StringBuilder();
3911    int numOfSkippedRegions;
3912    errors.print("Summary:");
3913    for (TableInfo tInfo : tablesInfo.values()) {
3914      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
3915          skippedRegions.get(tInfo.getName()).size() : 0;
3916
3917      if (errors.tableHasErrors(tInfo)) {
3918        errors.print("Table " + tInfo.getName() + " is inconsistent.");
      } else if (numOfSkippedRegions > 0) {
        errors.print("Table " + tInfo.getName() + " is okay (with "
          + numOfSkippedRegions + " skipped regions).");
      } else {
3924        errors.print("Table " + tInfo.getName() + " is okay.");
3925      }
3926      errors.print("    Number of regions: " + tInfo.getNumRegions());
3927      if (numOfSkippedRegions > 0) {
3928        Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
3929        System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
3930        System.out.println("      List of skipped regions:");
3931        for(String sr : skippedRegionStrings) {
3932          System.out.println("        " + sr);
3933        }
3934      }
3935      sb.setLength(0); // clear out existing buffer, if any.
3936      sb.append("    Deployed on: ");
3937      for (ServerName server : tInfo.deployedOn) {
3938        sb.append(" " + server.toString());
3939      }
3940      errors.print(sb.toString());
3941    }
3942  }
3943
3944  static ErrorReporter getErrorReporter(
3945      final Configuration conf) throws ClassNotFoundException {
    Class<? extends ErrorReporter> reporter =
        conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3947    return ReflectionUtils.newInstance(reporter, conf);
3948  }
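
  // Configuration sketch (illustrative; MyCustomReporter is a hypothetical class): the reporter
  // implementation is pluggable through the "hbasefsck.errorreporter" key read above, assuming
  // the class is on the classpath and instantiable by ReflectionUtils:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setClass("hbasefsck.errorreporter", MyCustomReporter.class, ErrorReporter.class);
  //   ErrorReporter reporter = HBaseFsck.getErrorReporter(conf);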
3949
3950  public interface ErrorReporter {
3951    enum ERROR_CODE {
3952      UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3953      NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
3954      NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3955      FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3956      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3957      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3958      LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR,
3959      ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE, DUPE_ENDKEYS,
3960      UNSUPPORTED_OPTION, INVALID_TABLE
3961    }
3962    void clear();
3963    void report(String message);
3964    void reportError(String message);
3965    void reportError(ERROR_CODE errorCode, String message);
3966    void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3967    void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3968    void reportError(
3969      ERROR_CODE errorCode,
3970      String message,
3971      TableInfo table,
3972      HbckInfo info1,
3973      HbckInfo info2
3974    );
3975    int summarize();
3976    void detail(String details);
3977    ArrayList<ERROR_CODE> getErrorList();
3978    void progress();
3979    void print(String message);
3980    void resetErrors();
3981    boolean tableHasErrors(TableInfo table);
3982  }
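
  // Minimal implementation sketch (illustrative only; a real reporter must implement every method
  // of the interface, most of which are elided here):
  //
  //   class CollectingReporter implements ErrorReporter {
  //     private final ArrayList<ERROR_CODE> codes = new ArrayList<>();
  //     @Override public synchronized void reportError(ERROR_CODE code, String msg) { codes.add(code); }
  //     @Override public ArrayList<ERROR_CODE> getErrorList() { return codes; }
  //     // ... remaining ErrorReporter methods omitted ...
  //   }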
3983
3984  static class PrintingErrorReporter implements ErrorReporter {
3985    public int errorCount = 0;
3986    private int showProgress;
3987    // How frequently calls to progress() will create output
3988    private static final int progressThreshold = 100;
3989
3990    Set<TableInfo> errorTables = new HashSet<>();
3991
3992    // for use by unit tests to verify which errors were discovered
3993    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
3994
3995    @Override
3996    public void clear() {
3997      errorTables.clear();
3998      errorList.clear();
3999      errorCount = 0;
4000    }
4001
4002    @Override
4003    public synchronized void reportError(ERROR_CODE errorCode, String message) {
4004      if (errorCode == ERROR_CODE.WRONG_USAGE) {
4005        System.err.println(message);
4006        return;
4007      }
4008
4009      errorList.add(errorCode);
4010      if (!summary) {
4011        System.out.println("ERROR: " + message);
4012      }
4013      errorCount++;
4014      showProgress = 0;
4015    }
4016
4017    @Override
4018    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
4019      errorTables.add(table);
4020      reportError(errorCode, message);
4021    }
4022
4023    @Override
4024    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4025                                         HbckInfo info) {
4026      errorTables.add(table);
4027      String reference = "(region " + info.getRegionNameAsString() + ")";
4028      reportError(errorCode, reference + " " + message);
4029    }
4030
4031    @Override
4032    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4033                                         HbckInfo info1, HbckInfo info2) {
4034      errorTables.add(table);
4035      String reference = "(regions " + info1.getRegionNameAsString()
4036          + " and " + info2.getRegionNameAsString() + ")";
4037      reportError(errorCode, reference + " " + message);
4038    }
4039
4040    @Override
4041    public synchronized void reportError(String message) {
4042      reportError(ERROR_CODE.UNKNOWN, message);
4043    }
4044
4045    /**
4046     * Report error information, but do not increment the error count.  Intended for cases
4047     * where the actual error would have been reported previously.
     * @param message the error information to print
4049     */
4050    @Override
4051    public synchronized void report(String message) {
      if (!summary) {
4053        System.out.println("ERROR: " + message);
4054      }
4055      showProgress = 0;
4056    }
4057
4058    @Override
4059    public synchronized int summarize() {
4060      System.out.println(Integer.toString(errorCount) +
4061                         " inconsistencies detected.");
4062      if (errorCount == 0) {
4063        System.out.println("Status: OK");
4064        return 0;
4065      } else {
4066        System.out.println("Status: INCONSISTENT");
4067        return -1;
4068      }
4069    }
4070
4071    @Override
4072    public ArrayList<ERROR_CODE> getErrorList() {
4073      return errorList;
4074    }
4075
4076    @Override
4077    public synchronized void print(String message) {
4078      if (!summary) {
4079        System.out.println(message);
4080      }
4081    }
4082
4083    @Override
4084    public boolean tableHasErrors(TableInfo table) {
4085      return errorTables.contains(table);
4086    }
4087
4088    @Override
4089    public void resetErrors() {
4090      errorCount = 0;
4091    }
4092
4093    @Override
4094    public synchronized void detail(String message) {
4095      if (details) {
4096        System.out.println(message);
4097      }
4098      showProgress = 0;
4099    }
4100
4101    @Override
4102    public synchronized void progress() {
4103      if (showProgress++ == progressThreshold) {
4104        if (!summary) {
4105          System.out.print(".");
4106        }
4107        showProgress = 0;
4108      }
4109    }
4110  }
4111
4112  /**
4113   * Contact a region server and get all information from it
4114   */
4115  static class WorkItemRegion implements Callable<Void> {
4116    private final HBaseFsck hbck;
4117    private final ServerName rsinfo;
4118    private final ErrorReporter errors;
4119    private final Connection connection;
4120
4121    WorkItemRegion(HBaseFsck hbck, ServerName info,
4122                   ErrorReporter errors, Connection connection) {
4123      this.hbck = hbck;
4124      this.rsinfo = info;
4125      this.errors = errors;
4126      this.connection = connection;
4127    }
4128
4129    @Override
4130    public synchronized Void call() throws IOException {
4131      errors.progress();
4132      try {
4133        // list all online regions from this region server
4134        List<RegionInfo> regions = connection.getAdmin().getRegions(rsinfo);
4135        regions = filterRegions(regions);
4136
4137        if (details) {
4138          errors.detail(
4139            "RegionServer: " + rsinfo.getServerName() + " number of regions: " + regions.size());
4140          for (RegionInfo rinfo : regions) {
4141            errors.detail("  " + rinfo.getRegionNameAsString() + " id: " + rinfo.getRegionId() +
4142              " encoded_name: " + rinfo.getEncodedName() + " start: " +
4143              Bytes.toStringBinary(rinfo.getStartKey()) + " end: " +
4144              Bytes.toStringBinary(rinfo.getEndKey()));
4145          }
4146        }
4147
4148        // check to see if the existence of this region matches the region in META
4149        for (RegionInfo r : regions) {
4150          HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4151          hbi.addServer(r, rsinfo);
4152        }
4153      } catch (IOException e) { // unable to connect to the region server.
4154        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE,
4155          "RegionServer: " + rsinfo.getServerName() + " Unable to fetch region information. " + e);
4156        throw e;
4157      }
4158      return null;
4159    }
4160
4161    private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
4162      List<RegionInfo> ret = Lists.newArrayList();
4163      for (RegionInfo hri : regions) {
4164        if (hri.isMetaRegion() || (!hbck.checkMetaOnly
4165            && hbck.isTableIncluded(hri.getTable()))) {
4166          ret.add(hri);
4167        }
4168      }
4169      return ret;
4170    }
4171  }
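
  // Submission sketch (illustrative; the real scheduling lives elsewhere in this class): one
  // WorkItemRegion per live region server is handed to the executor and the futures are drained,
  // surfacing an RS_CONNECT_FAILURE as an ExecutionException. "liveServers" is a hypothetical name.
  //
  //   List<Future<Void>> futures = new ArrayList<>();
  //   for (ServerName rs : liveServers) {
  //     futures.add(executor.submit(new WorkItemRegion(this, rs, errors, connection)));
  //   }
  //   for (Future<Void> f : futures) {
  //     f.get();
  //   }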
4172
  /**
   * Contact hdfs and collect information about every region directory under the
   * specified table directory.
   */
4177  class WorkItemHdfsDir implements Callable<Void> {
4178    private FileStatus tableDir;
4179    private ErrorReporter errors;
4180    private FileSystem fs;
4181
4182    WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4183                    FileStatus status) {
4184      this.fs = fs;
4185      this.tableDir = status;
4186      this.errors = errors;
4187    }
4188
4189    @Override
4190    public synchronized Void call() throws InterruptedException, ExecutionException {
4191      final Vector<Exception> exceptions = new Vector<>();
4192
4193      try {
4194        final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4195        final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
4196
4197        for (final FileStatus regionDir : regionDirs) {
4198          errors.progress();
4199          final String encodedName = regionDir.getPath().getName();
4200          // ignore directories that aren't hexadecimal
4201          if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
4202            continue;
4203          }
4204
4205          if (!exceptions.isEmpty()) {
4206            break;
4207          }
4208
4209          futures.add(executor.submit(new Runnable() {
4210            @Override
4211            public void run() {
4212              try {
                LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
4214
                Path regioninfoFile =
                    new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
4216                boolean regioninfoFileExists = fs.exists(regioninfoFile);
4217
4218                if (!regioninfoFileExists) {
4219                  // As tables become larger it is more and more likely that by the time you
4220                  // reach a given region that it will be gone due to region splits/merges.
4221                  if (!fs.exists(regionDir.getPath())) {
4222                    LOG.warn("By the time we tried to process this region dir it was already gone: "
4223                        + regionDir.getPath());
4224                    return;
4225                  }
4226                }
4227
4228                HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4229                HdfsEntry he = new HdfsEntry();
4230                synchronized (hbi) {
4231                  if (hbi.getHdfsRegionDir() != null) {
                    errors.print("Directory " + encodedName + " duplicate?? " +
                                 hbi.getHdfsRegionDir());
4234                  }
4235
4236                  he.hdfsRegionDir = regionDir.getPath();
4237                  he.hdfsRegionDirModTime = regionDir.getModificationTime();
4238                  he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4239                  // we add to orphan list when we attempt to read .regioninfo
4240
4241                  // Set a flag if this region contains only edits
4242                  // This is special case if a region is left after split
4243                  he.hdfsOnlyEdits = true;
4244                  FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4245                  Path ePath = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
4246                  for (FileStatus subDir : subDirs) {
4247                    errors.progress();
4248                    String sdName = subDir.getPath().getName();
4249                    if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4250                      he.hdfsOnlyEdits = false;
4251                      break;
4252                    }
4253                  }
4254                  hbi.hdfsEntry = he;
4255                }
4256              } catch (Exception e) {
4257                LOG.error("Could not load region dir", e);
4258                exceptions.add(e);
4259              }
4260            }
4261          }));
4262        }
4263
4264        // Ensure all pending tasks are complete (or that we run into an exception)
4265        for (Future<?> f : futures) {
4266          if (!exceptions.isEmpty()) {
4267            break;
4268          }
4269          try {
4270            f.get();
4271          } catch (ExecutionException e) {
4272            LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
4273            // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4274          }
4275        }
4276      } catch (IOException e) {
4277        LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4278        exceptions.add(e);
4279      } finally {
4280        if (!exceptions.isEmpty()) {
4281          errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4282              + tableDir.getPath().getName()
4283              + " Unable to fetch all HDFS region information. ");
4284          // Just throw the first exception as an indication something bad happened
4285          // Don't need to propagate all the exceptions, we already logged them all anyway
          throw new ExecutionException("First exception in WorkItemHdfsDir",
              exceptions.firstElement());
4287        }
4288      }
4289      return null;
4290    }
4291  }
4292
  /**
   * Contact hdfs and load the .regioninfo file for the given region into its
   * HbckInfo entry.
   */
4297  static class WorkItemHdfsRegionInfo implements Callable<Void> {
4298    private HbckInfo hbi;
4299    private HBaseFsck hbck;
4300    private ErrorReporter errors;
4301
4302    WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4303      this.hbi = hbi;
4304      this.hbck = hbck;
4305      this.errors = errors;
4306    }
4307
4308    @Override
4309    public synchronized Void call() throws IOException {
4310      // only load entries that haven't been loaded yet.
4311      if (hbi.getHdfsHRI() == null) {
4312        try {
4313          errors.progress();
4314          hbck.loadHdfsRegioninfo(hbi);
4315        } catch (IOException ioe) {
4316          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4317              + hbi.getTableName() + " in hdfs dir "
4318              + hbi.getHdfsRegionDir()
4319              + "!  It may be an invalid format or version file.  Treating as "
4320              + "an orphaned regiondir.";
4321          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4322          try {
4323            hbck.debugLsr(hbi.getHdfsRegionDir());
4324          } catch (IOException ioe2) {
4325            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4326            throw ioe2;
4327          }
4328          hbck.orphanHdfsDirs.add(hbi);
4329          throw ioe;
4330        }
4331      }
4332      return null;
4333    }
4334  }
4335
4336  /**
4337   * Display the full report from fsck. This displays all live and dead region
4338   * servers, and all known regions.
4339   */
4340  public static void setDisplayFullReport() {
4341    details = true;
4342  }
4343
4344  /**
4345   * Set exclusive mode.
4346   */
4347  public static void setForceExclusive() {
4348    forceExclusive = true;
4349  }
4350
4351  /**
4352   * Only one instance of hbck can modify HBase at a time.
4353   */
4354  public boolean isExclusive() {
4355    return fixAny || forceExclusive;
4356  }
4357
4358  /**
4359   * Set summary mode.
4360   * Print only summary of the tables and status (OK or INCONSISTENT)
4361   */
4362  static void setSummary() {
4363    summary = true;
4364  }
4365
4366  /**
4367   * Set hbase:meta check mode.
4368   * Print only info about hbase:meta table deployment/state
4369   */
4370  void setCheckMetaOnly() {
4371    checkMetaOnly = true;
4372  }
4373
4374  /**
4375   * Set region boundaries check mode.
4376   */
4377  void setRegionBoundariesCheck() {
4378    checkRegionBoundaries = true;
4379  }
4380
4381  /**
4382   * Set replication fix mode.
4383   */
4384  public void setFixReplication(boolean shouldFix) {
4385    fixReplication = shouldFix;
4386    fixAny |= shouldFix;
4387  }
4388
4389  public void setCleanReplicationBarrier(boolean shouldClean) {
4390    cleanReplicationBarrier = shouldClean;
4391  }
4392
  /**
   * Mark that fsck should be rerun. This is set when we have tried to fix
   * something, so the tool can be run again to verify that the fix worked.
   */
4399  void setShouldRerun() {
4400    rerun = true;
4401  }
4402
4403  public boolean shouldRerun() {
4404    return rerun;
4405  }
4406
4407  /**
4408   * Fix inconsistencies found by fsck. This should try to fix errors (if any)
4409   * found by fsck utility.
4410   */
4411  public void setFixAssignments(boolean shouldFix) {
4412    fixAssignments = shouldFix;
4413    fixAny |= shouldFix;
4414  }
4415
4416  boolean shouldFixAssignments() {
4417    return fixAssignments;
4418  }
4419
4420  public void setFixMeta(boolean shouldFix) {
4421    fixMeta = shouldFix;
4422    fixAny |= shouldFix;
4423  }
4424
4425  boolean shouldFixMeta() {
4426    return fixMeta;
4427  }
4428
4429  public void setFixEmptyMetaCells(boolean shouldFix) {
4430    fixEmptyMetaCells = shouldFix;
4431    fixAny |= shouldFix;
4432  }
4433
4434  boolean shouldFixEmptyMetaCells() {
4435    return fixEmptyMetaCells;
4436  }
4437
4438  public void setCheckHdfs(boolean checking) {
4439    checkHdfs = checking;
4440  }
4441
4442  boolean shouldCheckHdfs() {
4443    return checkHdfs;
4444  }
4445
4446  public void setFixHdfsHoles(boolean shouldFix) {
4447    fixHdfsHoles = shouldFix;
4448    fixAny |= shouldFix;
4449  }
4450
4451  boolean shouldFixHdfsHoles() {
4452    return fixHdfsHoles;
4453  }
4454
4455  public void setFixTableOrphans(boolean shouldFix) {
4456    fixTableOrphans = shouldFix;
4457    fixAny |= shouldFix;
4458  }
4459
4460  boolean shouldFixTableOrphans() {
4461    return fixTableOrphans;
4462  }
4463
4464  public void setFixHdfsOverlaps(boolean shouldFix) {
4465    fixHdfsOverlaps = shouldFix;
4466    fixAny |= shouldFix;
4467  }
4468
4469  boolean shouldFixHdfsOverlaps() {
4470    return fixHdfsOverlaps;
4471  }
4472
4473  public void setFixHdfsOrphans(boolean shouldFix) {
4474    fixHdfsOrphans = shouldFix;
4475    fixAny |= shouldFix;
4476  }
4477
4478  boolean shouldFixHdfsOrphans() {
4479    return fixHdfsOrphans;
4480  }
4481
4482  public void setFixVersionFile(boolean shouldFix) {
4483    fixVersionFile = shouldFix;
4484    fixAny |= shouldFix;
4485  }
4486
4487  public boolean shouldFixVersionFile() {
4488    return fixVersionFile;
4489  }
4490
4491  public void setSidelineBigOverlaps(boolean sbo) {
4492    this.sidelineBigOverlaps = sbo;
4493  }
4494
4495  public boolean shouldSidelineBigOverlaps() {
4496    return sidelineBigOverlaps;
4497  }
4498
4499  public void setFixSplitParents(boolean shouldFix) {
4500    fixSplitParents = shouldFix;
4501    fixAny |= shouldFix;
4502  }
4503
4504  public void setRemoveParents(boolean shouldFix) {
4505    removeParents = shouldFix;
4506    fixAny |= shouldFix;
4507  }
4508
4509  boolean shouldFixSplitParents() {
4510    return fixSplitParents;
4511  }
4512
4513  boolean shouldRemoveParents() {
4514    return removeParents;
4515  }
4516
4517  public void setFixReferenceFiles(boolean shouldFix) {
4518    fixReferenceFiles = shouldFix;
4519    fixAny |= shouldFix;
4520  }
4521
4522  boolean shouldFixReferenceFiles() {
4523    return fixReferenceFiles;
4524  }
4525
4526  public void setFixHFileLinks(boolean shouldFix) {
4527    fixHFileLinks = shouldFix;
4528    fixAny |= shouldFix;
4529  }
4530
4531  boolean shouldFixHFileLinks() {
4532    return fixHFileLinks;
4533  }
4534
4535  public boolean shouldIgnorePreCheckPermission() {
4536    return !fixAny || ignorePreCheckPermission;
4537  }
4538
4539  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4540    this.ignorePreCheckPermission = ignorePreCheckPermission;
4541  }
4542
4543  /**
4544   * @param mm maximum number of regions to merge into a single region.
4545   */
4546  public void setMaxMerge(int mm) {
4547    this.maxMerge = mm;
4548  }
4549
4550  public int getMaxMerge() {
4551    return maxMerge;
4552  }
4553
4554  public void setMaxOverlapsToSideline(int mo) {
4555    this.maxOverlapsToSideline = mo;
4556  }
4557
4558  public int getMaxOverlapsToSideline() {
4559    return maxOverlapsToSideline;
4560  }
4561
  /**
   * Only check/fix tables specified by the list.
   * An empty list means all tables are included.
   */
4566  boolean isTableIncluded(TableName table) {
4567    return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
4568  }
4569
4570  public void includeTable(TableName table) {
4571    tablesIncluded.add(table);
4572  }
4573
4574  Set<TableName> getIncludedTables() {
4575    return new HashSet<>(tablesIncluded);
4576  }
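
  // Example (sketch; "usertable" is an illustrative name): with no includeTable() calls every
  // table passes the filter, otherwise only the listed tables are checked/fixed.
  //
  //   HBaseFsck fsck = new HBaseFsck(conf);
  //   fsck.includeTable(TableName.valueOf("usertable"));
  //   // fsck.isTableIncluded(TableName.valueOf("usertable")) is now true; other tables are excluded.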
4577
4578  /**
4579   * We are interested in only those tables that have not changed their state in
   * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag.
4581   * @param seconds - the time in seconds
4582   */
4583  public void setTimeLag(long seconds) {
4584    timelag = seconds * 1000; // convert to milliseconds
4585  }
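
  // Example (sketch): setTimeLag(60) stores a lag of 60000 ms, so regions whose hbase:meta rows
  // were modified within the last 60 seconds are left out of the checks described above.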
4586
  /**
   * Set the HDFS path used to sideline data.
   * @param sidelineDir - HDFS path to sideline data
   */
4591  public void setSidelineDir(String sidelineDir) {
4592    this.sidelineDir = new Path(sidelineDir);
4593  }
4594
  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
      throws IOException {
4596    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4597  }
4598
4599  public HFileCorruptionChecker getHFilecorruptionChecker() {
4600    return hfcc;
4601  }
4602
4603  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4604    this.hfcc = hfcc;
4605  }
4606
4607  public void setRetCode(int code) {
4608    this.retcode = code;
4609  }
4610
4611  public int getRetCode() {
4612    return retcode;
4613  }
4614
4615  protected HBaseFsck printUsageAndExit() {
4616    StringWriter sw = new StringWriter(2048);
4617    PrintWriter out = new PrintWriter(sw);
4618    out.println("");
4619    out.println("-----------------------------------------------------------------------");
4620    out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
4621    out.println("In general, all Read-Only options are supported and can be be used");
4622    out.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
4623    out.println("below for details on which options are not supported.");
4624    out.println("-----------------------------------------------------------------------");
4625    out.println("");
4626    out.println("Usage: fsck [opts] {only tables}");
4627    out.println(" where [opts] are:");
4628    out.println("   -help Display help options (this)");
4629    out.println("   -details Display full report of all regions.");
4630    out.println("   -timelag <timeInSeconds>  Process only regions that " +
4631                       " have not experienced any metadata updates in the last " +
4632                       " <timeInSeconds> seconds.");
4633    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4634        " before checking if the fix worked if run with -fix");
4635    out.println("   -summary Print only summary of the tables and status.");
4636    out.println("   -metaonly Only check the state of the hbase:meta table.");
4637    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4638    out.println("   -boundaries Verify that regions boundaries are the same between META and store files.");
4639    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
4640
4641    out.println("");
4642    out.println("  Datafile Repair options: (expert features, use with caution!)");
4643    out.println("   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
4644    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
4645
4646    out.println("");
4647    out.println(" Replication options");
4648    out.println("   -fixReplication   Deletes replication queues for removed peers");
4649
4650    out.println("");
4651    out.println("  Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
4652    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4653    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4654    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
4655    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4656        + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4657    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
4658
4659    out.println("");
4660    out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
4661    out.println("");
4662    out.println("  UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
4663    out.println("   -fix              Try to fix region assignments.  This is for backwards compatiblity");
4664    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4665    out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4666    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4667    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4668    out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4669    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4670    out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4671    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
4672    out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4673    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4674    out.println("   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
4675    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4676        + " (empty REGIONINFO_QUALIFIER rows)");
4677
4678    out.println("");
4679    out.println("  UNSUPPORTED Metadata Repair shortcuts");
4680    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4681        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles" +
4682        "-fixHFileLinks");
4683    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4684    out.println("");
4685    out.println(" Replication options");
4686    out.println("   -fixReplication   Deletes replication queues for removed peers");
4687    out.println("   -cleanReplicationBrarier [tableName] clean the replication barriers " +
4688        "of a specified table, tableName is required");
4689    out.flush();
4690    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4691
4692    setRetCode(-2);
4693    return this;
4694  }
4695
  /**
   * Main program entry point.
   *
   * @param args command line arguments
   * @throws Exception if hbck fails to run
   */
4702  public static void main(String[] args) throws Exception {
4703    // create a fsck object
4704    Configuration conf = HBaseConfiguration.create();
4705    Path hbasedir = FSUtils.getRootDir(conf);
4706    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4707    FSUtils.setFsDefault(conf, new Path(defaultFs));
4708    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4709    System.exit(ret);
4710  }
4711
4712  /**
4713   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4714   */
4715  static class HBaseFsckTool extends Configured implements Tool {
4716    HBaseFsckTool(Configuration conf) { super(conf); }
4717    @Override
4718    public int run(String[] args) throws Exception {
4719      HBaseFsck hbck = new HBaseFsck(getConf());
4720      hbck.exec(hbck.executor, args);
4721      hbck.close();
4722      return hbck.getRetCode();
4723    }
4724  }
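
  // Invocation sketch (illustrative; host names and options depend on the deployment). ToolRunner
  // folds -D options into the Configuration before run(String[]) sees the remaining arguments:
  //
  //   hbase org.apache.hadoop.hbase.util.HBaseFsck -Dhbase.zookeeper.quorum=zkhost -details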
4725
4726  public HBaseFsck exec(ExecutorService exec, String[] args)
4727      throws KeeperException, IOException, InterruptedException, ReplicationException {
4728    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4729
4730    boolean checkCorruptHFiles = false;
4731    boolean sidelineCorruptHFiles = false;
4732
4733    // Process command-line args.
4734    for (int i = 0; i < args.length; i++) {
4735      String cmd = args[i];
4736      if (cmd.equals("-help") || cmd.equals("-h")) {
4737        return printUsageAndExit();
4738      } else if (cmd.equals("-details")) {
4739        setDisplayFullReport();
4740      } else if (cmd.equals("-exclusive")) {
4741        setForceExclusive();
4742      } else if (cmd.equals("-timelag")) {
4743        if (i == args.length - 1) {
4744          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4745          return printUsageAndExit();
4746        }
4747        try {
4748          long timelag = Long.parseLong(args[++i]);
4749          setTimeLag(timelag);
4750        } catch (NumberFormatException e) {
4751          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4752          return printUsageAndExit();
4753        }
4754      } else if (cmd.equals("-sleepBeforeRerun")) {
4755        if (i == args.length - 1) {
4756          errors.reportError(ERROR_CODE.WRONG_USAGE,
4757            "HBaseFsck: -sleepBeforeRerun needs a value.");
4758          return printUsageAndExit();
4759        }
4760        try {
4761          sleepBeforeRerun = Long.parseLong(args[++i]);
4762        } catch (NumberFormatException e) {
4763          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4764          return printUsageAndExit();
4765        }
4766      } else if (cmd.equals("-sidelineDir")) {
4767        if (i == args.length - 1) {
4768          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4769          return printUsageAndExit();
4770        }
4771        setSidelineDir(args[++i]);
4772      } else if (cmd.equals("-fix")) {
4773        errors.reportError(ERROR_CODE.WRONG_USAGE,
4774          "This option is deprecated, please use  -fixAssignments instead.");
4775        setFixAssignments(true);
4776      } else if (cmd.equals("-fixAssignments")) {
4777        setFixAssignments(true);
4778      } else if (cmd.equals("-fixMeta")) {
4779        setFixMeta(true);
4780      } else if (cmd.equals("-noHdfsChecking")) {
4781        setCheckHdfs(false);
4782      } else if (cmd.equals("-fixHdfsHoles")) {
4783        setFixHdfsHoles(true);
4784      } else if (cmd.equals("-fixHdfsOrphans")) {
4785        setFixHdfsOrphans(true);
4786      } else if (cmd.equals("-fixTableOrphans")) {
4787        setFixTableOrphans(true);
4788      } else if (cmd.equals("-fixHdfsOverlaps")) {
4789        setFixHdfsOverlaps(true);
4790      } else if (cmd.equals("-fixVersionFile")) {
4791        setFixVersionFile(true);
4792      } else if (cmd.equals("-sidelineBigOverlaps")) {
4793        setSidelineBigOverlaps(true);
4794      } else if (cmd.equals("-fixSplitParents")) {
4795        setFixSplitParents(true);
4796      } else if (cmd.equals("-removeParents")) {
4797        setRemoveParents(true);
4798      } else if (cmd.equals("-ignorePreCheckPermission")) {
4799        setIgnorePreCheckPermission(true);
4800      } else if (cmd.equals("-checkCorruptHFiles")) {
4801        checkCorruptHFiles = true;
4802      } else if (cmd.equals("-sidelineCorruptHFiles")) {
4803        sidelineCorruptHFiles = true;
4804      } else if (cmd.equals("-fixReferenceFiles")) {
4805        setFixReferenceFiles(true);
4806      } else if (cmd.equals("-fixHFileLinks")) {
4807        setFixHFileLinks(true);
4808      } else if (cmd.equals("-fixEmptyMetaCells")) {
4809        setFixEmptyMetaCells(true);
4810      } else if (cmd.equals("-repair")) {
4811        // this attempts to merge overlapping hdfs regions, needs testing
4812        // under load
4813        setFixHdfsHoles(true);
4814        setFixHdfsOrphans(true);
4815        setFixMeta(true);
4816        setFixAssignments(true);
4817        setFixHdfsOverlaps(true);
4818        setFixVersionFile(true);
4819        setSidelineBigOverlaps(true);
4820        setFixSplitParents(false);
4821        setCheckHdfs(true);
4822        setFixReferenceFiles(true);
4823        setFixHFileLinks(true);
4824      } else if (cmd.equals("-repairHoles")) {
4825        // this will make all missing hdfs regions available but may lose data
4826        setFixHdfsHoles(true);
4827        setFixHdfsOrphans(false);
4828        setFixMeta(true);
4829        setFixAssignments(true);
4830        setFixHdfsOverlaps(false);
4831        setSidelineBigOverlaps(false);
4832        setFixSplitParents(false);
4833        setCheckHdfs(true);
4834      } else if (cmd.equals("-maxOverlapsToSideline")) {
4835        if (i == args.length - 1) {
4836          errors.reportError(ERROR_CODE.WRONG_USAGE,
4837            "-maxOverlapsToSideline needs a numeric value argument.");
4838          return printUsageAndExit();
4839        }
4840        try {
4841          int maxOverlapsToSideline = Integer.parseInt(args[++i]);
4842          setMaxOverlapsToSideline(maxOverlapsToSideline);
4843        } catch (NumberFormatException e) {
4844          errors.reportError(ERROR_CODE.WRONG_USAGE,
4845            "-maxOverlapsToSideline needs a numeric value argument.");
4846          return printUsageAndExit();
4847        }
4848      } else if (cmd.equals("-maxMerge")) {
4849        if (i == args.length - 1) {
4850          errors.reportError(ERROR_CODE.WRONG_USAGE,
4851            "-maxMerge needs a numeric value argument.");
4852          return printUsageAndExit();
4853        }
4854        try {
4855          int maxMerge = Integer.parseInt(args[++i]);
4856          setMaxMerge(maxMerge);
4857        } catch (NumberFormatException e) {
4858          errors.reportError(ERROR_CODE.WRONG_USAGE,
4859            "-maxMerge needs a numeric value argument.");
4860          return printUsageAndExit();
4861        }
4862      } else if (cmd.equals("-summary")) {
4863        setSummary();
4864      } else if (cmd.equals("-metaonly")) {
4865        setCheckMetaOnly();
4866      } else if (cmd.equals("-boundaries")) {
4867        setRegionBoundariesCheck();
4868      } else if (cmd.equals("-fixReplication")) {
4869        setFixReplication(true);
4870      } else if (cmd.equals("-cleanReplicationBarrier")) {
4871        setCleanReplicationBarrier(true);
4872        if(args[++i].startsWith("-")){
4873          printUsageAndExit();
4874        }
4875        setCleanReplicationBarrierTable(args[i]);
4876      } else if (cmd.startsWith("-")) {
4877        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
4878        return printUsageAndExit();
4879      } else {
4880        includeTable(TableName.valueOf(cmd));
4881        errors.print("Allow checking/fixes for table: " + cmd);
4882      }
4883    }
4884
4885    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4886
4887    // pre-check current user has FS write permission or not
4888    try {
4889      preCheckPermission();
    } catch (AccessDeniedException ace) {
      LOG.error("Current user lacks the filesystem permissions required by hbck", ace);
      Runtime.getRuntime().exit(-1);
    } catch (IOException ioe) {
      LOG.error("Failed to pre-check filesystem permissions", ioe);
      Runtime.getRuntime().exit(-1);
4894    }
4895
4896    // do the real work of hbck
4897    connect();
4898
4899    // after connecting to server above, we have server version
4900    // check if unsupported option is specified based on server version
4901    if (!isOptionsSupported(args)) {
4902      return printUsageAndExit();
4903    }
4904
4905    try {
4906      // if corrupt file mode is on, first fix them since they may be opened later
4907      if (checkCorruptHFiles || sidelineCorruptHFiles) {
4908        LOG.info("Checking all hfiles for corruption");
4909        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4910        setHFileCorruptionChecker(hfcc); // so we can get result
4911        Collection<TableName> tables = getIncludedTables();
4912        Collection<Path> tableDirs = new ArrayList<>();
4913        Path rootdir = FSUtils.getRootDir(getConf());
4914        if (tables.size() > 0) {
4915          for (TableName t : tables) {
4916            tableDirs.add(FSUtils.getTableDir(rootdir, t));
4917          }
4918        } else {
4919          tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4920        }
4921        hfcc.checkTables(tableDirs);
4922        hfcc.report(errors);
4923      }
4924
4925      // check and fix table integrity, region consistency.
4926      int code = onlineHbck();
4927      setRetCode(code);
4928      // If we have changed the HBase state it is better to run hbck again
4929      // to see if we haven't broken something else in the process.
4930      // We run it only once more because otherwise we can easily fall into
4931      // an infinite loop.
4932      if (shouldRerun()) {
4933        try {
4934          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4935          Thread.sleep(sleepBeforeRerun);
4936        } catch (InterruptedException ie) {
4937          LOG.warn("Interrupted while sleeping");
4938          return this;
4939        }
4940        // Just report
4941        setFixAssignments(false);
4942        setFixMeta(false);
4943        setFixHdfsHoles(false);
4944        setFixHdfsOverlaps(false);
4945        setFixVersionFile(false);
4946        setFixTableOrphans(false);
4947        errors.resetErrors();
4948        code = onlineHbck();
4949        setRetCode(code);
4950      }
4951    } finally {
4952      IOUtils.closeQuietly(this);
4953    }
4954    return this;
4955  }
4956
4957  private boolean isOptionsSupported(String[] args) {
4958    boolean result = true;
4959    String hbaseServerVersion = status.getHBaseVersion();
4960    if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
4961      // Process command-line args.
4962      for (String arg : args) {
4963        if (unsupportedOptionsInV2.contains(arg)) {
          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
              "option '" + arg + "' is not supported!");
4966          result = false;
4967          break;
4968        }
4969      }
4970    }
4971    return result;
4972  }
4973
4974  public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
4975    this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
4976  }
4977
4978  public void cleanReplicationBarrier() throws IOException {
4979    if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
4980      return;
4981    }
4982    if (cleanReplicationBarrierTable.isSystemTable()) {
4983      errors.reportError(ERROR_CODE.INVALID_TABLE,
4984        "invalid table: " + cleanReplicationBarrierTable);
4985      return;
4986    }
4987
4988    boolean isGlobalScope = false;
4989    try {
4990      isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
4991    } catch (TableNotFoundException e) {
4992      LOG.info("we may need to clean some erroneous data due to bugs");
4993    }
4994
4995    if (isGlobalScope) {
4996      errors.reportError(ERROR_CODE.INVALID_TABLE,
4997        "table's replication scope is global: " + cleanReplicationBarrierTable);
4998      return;
4999    }
5000    List<byte[]> regionNames = new ArrayList<>();
5001    Scan barrierScan = new Scan();
5002    barrierScan.setCaching(100);
5003    barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
5004    barrierScan
5005        .withStartRow(MetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
5006          MetaTableAccessor.QueryType.REGION))
5007        .withStopRow(MetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
5008          MetaTableAccessor.QueryType.REGION));
5009    Result result;
5010    try (ResultScanner scanner = meta.getScanner(barrierScan)) {
5011      while ((result = scanner.next()) != null) {
5012        regionNames.add(result.getRow());
5013      }
5014    }
    if (regionNames.isEmpty()) {
      errors.reportError(ERROR_CODE.INVALID_TABLE,
        "there are no replication barriers for table: " + cleanReplicationBarrierTable);
5018      return;
5019    }
5020    ReplicationQueueStorage queueStorage =
5021        ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
5022    List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
5023    if (peerDescriptions != null && peerDescriptions.size() > 0) {
5024      List<String> peers = peerDescriptions.stream()
5025          .filter(peerConfig -> ReplicationUtils.contains(peerConfig.getPeerConfig(),
5026            cleanReplicationBarrierTable))
5027          .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
5028      try {
5029        List<String> batch = new ArrayList<>();
5030        for (String peer : peers) {
5031          for (byte[] regionName : regionNames) {
5032            batch.add(RegionInfo.encodeRegionName(regionName));
5033            if (batch.size() % 100 == 0) {
5034              queueStorage.removeLastSequenceIds(peer, batch);
5035              batch.clear();
5036            }
5037          }
5038          if (batch.size() > 0) {
5039            queueStorage.removeLastSequenceIds(peer, batch);
5040            batch.clear();
5041          }
5042        }
5043      } catch (ReplicationException re) {
5044        throw new IOException(re);
5045      }
5046    }
5047    for (byte[] regionName : regionNames) {
5048      meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
5049    }
5050    setShouldRerun();
5051  }
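
  // Command-line sketch (illustrative; "my_table" is a hypothetical table name):
  //
  //   hbase hbck -cleanReplicationBarrier my_table
  //
  // As implemented above, this scans the REPLICATION_BARRIER_FAMILY of hbase:meta for the table,
  // removes the recorded last sequence ids for every peer whose config covers the table, deletes
  // the barrier cells, and marks hbck for a rerun.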
5052
5053  /**
5054   * ls -r for debugging purposes
5055   */
5056  void debugLsr(Path p) throws IOException {
5057    debugLsr(getConf(), p, errors);
5058  }
5059
5060  /**
5061   * ls -r for debugging purposes
5062   */
5063  public static void debugLsr(Configuration conf,
5064      Path p) throws IOException {
5065    debugLsr(conf, p, new PrintingErrorReporter());
5066  }
5067
5068  /**
5069   * ls -r for debugging purposes
5070   */
5071  public static void debugLsr(Configuration conf,
5072      Path p, ErrorReporter errors) throws IOException {
5073    if (!LOG.isDebugEnabled() || p == null) {
5074      return;
5075    }
5076    FileSystem fs = p.getFileSystem(conf);
5077
5078    if (!fs.exists(p)) {
5079      // nothing
5080      return;
5081    }
5082    errors.print(p.toString());
5083
5084    if (fs.isFile(p)) {
5085      return;
5086    }
5087
5088    if (fs.getFileStatus(p).isDirectory()) {
      FileStatus[] fss = fs.listStatus(p);
5090      for (FileStatus status : fss) {
5091        debugLsr(conf, status.getPath(), errors);
5092      }
5093    }
5094  }
5095}