001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.Closeable;
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.io.InterruptedIOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.net.InetAddress;
027import java.net.URI;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Collection;
031import java.util.Collections;
032import java.util.Comparator;
033import java.util.EnumSet;
034import java.util.HashMap;
035import java.util.HashSet;
036import java.util.Iterator;
037import java.util.List;
038import java.util.Locale;
039import java.util.Map;
040import java.util.Map.Entry;
041import java.util.Objects;
042import java.util.Optional;
043import java.util.Set;
044import java.util.SortedMap;
045import java.util.SortedSet;
046import java.util.TreeMap;
047import java.util.TreeSet;
048import java.util.Vector;
049import java.util.concurrent.Callable;
050import java.util.concurrent.ConcurrentSkipListMap;
051import java.util.concurrent.ExecutionException;
052import java.util.concurrent.ExecutorService;
053import java.util.concurrent.Executors;
054import java.util.concurrent.Future;
055import java.util.concurrent.FutureTask;
056import java.util.concurrent.ScheduledThreadPoolExecutor;
057import java.util.concurrent.TimeUnit;
058import java.util.concurrent.TimeoutException;
059import java.util.concurrent.atomic.AtomicBoolean;
060import java.util.concurrent.atomic.AtomicInteger;
061import java.util.stream.Collectors;
062import org.apache.commons.io.IOUtils;
063import org.apache.commons.lang3.RandomStringUtils;
064import org.apache.commons.lang3.StringUtils;
065import org.apache.hadoop.conf.Configuration;
066import org.apache.hadoop.conf.Configured;
067import org.apache.hadoop.fs.FSDataOutputStream;
068import org.apache.hadoop.fs.FileStatus;
069import org.apache.hadoop.fs.FileSystem;
070import org.apache.hadoop.fs.Path;
071import org.apache.hadoop.fs.permission.FsAction;
072import org.apache.hadoop.fs.permission.FsPermission;
073import org.apache.hadoop.hbase.Abortable;
074import org.apache.hadoop.hbase.Cell;
075import org.apache.hadoop.hbase.CellUtil;
076import org.apache.hadoop.hbase.ClusterMetrics;
077import org.apache.hadoop.hbase.ClusterMetrics.Option;
078import org.apache.hadoop.hbase.HBaseConfiguration;
079import org.apache.hadoop.hbase.HBaseInterfaceAudience;
080import org.apache.hadoop.hbase.HConstants;
081import org.apache.hadoop.hbase.HRegionInfo;
082import org.apache.hadoop.hbase.HRegionLocation;
083import org.apache.hadoop.hbase.KeyValue;
084import org.apache.hadoop.hbase.MasterNotRunningException;
085import org.apache.hadoop.hbase.MetaTableAccessor;
086import org.apache.hadoop.hbase.RegionLocations;
087import org.apache.hadoop.hbase.ServerName;
088import org.apache.hadoop.hbase.TableName;
089import org.apache.hadoop.hbase.TableNotFoundException;
090import org.apache.hadoop.hbase.ZooKeeperConnectionException;
091import org.apache.hadoop.hbase.client.Admin;
092import org.apache.hadoop.hbase.client.ClusterConnection;
093import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
094import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
095import org.apache.hadoop.hbase.client.Connection;
096import org.apache.hadoop.hbase.client.ConnectionFactory;
097import org.apache.hadoop.hbase.client.Delete;
098import org.apache.hadoop.hbase.client.Get;
099import org.apache.hadoop.hbase.client.Put;
100import org.apache.hadoop.hbase.client.RegionInfo;
101import org.apache.hadoop.hbase.client.RegionInfoBuilder;
102import org.apache.hadoop.hbase.client.RegionReplicaUtil;
103import org.apache.hadoop.hbase.client.Result;
104import org.apache.hadoop.hbase.client.ResultScanner;
105import org.apache.hadoop.hbase.client.RowMutations;
106import org.apache.hadoop.hbase.client.Scan;
107import org.apache.hadoop.hbase.client.Table;
108import org.apache.hadoop.hbase.client.TableDescriptor;
109import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
110import org.apache.hadoop.hbase.client.TableState;
111import org.apache.hadoop.hbase.io.FileLink;
112import org.apache.hadoop.hbase.io.HFileLink;
113import org.apache.hadoop.hbase.io.hfile.CacheConfig;
114import org.apache.hadoop.hbase.io.hfile.HFile;
115import org.apache.hadoop.hbase.log.HBaseMarkers;
116import org.apache.hadoop.hbase.master.MasterFileSystem;
117import org.apache.hadoop.hbase.master.RegionState;
118import org.apache.hadoop.hbase.regionserver.HRegion;
119import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
120import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
121import org.apache.hadoop.hbase.replication.ReplicationException;
122import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
123import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
124import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
125import org.apache.hadoop.hbase.replication.ReplicationUtils;
126import org.apache.hadoop.hbase.security.AccessDeniedException;
127import org.apache.hadoop.hbase.security.UserProvider;
128import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
129import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
130import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
131import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
132import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
133import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
134import org.apache.hadoop.hbase.wal.WAL;
135import org.apache.hadoop.hbase.wal.WALFactory;
136import org.apache.hadoop.hbase.wal.WALSplitter;
137import org.apache.hadoop.hbase.zookeeper.ZKUtil;
138import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
139import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
140import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
141import org.apache.hadoop.ipc.RemoteException;
142import org.apache.hadoop.security.UserGroupInformation;
143import org.apache.hadoop.util.ReflectionUtils;
144import org.apache.hadoop.util.Tool;
145import org.apache.hadoop.util.ToolRunner;
146import org.apache.yetus.audience.InterfaceAudience;
147import org.apache.yetus.audience.InterfaceStability;
148import org.apache.zookeeper.KeeperException;
149import org.slf4j.Logger;
150import org.slf4j.LoggerFactory;
151
152import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
153import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
154import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
155import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
156import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
157import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
158import org.apache.hbase.thirdparty.com.google.common.collect.Ordering;
159import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
160import org.apache.hbase.thirdparty.com.google.common.collect.TreeMultimap;
161
162import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
163import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
164
165/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. Against
 * hbase-2.x it can read state, but it is not allowed to change state, i.e. effect 'repair'.
 * See HBCK2 (HBASE-19121) for an hbck tool for hbase-2.x.
170 *
171 * <p>
 * Region consistency checks verify that hbase:meta, region deployment on region
 * servers, and the state of data in HDFS (.regioninfo files) are all in
 * agreement.
175 * <p>
176 * Table integrity checks verify that all possible row keys resolve to exactly
177 * one region of a table.  This means there are no individual degenerate
178 * or backwards regions; no holes between regions; and that there are no
179 * overlapping regions.
180 * <p>
181 * The general repair strategy works in two phases:
182 * <ol>
183 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
184 * <li> Repair Region Consistency with hbase:meta and assignments
185 * </ol>
186 * <p>
187 * For table integrity repairs, the tables' region directories are scanned
188 * for .regioninfo files.  Each table's integrity is then verified.  If there
189 * are any orphan regions (regions with no .regioninfo files) or holes, new
190 * regions are fabricated.  Backwards regions are sidelined as well as empty
191 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
192 * a new region is created and all data is merged into the new region.
193 * <p>
194 * Table integrity repairs deal solely with HDFS and could potentially be done
195 * offline -- the hbase region servers or master do not need to be running.
196 * This phase can eventually be used to completely reconstruct the hbase:meta table in
197 * an offline fashion.
198 * <p>
 * Region consistency requires three conditions -- 1) a valid .regioninfo file
 * present in the HDFS region dir, 2) a valid row with .regioninfo data in META,
 * and 3) the region deployed only at the regionserver it was assigned to,
 * with proper state in the master.
203 * <p>
204 * Region consistency repairs require hbase to be online so that hbck can
205 * contact the HBase master and region servers.  The hbck#connect() method must
206 * first be called successfully.  Much of the region consistency information
207 * is transient and less risky to repair.
208 * <p>
209 * If hbck is run from the command line, there are a handful of arguments that
210 * can be used to limit the kinds of repairs hbck will do.  See the code in
211 * {@link #printUsageAndExit()} for more details.
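 * <p>
 * For example, an illustrative read-only check of a single table might be launched as shown
 * below (the option and table names here are a sketch only; consult the usage text for the
 * authoritative list):
 * <pre>
 *   $ hbase hbck -details TableFoo
 * </pre>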
212 */
213@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
214@InterfaceStability.Evolving
215public class HBaseFsck extends Configured implements Closeable {
216  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
217  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
218  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
219  private static boolean rsSupportsOffline = true;
220  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
221  private static final int DEFAULT_MAX_MERGE = 5;
222  private static final String TO_BE_LOADED = "to_be_loaded";
223  /**
   * This is the file name under which hbase-1.x hbck (hbck1) takes its lock.
   * hbck puts this lock in place before it goes to write/make changes.
226   */
227  @VisibleForTesting
228  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
229  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
230  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
231  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In Hadoop 2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies this operation can take up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
236  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
237  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
238  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
239  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
240
241  /**********************
242   * Internal resources
243   **********************/
244  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
245  private ClusterMetrics status;
246  private ClusterConnection connection;
247  private Admin admin;
248  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
250  protected ExecutorService executor;
251  private long startMillis = EnvironmentEdgeManager.currentTime();
252  private HFileCorruptionChecker hfcc;
253  private int retcode = 0;
254  private Path HBCK_LOCK_PATH;
255  private FSDataOutputStream hbckOutFd;
  // This flag prevents the hbck lock from being cleaned up twice, once by the
  // ShutdownHook and once by the main code. We clean up only if the connect() is
  // successful.
259  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
260
261  // Unsupported options in HBase 2.0+
262  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
263      "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
264      "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
265      "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
266
267  /***********
268   * Options
269   ***********/
270  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this lag
272  private static boolean forceExclusive = false; // only this hbck can modify HBase
273  private boolean fixAssignments = false; // fix assignment errors?
274  private boolean fixMeta = false; // fix meta errors?
275  private boolean checkHdfs = true; // load and check fs consistency?
276  private boolean fixHdfsHoles = false; // fix fs holes?
277  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
278  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
279  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
280  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
281  private boolean fixSplitParents = false; // fix lingering split parents
282  private boolean removeParents = false; // remove split parents
283  private boolean fixReferenceFiles = false; // fix lingering reference store file
284  private boolean fixHFileLinks = false; // fix lingering HFileLinks
285  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
286  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
287  private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
  private boolean fixAny = false; // Set to true if any fix option is enabled.
289
  // limit checking/fixes to the listed tables; if empty, attempt to check/fix all.
  // hbase:meta is always checked.
292  private Set<TableName> tablesIncluded = new HashSet<>();
293  private TableName cleanReplicationBarrierTable;
294  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
295  // maximum number of overlapping regions to sideline
296  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
297  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
298  private Path sidelineDir = null;
299
300  private boolean rerun = false; // if we tried to fix something, rerun hbck
301  private static boolean summary = false; // if we want to print less output
302  private boolean checkMetaOnly = false;
303  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if true, skip the permission pre-check
305
306  /*********
307   * State
308   *********/
309  final private ErrorReporter errors;
310  int fixes = 0;
311
312  /**
313   * This map contains the state of all hbck items.  It maps from encoded region
314   * name to HbckInfo structure.  The information contained in HbckInfo is used
315   * to detect and correct consistency (hdfs/meta/deployment) problems.
316   */
317  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<>();
318  // Empty regioninfo qualifiers in hbase:meta
319  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
320
321  /**
322   * This map from Tablename -> TableInfo contains the structures necessary to
323   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
324   * to prevent dupes.
325   *
326   * If tablesIncluded is empty, this map contains all tables.
327   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case it contains only
   * the meta table.
330   */
331  private SortedMap<TableName, TableInfo> tablesInfo = new ConcurrentSkipListMap<>();
332
333  /**
334   * When initially looking at HDFS, we attempt to find any orphaned data.
335   */
336  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
337
338  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
339  private Map<TableName, TableState> tableStates = new HashMap<>();
340  private final RetryCounterFactory lockFileRetryCounterFactory;
341  private final RetryCounterFactory createZNodeRetryCounterFactory;
342
343  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
344
345  private ZKWatcher zkw = null;
346  private String hbckEphemeralNodePath = null;
347  private boolean hbckZodeCreated = false;
348
349  /**
350   * Constructor
351   *
352   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   * @throws IOException if a general IO failure occurs
   * @throws ClassNotFoundException if a class required by hbck cannot be loaded
355   */
356  public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
357    this(conf, createThreadPool(conf));
358  }
359
360  private static ExecutorService createThreadPool(Configuration conf) {
361    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
362    return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
363  }
364
365  /**
366   * Constructor
367   *
368   * @param conf
369   *          Configuration object
370   * @throws MasterNotRunningException
371   *           if the master is not running
372   * @throws ZooKeeperConnectionException
373   *           if unable to connect to ZooKeeper
374   */
375  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
376      ZooKeeperConnectionException, IOException, ClassNotFoundException {
377    super(conf);
378    errors = getErrorReporter(getConf());
379    this.executor = exec;
380    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
381    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
382    zkw = createZooKeeperWatcher();
383  }
384
385  /**
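   * The retry count and sleep bounds are read from the configuration keys below, shown with
   * their in-code default values (sleep values are in milliseconds):
   * <pre>
   *   hbase.hbck.lockfile.attempts = 5
   *   hbase.hbck.lockfile.attempt.sleep.interval = 200
   *   hbase.hbck.lockfile.attempt.maxsleeptime = 5000
   * </pre>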
386   * @return A retry counter factory configured for retrying lock file creation.
387   */
388  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
389    return new RetryCounterFactory(
390        conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
391        conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
392            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
393        conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
394            DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
395  }
396
397  /**
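   * The retry count and sleep bounds are read from the configuration keys below, shown with
   * their in-code default values (sleep values are in milliseconds):
   * <pre>
   *   hbase.hbck.createznode.attempts = 5
   *   hbase.hbck.createznode.attempt.sleep.interval = 200
   *   hbase.hbck.createznode.attempt.maxsleeptime = 5000
   * </pre>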
398   * @return A retry counter factory configured for retrying znode creation.
399   */
400  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
401    return new RetryCounterFactory(
402        conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
403        conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
404            DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
405        conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
406            DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
407  }
408
409  /**
   * @return the temp dir this tool writes to.
411   */
412  @VisibleForTesting
413  public static Path getTmpDir(Configuration conf) throws IOException {
414    return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
415  }
416
417  private static class FileLockCallable implements Callable<FSDataOutputStream> {
418    RetryCounter retryCounter;
419    private final Configuration conf;
420    private Path hbckLockPath = null;
421
422    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
423      this.retryCounter = retryCounter;
424      this.conf = conf;
425    }
426
427    /**
428     * @return Will be <code>null</code> unless you call {@link #call()}
429     */
430    Path getHbckLockPath() {
431      return this.hbckLockPath;
432    }
433
434    @Override
435    public FSDataOutputStream call() throws IOException {
436      try {
437        FileSystem fs = FSUtils.getCurrentFileSystem(this.conf);
438        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf,
439            HConstants.DATA_FILE_UMASK_KEY);
440        Path tmpDir = getTmpDir(conf);
441        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
442        fs.mkdirs(tmpDir);
443        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
444        out.writeBytes(InetAddress.getLocalHost().toString());
445        // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
446        out.writeBytes(" Written by an hbase-2.x Master to block an " +
447            "attempt by an hbase-1.x HBCK tool making modification to state. " +
448            "See 'HBCK must match HBase server version' in the hbase refguide.");
449        out.flush();
450        return out;
451      } catch(RemoteException e) {
452        if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
453          return null;
454        } else {
455          throw e;
456        }
457      }
458    }
459
460    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
461        final Path hbckLockFilePath, final FsPermission defaultPerms)
462        throws IOException {
463      IOException exception = null;
464      do {
465        try {
466          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
467        } catch (IOException ioe) {
468          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
469              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
470              + retryCounter.getMaxAttempts());
471          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
472              ioe);
473          try {
474            exception = ioe;
475            retryCounter.sleepUntilNextRetry();
476          } catch (InterruptedException ie) {
477            throw (InterruptedIOException) new InterruptedIOException(
478                "Can't create lock file " + hbckLockFilePath.getName())
479            .initCause(ie);
480          }
481        }
482      } while (retryCounter.shouldRetry());
483
484      throw exception;
485    }
486  }
487
488  /**
   * This method maintains a lock using a file. If the creation fails, the returned stream is null.
490   *
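   * <p>A minimal usage sketch, mirroring the call made in {@link #connect()}:
   * <pre>
   *   Pair&lt;Path, FSDataOutputStream&gt; lock =
   *       checkAndMarkRunningHbck(conf, createLockRetryCounterFactory(conf).create());
   *   if (lock.getSecond() == null) {
   *     // another hbck instance already holds the lock
   *   }
   * </pre>
   *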
   * @return a Pair of the lock file path and the FSDataOutputStream of the newly opened lock
   *     file; the stream is null if the lock could not be acquired
492   * @throws IOException if IO failure occurs
493   */
494  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
495      RetryCounter retryCounter) throws IOException {
496    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
497    ExecutorService executor = Executors.newFixedThreadPool(1);
498    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
499    executor.execute(futureTask);
500    final int timeoutInSeconds = conf.getInt(
501      "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
502    FSDataOutputStream stream = null;
503    try {
504      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
505    } catch (ExecutionException ee) {
506      LOG.warn("Encountered exception when opening lock file", ee);
507    } catch (InterruptedException ie) {
508      LOG.warn("Interrupted when opening lock file", ie);
509      Thread.currentThread().interrupt();
510    } catch (TimeoutException exception) {
511      // took too long to obtain lock
512      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
513      futureTask.cancel(true);
514    } finally {
515      executor.shutdownNow();
516    }
517    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
518  }
519
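  /**
   * Deletes the hbck lock file created by
   * {@link #checkAndMarkRunningHbck(Configuration, RetryCounter)}, provided this instance is
   * running in exclusive mode and has not already released the lock; otherwise this is a no-op.
   */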
520  private void unlockHbck() {
521    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
522      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
523      do {
524        try {
525          IOUtils.closeQuietly(hbckOutFd);
526          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
527          LOG.info("Finishing hbck");
528          return;
529        } catch (IOException ioe) {
530          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
531              + (retryCounter.getAttemptTimes() + 1) + " of "
532              + retryCounter.getMaxAttempts());
533          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
534          try {
535            retryCounter.sleepUntilNextRetry();
536          } catch (InterruptedException ie) {
537            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while deleting lock file " +
                HBCK_LOCK_PATH);
540            return;
541          }
542        }
543      } while (retryCounter.shouldRetry());
544    }
545  }
546
547  /**
   * To repair region consistency, one must call connect() first so that hbck can contact the
   * HBase master and region servers and repair online state.
550   */
551  public void connect() throws IOException {
552
553    if (isExclusive()) {
554      // Grab the lock
555      Pair<Path, FSDataOutputStream> pair =
556          checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
557      HBCK_LOCK_PATH = pair.getFirst();
558      this.hbckOutFd = pair.getSecond();
559      if (hbckOutFd == null) {
560        setRetCode(-1);
561        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
562            "[If you are sure no other instance is running, delete the lock file " +
563            HBCK_LOCK_PATH + " and rerun the tool]");
564        throw new IOException("Duplicate hbck - Abort");
565      }
566
567      // Make sure to cleanup the lock
568      hbckLockCleanup.set(true);
569    }
570
571
    // Add a shutdown hook in case the user tries to kill hbck with a ctrl-c;
    // we want to clean up the lock so that it is available for further runs.
575    Runtime.getRuntime().addShutdownHook(new Thread() {
576      @Override
577      public void run() {
578        IOUtils.closeQuietly(HBaseFsck.this);
579        cleanupHbckZnode();
580        unlockHbck();
581      }
582    });
583
584    LOG.info("Launching hbck");
585
586    connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
587    admin = connection.getAdmin();
588    meta = connection.getTable(TableName.META_TABLE_NAME);
589    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
590      Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
591      Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
592  }
593
594  /**
595   * Get deployed regions according to the region servers.
596   */
597  private void loadDeployedRegions() throws IOException, InterruptedException {
598    // From the master, get a list of all known live region servers
599    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
600    errors.print("Number of live region servers: " + regionServers.size());
601    if (details) {
602      for (ServerName rsinfo: regionServers) {
603        errors.print("  " + rsinfo.getServerName());
604      }
605    }
606
607    // From the master, get a list of all dead region servers
608    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
609    errors.print("Number of dead region servers: " + deadRegionServers.size());
610    if (details) {
611      for (ServerName name: deadRegionServers) {
612        errors.print("  " + name);
613      }
614    }
615
616    // Print the current master name and state
617    errors.print("Master: " + status.getMasterName());
618
619    // Print the list of all backup masters
620    Collection<ServerName> backupMasters = status.getBackupMasterNames();
621    errors.print("Number of backup masters: " + backupMasters.size());
622    if (details) {
623      for (ServerName name: backupMasters) {
624        errors.print("  " + name);
625      }
626    }
627
628    errors.print("Average load: " + status.getAverageLoad());
629    errors.print("Number of requests: " + status.getRequestCount());
630    errors.print("Number of regions: " + status.getRegionCount());
631
632    List<RegionState> rits = status.getRegionStatesInTransition();
633    errors.print("Number of regions in transition: " + rits.size());
634    if (details) {
635      for (RegionState state: rits) {
636        errors.print("  " + state.toDescriptiveString());
637      }
638    }
639
640    // Determine what's deployed
641    processRegionServers(regionServers);
642  }
643
644  /**
645   * Clear the current state of hbck.
646   */
647  private void clearState() {
648    // Make sure regionInfo is empty before starting
649    fixes = 0;
650    regionInfoMap.clear();
651    emptyRegionInfoQualifiers.clear();
652    tableStates.clear();
653    errors.clear();
654    tablesInfo.clear();
655    orphanHdfsDirs.clear();
656    skippedRegions.clear();
657  }
658
659  /**
660   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
661   * the table integrity rules.  HBase doesn't need to be online for this
662   * operation to work.
663   */
664  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
665    // Initial pass to fix orphans.
666    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
667        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
669      // if nothing is happening this should always complete in two iterations.
670      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
671      int curIter = 0;
672      do {
        clearState(); // clears hbck state and resets fixes to 0.
674        // repair what's on HDFS
675        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations.
677      } while (fixes > 0 && curIter <= maxIterations);
678
679      // Repairs should be done in the first iteration and verification in the second.
680      // If there are more than 2 passes, something funny has happened.
681      if (curIter > 2) {
682        if (curIter == maxIterations) {
683          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
685        } else {
686          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
687        }
688      }
689    }
690  }
691
692  /**
693   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters.  It makes each region's state in HDFS,
   * in hbase:meta, and its deployment consistent.
696   *
697   * @return If &gt; 0 , number of errors detected, if &lt; 0 there was an unrecoverable
698   *     error.  If 0, we have a clean hbase.
699   */
700  public int onlineConsistencyRepair() throws IOException, KeeperException,
701    InterruptedException {
702
703    // get regions according to what is online on each RegionServer
704    loadDeployedRegions();
705    // check whether hbase:meta is deployed and online
706    recordMetaRegion();
707    // Check if hbase:meta is found only once and in the right place
708    if (!checkMetaRegion()) {
709      String errorMsg = "hbase:meta table is not consistent. ";
710      if (shouldFixAssignments()) {
711        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
712      } else {
713        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
714      }
715      errors.reportError(errorMsg + " Exiting...");
716      return -2;
717    }
    // Do not go further with consistency checks for tables when hbase:meta itself is not consistent.
    LOG.info("Loading regioninfos from the hbase:meta table");
720    boolean success = loadMetaEntries();
721    if (!success) return -1;
722
723    // Empty cells in hbase:meta?
724    reportEmptyMetaCells();
725
726    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
727    if (shouldFixEmptyMetaCells()) {
728      fixEmptyMetaCells();
729    }
730
731    // get a list of all tables that have not changed recently.
732    if (!checkMetaOnly) {
733      reportTablesInFlux();
734    }
735
736    // Get disabled tables states
737    loadTableStates();
738
739    // load regiondirs and regioninfos from HDFS
740    if (shouldCheckHdfs()) {
741      LOG.info("Loading region directories from HDFS");
742      loadHdfsRegionDirs();
743      LOG.info("Loading region information from HDFS");
744      loadHdfsRegionInfos();
745    }
746
747    // fix the orphan tables
748    fixOrphanTables();
749
750    LOG.info("Checking and fixing region consistency");
751    // Check and fix consistency
752    checkAndFixConsistency();
753
754    // Check integrity (does not fix)
755    checkIntegrity();
756    return errors.getErrorList().size();
757  }
758
759  /**
   * This method creates an ephemeral znode that places the master in maintenance mode. If the
   * creation fails we return false or throw an exception.
762   *
763   * @return true if creating znode succeeds; false otherwise
764   * @throws IOException if IO failure occurs
765   */
766  private boolean setMasterInMaintenanceMode() throws IOException {
767    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
768    hbckEphemeralNodePath = ZNodePaths.joinZNode(
769      zkw.getZNodePaths().masterMaintZNode,
770      "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
771    do {
772      try {
773        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
774        if (hbckZodeCreated) {
775          break;
776        }
777      } catch (KeeperException e) {
778        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
779           throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
780        }
781        // fall through and retry
782      }
783
      LOG.warn("Failed to create znode " + hbckEphemeralNodePath + ", try=" +
785          (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
786
787      try {
788        retryCounter.sleepUntilNextRetry();
789      } catch (InterruptedException ie) {
790        throw (InterruptedIOException) new InterruptedIOException(
791              "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
792      }
793    } while (retryCounter.shouldRetry());
794    return hbckZodeCreated;
795  }
796
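  /**
   * Best-effort removal of the ephemeral maintenance-mode znode created by
   * {@link #setMasterInMaintenanceMode()}. A missing node (NONODE) is ignored; other ZooKeeper
   * errors are only logged.
   */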
797  private void cleanupHbckZnode() {
798    try {
799      if (zkw != null && hbckZodeCreated) {
800        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
801        hbckZodeCreated = false;
802      }
803    } catch (KeeperException e) {
804      // Ignore
805      if (!e.code().equals(KeeperException.Code.NONODE)) {
806        LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
807      }
808    }
809  }
810
811  /**
812   * Contacts the master and prints out cluster-wide information
813   * @return 0 on success, non-zero on failure
814   */
815  public int onlineHbck()
816      throws IOException, KeeperException, InterruptedException, ReplicationException {
817    // print hbase server version
818    errors.print("Version: " + status.getHBaseVersion());
819
820    // Clean start
821    clearState();
822    // Do offline check and repair first
823    offlineHdfsIntegrityRepair();
824    offlineReferenceFileRepair();
825    offlineHLinkFileRepair();
826    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
827    // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
828    // is better to set Master into maintenance mode during online hbck.
829    //
830    if (!setMasterInMaintenanceMode()) {
      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
        + "errors.  Please run HBCK multiple times to reduce the chance of transient errors.");
833    }
834
835    onlineConsistencyRepair();
836
837    if (checkRegionBoundaries) {
838      checkRegionBoundaries();
839    }
840
841    checkAndFixReplication();
842
843    cleanReplicationBarrier();
844
845    // Remove the hbck znode
846    cleanupHbckZnode();
847
848    // Remove the hbck lock
849    unlockHbck();
850
851    // Print table summary
852    printTableSummary(tablesInfo);
853    return errors.summarize();
854  }
855
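  /**
   * Extracts the row portion of a serialized KeyValue key: the first
   * {@code Bytes.SIZEOF_SHORT} bytes encode the row length and the row bytes follow
   * immediately after. Returns null when the input is null.
   */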
856  public static byte[] keyOnly (byte[] b) {
857    if (b == null)
858      return b;
859    int rowlength = Bytes.toShort(b, 0);
860    byte[] result = new byte[rowlength];
861    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
862    return result;
863  }
864
865  @Override
866  public void close() throws IOException {
867    try {
868      cleanupHbckZnode();
869      unlockHbck();
870    } catch (Exception io) {
871      LOG.warn(io.toString(), io);
872    } finally {
873      if (zkw != null) {
874        zkw.close();
875        zkw = null;
876      }
877      IOUtils.closeQuietly(admin);
878      IOUtils.closeQuietly(meta);
879      IOUtils.closeQuietly(connection);
880    }
881  }
882
883  private static class RegionBoundariesInformation {
884    public byte [] regionName;
885    public byte [] metaFirstKey;
886    public byte [] metaLastKey;
887    public byte [] storesFirstKey;
888    public byte [] storesLastKey;
889    @Override
890    public String toString () {
891      return "regionName=" + Bytes.toStringBinary(regionName) +
892             "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
893             "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
894             "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
895             "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
896    }
897  }
898
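  /**
   * Compares each region's start and end keys as recorded in hbase:meta with the first and
   * last keys actually present in its store files, and reports a
   * {@code ERROR_CODE.BOUNDARIES_ERROR} for any region whose boundaries do not line up.
   */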
899  public void checkRegionBoundaries() {
900    try {
901      ByteArrayComparator comparator = new ByteArrayComparator();
902      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
903      final RegionBoundariesInformation currentRegionBoundariesInformation =
904          new RegionBoundariesInformation();
905      Path hbaseRoot = FSUtils.getRootDir(getConf());
906      for (RegionInfo regionInfo : regions) {
907        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
908        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
909        // For each region, get the start and stop key from the META and compare them to the
910        // same information from the Stores.
911        Path path = new Path(tableDir, regionInfo.getEncodedName());
912        FileSystem fs = path.getFileSystem(getConf());
913        FileStatus[] files = fs.listStatus(path);
914        // For all the column families in this region...
915        byte[] storeFirstKey = null;
916        byte[] storeLastKey = null;
917        for (FileStatus file : files) {
918          String fileName = file.getPath().toString();
919          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
920          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
921            FileStatus[] storeFiles = fs.listStatus(file.getPath());
922            // For all the stores in this column family.
923            for (FileStatus storeFile : storeFiles) {
924              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
925                CacheConfig.DISABLED, true, getConf());
926              if ((reader.getFirstKey() != null)
927                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
928                      ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
929                storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
930              }
931              if ((reader.getLastKey() != null)
932                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
933                      ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
934                storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
935              }
936              reader.close();
937            }
938          }
939        }
940        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
941        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
942        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
943        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
944        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
945          currentRegionBoundariesInformation.metaFirstKey = null;
946        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
947          currentRegionBoundariesInformation.metaLastKey = null;
948
949        // For a region to be correct, we need the META start key to be smaller or equal to the
950        // smallest start key from all the stores, and the start key from the next META entry to
951        // be bigger than the last key from all the current stores. First region start key is null;
952        // Last region end key is null; some regions can be empty and not have any store.
953
954        boolean valid = true;
955        // Checking start key.
956        if ((currentRegionBoundariesInformation.storesFirstKey != null)
957            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
958          valid = valid
959              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
960                currentRegionBoundariesInformation.metaFirstKey) >= 0;
961        }
962        // Checking stop key.
963        if ((currentRegionBoundariesInformation.storesLastKey != null)
964            && (currentRegionBoundariesInformation.metaLastKey != null)) {
965          valid = valid
966              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
967                currentRegionBoundariesInformation.metaLastKey) < 0;
968        }
969        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
971            tablesInfo.get(regionInfo.getTable()));
972          LOG.warn("Region's boundaries not aligned between stores and META for:");
973          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
974        }
975      }
976    } catch (IOException e) {
977      LOG.error(e.toString(), e);
978    }
979  }
980
981  /**
982   * Iterates through the list of all orphan/invalid regiondirs.
983   */
984  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
985    for (HbckInfo hi : orphanHdfsDirs) {
986      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
987      adoptHdfsOrphan(hi);
988    }
989  }
990
991  /**
992   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
993   * these orphans by creating a new region, and moving the column families,
994   * recovered edits, WALs, into the new region dir.  We determine the region
   * startkey and endkey by looking at all of the hfiles inside the column
996   * families to identify the min and max keys. The resulting region will
997   * likely violate table integrity but will be dealt with by merging
998   * overlapping regions.
999   */
1000  @SuppressWarnings("deprecation")
1001  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
1002    Path p = hi.getHdfsRegionDir();
1003    FileSystem fs = p.getFileSystem(getConf());
1004    FileStatus[] dirs = fs.listStatus(p);
1005    if (dirs == null) {
1006      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
1007          p + ". This dir could probably be deleted.");
1008      return ;
1009    }
1010
1011    TableName tableName = hi.getTableName();
1012    TableInfo tableInfo = tablesInfo.get(tableName);
1013    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
1014    TableDescriptor template = tableInfo.getHTD();
1015
1016    // find min and max key values
1017    Pair<byte[],byte[]> orphanRegionRange = null;
1018    for (FileStatus cf : dirs) {
1019      String cfName= cf.getPath().getName();
1020      // TODO Figure out what the special dirs are
1021      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
1022
1023      FileStatus[] hfiles = fs.listStatus(cf.getPath());
1024      for (FileStatus hfile : hfiles) {
1025        byte[] start, end;
1026        HFile.Reader hf = null;
1027        try {
1028          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
1029          hf.loadFileInfo();
1030          Optional<Cell> startKv = hf.getFirstKey();
1031          start = CellUtil.cloneRow(startKv.get());
1032          Optional<Cell> endKv = hf.getLastKey();
1033          end = CellUtil.cloneRow(endKv.get());
1034        } catch (IOException ioe) {
1035          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
1036          continue;
1037        } catch (NullPointerException ioe) {
1038          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
1039          continue;
1040        } finally {
1041          if (hf != null) {
1042            hf.close();
1043          }
1044        }
1045
1046        // expand the range to include the range of all hfiles
1047        if (orphanRegionRange == null) {
1048          // first range
1049          orphanRegionRange = new Pair<>(start, end);
1050        } else {
1051          // TODO add test
1052
1053          // expand range only if the hfile is wider.
1054          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1055            orphanRegionRange.setFirst(start);
1056          }
1057          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
1058            orphanRegionRange.setSecond(end);
1059          }
1060        }
1061      }
1062    }
1063    if (orphanRegionRange == null) {
1064      LOG.warn("No data in dir " + p + ", sidelining data");
1065      fixes++;
1066      sidelineRegionDir(fs, hi);
1067      return;
1068    }
1069    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1070        Bytes.toString(orphanRegionRange.getSecond()) + ")");
1071
1072    // create new region on hdfs. move data into place.
1073    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1074        .setStartKey(orphanRegionRange.getFirst())
1075        .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1]))
1076        .build();
1077    LOG.info("Creating new region : " + regionInfo);
1078    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1079    Path target = region.getRegionFileSystem().getRegionDir();
1080
1081    // rename all the data to new region
1082    mergeRegionDirs(target, hi);
1083    fixes++;
1084  }
1085
1086  /**
1087   * This method determines if there are table integrity errors in HDFS.  If
1088   * there are errors and the appropriate "fix" options are enabled, the method
1089   * will first correct orphan regions making them into legit regiondirs, and
1090   * then reload to merge potentially overlapping regions.
1091   *
1092   * @return number of table integrity errors found
1093   */
1094  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1095    // Determine what's on HDFS
1096    LOG.info("Loading HBase regioninfo from HDFS...");
1097    loadHdfsRegionDirs(); // populating regioninfo table.
1098
1099    int errs = errors.getErrorList().size();
1100    // First time just get suggestions.
1101    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1102    checkHdfsIntegrity(false, false);
1103
1104    if (errors.getErrorList().size() == errs) {
1105      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1106      return 0;
1107    }
1108
1109    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1110      adoptHdfsOrphans(orphanHdfsDirs);
1111      // TODO optimize by incrementally adding instead of reloading.
1112    }
1113
1114    // Make sure there are no holes now.
1115    if (shouldFixHdfsHoles()) {
1116      clearState(); // this also resets # fixes.
1117      loadHdfsRegionDirs();
1118      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1119      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1120    }
1121
1122    // Now we fix overlaps
1123    if (shouldFixHdfsOverlaps()) {
1124      // second pass we fix overlaps.
1125      clearState(); // this also resets # fixes.
1126      loadHdfsRegionDirs();
1127      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1128      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1129    }
1130
1131    return errors.getErrorList().size();
1132  }
1133
1134  /**
1135   * Scan all the store file names to find any lingering reference files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering reference file found will be sidelined.
   * <p>
   * A lingering reference file prevents a region from opening. It has to
1140   * be fixed before a cluster can start properly.
1141   */
1142  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1143    clearState();
1144    Configuration conf = getConf();
1145    Path hbaseRoot = FSUtils.getRootDir(conf);
1146    FileSystem fs = hbaseRoot.getFileSystem(conf);
1147    LOG.info("Computing mapping of all store files");
1148    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1149      new FSUtils.ReferenceFileFilter(fs), executor, errors);
1150    errors.print("");
1151    LOG.info("Validating mapping using HDFS state");
1152    for (Path path: allFiles.values()) {
1153      Path referredToFile = StoreFileInfo.getReferredToFile(path);
1154      if (fs.exists(referredToFile)) continue;  // good, expected
1155
1156      // Found a lingering reference file
1157      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1158        "Found lingering reference file " + path);
1159      if (!shouldFixReferenceFiles()) continue;
1160
1161      // Now, trying to fix it since requested
1162      boolean success = false;
1163      String pathStr = path.toString();
1164
1165      // A reference file path should be like
1166      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
      // We walk up 5 directories to find the portion of the path relative to the root folder.
1168      // So the file will be sidelined to a similar folder structure.
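      // For example (all names below are hypothetical), a reference file at
      //   ${hbase.rootdir}/data/default/t1/regionA/f1/hfile1.regionB
      // would be sidelined to
      //   <sideline dir>/data/default/t1/regionA/f1/hfile1.regionB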
1169      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1170      for (int i = 0; index > 0 && i < 5; i++) {
1171        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1172      }
1173      if (index > 0) {
1174        Path rootDir = getSidelineDir();
1175        Path dst = new Path(rootDir, pathStr.substring(index + 1));
1176        fs.mkdirs(dst.getParent());
1177        LOG.info("Trying to sideline reference file "
1178          + path + " to " + dst);
1179        setShouldRerun();
1180
1181        success = fs.rename(path, dst);
1182        debugLsr(dst);
1183
1184      }
1185      if (!success) {
1186        LOG.error("Failed to sideline reference file " + path);
1187      }
1188    }
1189  }
1190
1191  /**
1192   * Scan all the store file names to find any lingering HFileLink files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering HFileLink file found will be sidelined.
1195   */
1196  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1197    Configuration conf = getConf();
1198    Path hbaseRoot = FSUtils.getRootDir(conf);
1199    FileSystem fs = hbaseRoot.getFileSystem(conf);
1200    LOG.info("Computing mapping of all link files");
1201    Map<String, Path> allFiles = FSUtils
1202        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1203    errors.print("");
1204
1205    LOG.info("Validating mapping using HDFS state");
1206    for (Path path : allFiles.values()) {
1207      // building HFileLink object to gather locations
1208      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1209      if (actualLink.exists(fs)) continue; // good, expected
1210
1211      // Found a lingering HFileLink
1212      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1213      if (!shouldFixHFileLinks()) continue;
1214
1215      // Now, trying to fix it since requested
1216      setShouldRerun();
1217
1218      // An HFileLink path should be like
1219      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1221      boolean success = sidelineFile(fs, hbaseRoot, path);
1222
1223      if (!success) {
1224        LOG.error("Failed to sideline HFileLink file " + path);
1225      }
1226
1227      // An HFileLink backreference path should be like
1228      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1230      Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1231              .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1232                  HFileLink.getReferencedRegionName(path.getName().toString()),
1233                  path.getParent().getName()),
1234          HFileLink.getReferencedHFileName(path.getName().toString()));
1235      success = sidelineFile(fs, hbaseRoot, backRefPath);
1236
1237      if (!success) {
1238        LOG.error("Failed to sideline HFileLink backreference file " + path);
1239      }
1240    }
1241  }
1242
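  /**
   * Moves the given file under the sideline directory, preserving its path relative to the
   * HBase root dir.
   * @return true if the file was renamed into the sideline location, false otherwise
   */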
1243  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1244    URI uri = hbaseRoot.toUri().relativize(path.toUri());
1245    if (uri.isAbsolute()) return false;
1246    String relativePath = uri.getPath();
1247    Path rootDir = getSidelineDir();
1248    Path dst = new Path(rootDir, relativePath);
1249    boolean pathCreated = fs.mkdirs(dst.getParent());
1250    if (!pathCreated) {
1251      LOG.error("Failed to create path: " + dst.getParent());
1252      return false;
1253    }
1254    LOG.info("Trying to sideline file " + path + " to " + dst);
1255    return fs.rename(path, dst);
1256  }
1257
1258  /**
1259   * TODO -- need to add tests for this.
1260   */
1261  private void reportEmptyMetaCells() {
1262    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1263      emptyRegionInfoQualifiers.size());
1264    if (details) {
1265      for (Result r: emptyRegionInfoQualifiers) {
1266        errors.print("  " + r);
1267      }
1268    }
1269  }
1270
1271  /**
1272   * TODO -- need to add tests for this.
1273   */
1274  private void reportTablesInFlux() {
1275    AtomicInteger numSkipped = new AtomicInteger(0);
1276    TableDescriptor[] allTables = getTables(numSkipped);
1277    errors.print("Number of Tables: " + allTables.length);
1278    if (details) {
1279      if (numSkipped.get() > 0) {
1280        errors.detail("Number of Tables in flux: " + numSkipped.get());
1281      }
1282      for (TableDescriptor td : allTables) {
1283        errors.detail("  Table: " + td.getTableName() + "\t" +
1284                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
1285                            (td.isMetaRegion() ? "META" : "    ") + "\t" +
1286                           " families: " + td.getColumnFamilyCount());
1287      }
1288    }
1289  }
1290
1291  public ErrorReporter getErrors() {
1292    return errors;
1293  }
1294
1295  /**
1296   * Read the .regioninfo file from the file system.  If there is no
1297   * .regioninfo, add it to the orphan hdfs region list.
1298   */
1299  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1300    Path regionDir = hbi.getHdfsRegionDir();
1301    if (regionDir == null) {
1302      if (hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
        // Log a warning only for the default/primary replica with no region dir
1304        LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1305      }
1306      return;
1307    }
1308
1309    if (hbi.hdfsEntry.hri != null) {
1310      // already loaded data
1311      return;
1312    }
1313
1314    FileSystem fs = FileSystem.get(getConf());
1315    RegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1316    LOG.debug("RegionInfo read: " + hri.toString());
1317    hbi.hdfsEntry.hri = hri;
1318  }
1319
  /**
   * Exception thrown when an integrity repair operation fails in an
   * unresolvable way.
   */
1324  public static class RegionRepairException extends IOException {
1325    private static final long serialVersionUID = 1L;
1326    final IOException ioe;
1327    public RegionRepairException(String s, IOException ioe) {
1328      super(s);
1329      this.ioe = ioe;
1330    }
1331  }
1332
  /**
   * Populate hbck info entries from the region infos loaded from the file system.
   */
1336  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1337      throws IOException, InterruptedException {
1338    tablesInfo.clear(); // regenerating the data
1339    // generate region split structure
1340    Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1341
1342    // Parallelized read of .regioninfo files.
1343    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckInfos.size());
1344    List<Future<Void>> hbiFutures;
1345
1346    for (HbckInfo hbi : hbckInfos) {
1347      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1348      hbis.add(work);
1349    }
1350
1351    // Submit and wait for completion
1352    hbiFutures = executor.invokeAll(hbis);
1353
1354    for(int i=0; i<hbiFutures.size(); i++) {
1355      WorkItemHdfsRegionInfo work = hbis.get(i);
1356      Future<Void> f = hbiFutures.get(i);
1357      try {
1358        f.get();
1359      } catch(ExecutionException e) {
1360        LOG.warn("Failed to read .regioninfo file for region " +
1361              work.hbi.getRegionNameAsString(), e.getCause());
1362      }
1363    }
1364
1365    Path hbaseRoot = FSUtils.getRootDir(getConf());
1366    FileSystem fs = hbaseRoot.getFileSystem(getConf());
1367    // serialized table info gathering.
1368    for (HbckInfo hbi: hbckInfos) {
1369
1370      if (hbi.getHdfsHRI() == null) {
1371        // was an orphan
1372        continue;
1373      }
1374
1375
1376      // get table name from hdfs, populate various HBaseFsck tables.
1377      TableName tableName = hbi.getTableName();
1378      if (tableName == null) {
        // There was an entry in hbase:meta but not in HDFS?
1380        LOG.warn("tableName was null for: " + hbi);
1381        continue;
1382      }
1383
1384      TableInfo modTInfo = tablesInfo.get(tableName);
1385      if (modTInfo == null) {
1386        // only executed once per table.
1387        modTInfo = new TableInfo(tableName);
1388        tablesInfo.put(tableName, modTInfo);
1389        try {
1390          TableDescriptor htd =
1391              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1392          modTInfo.htds.add(htd);
1393        } catch (IOException ioe) {
1394          if (!orphanTableDirs.containsKey(tableName)) {
1395            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1396            //should only report once for each table
1397            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1398                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1399            Set<String> columns = new HashSet<>();
1400            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1401          }
1402        }
1403      }
1404      if (!hbi.isSkipChecks()) {
1405        modTInfo.addRegionInfo(hbi);
1406      }
1407    }
1408
1409    loadTableInfosForTablesWithNoRegion();
1410    errors.print("");
1411
1412    return tablesInfo;
1413  }
1414
  /**
   * Get the column family list according to the column family dirs.
   * @param columns set to which the discovered column family names are added
   * @param hbi region whose HDFS directory is scanned for column family dirs
   * @return the set of column families found
   * @throws IOException if the region directory cannot be listed
   */
1422  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1423    Path regionDir = hbi.getHdfsRegionDir();
1424    FileSystem fs = regionDir.getFileSystem(getConf());
1425    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1426    for (FileStatus subdir : subDirs) {
1427      String columnfamily = subdir.getPath().getName();
1428      columns.add(columnfamily);
1429    }
1430    return columns;
1431  }
1432
  /**
   * Fabricate a .tableinfo file with the following contents:<br>
   * 1. the correct table name<br>
   * 2. the correct column family list<br>
   * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
   * @throws IOException
   */
1440  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1441      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
    for (String columnFamily : columns) {
      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnFamily));
    }
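    // The 'true' flag forces creation of the descriptor even if a .tableinfo already exists.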
1447    fstd.createTableDescriptor(builder.build(), true);
1448    return true;
1449  }
1450
  /**
   * Fix the empty REGIONINFO_QUALIFIER rows in hbase:meta.<br>
   * @throws IOException
   */
1455  public void fixEmptyMetaCells() throws IOException {
1456    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1457      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1458      for (Result region : emptyRegionInfoQualifiers) {
1459        deleteMetaRegion(region.getRow());
1460        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1461      }
1462      emptyRegionInfoQualifiers.clear();
1463    }
1464  }
1465
  /**
   * Fix orphan tables by creating a .tableinfo file under the table dir:<br>
   * 1. if the TableInfo is cached, recover the .tableinfo from the cache<br>
   * 2. else create a default .tableinfo file with the following items<br>
   * &nbsp;2.1 the correct table name<br>
   * &nbsp;2.2 the correct column family list<br>
   * &nbsp;2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
   * @throws IOException
   */
1475  public void fixOrphanTables() throws IOException {
1476    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1477
1478      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1479      tmpList.addAll(orphanTableDirs.keySet());
1480      TableDescriptor[] htds = getTableDescriptors(tmpList);
1481      Iterator<Entry<TableName, Set<String>>> iter =
1482          orphanTableDirs.entrySet().iterator();
1483      int j = 0;
1484      int numFailedCase = 0;
1485      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1486      while (iter.hasNext()) {
1487        Entry<TableName, Set<String>> entry =
1488            iter.next();
1489        TableName tableName = entry.getKey();
1490        LOG.info("Trying to fix orphan table error: " + tableName);
1491        if (j < htds.length) {
1492          if (tableName.equals(htds[j].getTableName())) {
1493            TableDescriptor htd = htds[j];
1494            LOG.info("fixing orphan table: " + tableName + " from cache");
1495            fstd.createTableDescriptor(htd, true);
1496            j++;
1497            iter.remove();
1498          }
1499        } else {
1500          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1501            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1502            LOG.warn("Strongly recommend to modify the TableDescriptor if necessary for: " + tableName);
1503            iter.remove();
1504          } else {
1505            LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1506            numFailedCase++;
1507          }
1508        }
1509        fixes++;
1510      }
1511
1512      if (orphanTableDirs.isEmpty()) {
1513        // all orphanTableDirs are luckily recovered
1514        // re-run doFsck after recovering the .tableinfo file
1515        setShouldRerun();
1516        LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1517      } else if (numFailedCase > 0) {
1518        LOG.error("Failed to fix " + numFailedCase
1519            + " OrphanTables with default .tableinfo files");
1520      }
1521
1522    }
1523    //cleanup the list
1524    orphanTableDirs.clear();
1525
1526  }
1527
  /**
   * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates its own WAL, so be
   * sure to close it as well as the region when you're finished.
   * @param walFactoryID A unique identifier for the WAL factory. Filesystem implementations will
   *          use this ID to make a directory inside the WAL directory path.
   * @return an open hbase:meta HRegion
   */
1535  private HRegion createNewMeta(String walFactoryID) throws IOException {
1536    Path rootdir = FSUtils.getRootDir(getConf());
1537    Configuration c = getConf();
1538    RegionInfo metaHRI = RegionInfoBuilder.FIRST_META_REGIONINFO;
1539    TableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1540    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
    // The WAL subsystem will use the default rootDir rather than the passed-in rootDir
    // unless we pass it along via the conf.
1543    Configuration confForWAL = new Configuration(c);
1544    confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
1545    WAL wal = new WALFactory(confForWAL, walFactoryID).getWAL(metaHRI);
1546    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
1547    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1548    return meta;
1549  }
1550
1551  /**
1552   * Generate set of puts to add to new meta.  This expects the tables to be
1553   * clean with no overlaps or holes.  If there are any problems it returns null.
1554   *
1555   * @return An array list of puts to do in bulk, null if tables have problems
1556   */
1557  private ArrayList<Put> generatePuts(SortedMap<TableName, TableInfo> tablesInfo)
1558      throws IOException {
1559    ArrayList<Put> puts = new ArrayList<>();
1560    boolean hasProblems = false;
1561    for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1562      TableName name = e.getKey();
1563
1564      // skip "hbase:meta"
1565      if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1566        continue;
1567      }
1568
1569      TableInfo ti = e.getValue();
1570      puts.add(MetaTableAccessor.makePutFromTableState(
1571        new TableState(ti.tableName, TableState.State.ENABLED),
1572        EnvironmentEdgeManager.currentTime()));
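      // Each distinct start key should map to exactly one region; any other count indicates an
      // overlap or hole, which must be resolved before hbase:meta can be rebuilt.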
1573      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1574          .entrySet()) {
1575        Collection<HbckInfo> his = spl.getValue();
1576        int sz = his.size();
1577        if (sz != 1) {
1578          // problem
1579          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1580              + " had " +  sz + " regions instead of exactly 1." );
1581          hasProblems = true;
1582          continue;
1583        }
1584
1585        // add the row directly to meta.
1586        HbckInfo hi = his.iterator().next();
1587        RegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1588        Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
1589        puts.add(p);
1590      }
1591    }
1592    return hasProblems ? null : puts;
1593  }
1594
1595  /**
1596   * Suggest fixes for each table
1597   */
1598  private void suggestFixes(
1599      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1600    logParallelMerge();
1601    for (TableInfo tInfo : tablesInfo.values()) {
1602      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1603      tInfo.checkRegionChain(handler);
1604    }
1605  }
1606
1607  /**
1608   * Rebuilds meta from information in hdfs/fs.  Depends on configuration settings passed into
1609   * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE.
1610   *
1611   * @param fix flag that determines if method should attempt to fix holes
1612   * @return true if successful, false if attempt failed.
1613   */
1614  public boolean rebuildMeta(boolean fix) throws IOException,
1615      InterruptedException {
1616
1617    // TODO check to make sure hbase is offline. (or at least the table
1618    // currently being worked on is off line)
1619
1620    // Determine what's on HDFS
1621    LOG.info("Loading HBase regioninfo from HDFS...");
1622    loadHdfsRegionDirs(); // populating regioninfo table.
1623
1624    int errs = errors.getErrorList().size();
1625    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1626    checkHdfsIntegrity(false, false);
1627
1628    // make sure ok.
1629    if (errors.getErrorList().size() != errs) {
1630      // While in error state, iterate until no more fixes possible
1631      while(true) {
1632        fixes = 0;
1633        suggestFixes(tablesInfo);
1634        errors.clear();
1635        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1636        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1637
1638        int errCount = errors.getErrorList().size();
1639
1640        if (fixes == 0) {
1641          if (errCount > 0) {
1642            return false; // failed to fix problems.
1643          } else {
1644            break; // no fixes and no problems? drop out and fix stuff!
1645          }
1646        }
1647      }
1648    }
1649
1650    // we can rebuild, move old meta out of the way and start
1651    LOG.info("HDFS regioninfo's seems good.  Sidelining old hbase:meta");
1652    Path backupDir = sidelineOldMeta();
1653
1654    LOG.info("Creating new hbase:meta");
1655    String walFactoryId = "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8);
1656    HRegion meta = createNewMeta(walFactoryId);
1657
1658    // populate meta
1659    List<Put> puts = generatePuts(tablesInfo);
1660    if (puts == null) {
1661      LOG.error(HBaseMarkers.FATAL, "Problem encountered when creating new hbase:meta "
1662          + "entries. You may need to restore the previously sidelined hbase:meta");
1663      return false;
1664    }
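    // Write all generated entries into the freshly created hbase:meta region in one batch;
    // no nonces are used (HConstants.NO_NONCE).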
1665    meta.batchMutate(puts.toArray(new Put[puts.size()]), HConstants.NO_NONCE, HConstants.NO_NONCE);
1666    meta.close();
1667    if (meta.getWAL() != null) {
1668      meta.getWAL().close();
1669    }
1670    // clean up the temporary hbck meta recovery WAL directory
1671    removeHBCKMetaRecoveryWALDir(walFactoryId);
1672    LOG.info("Success! hbase:meta table rebuilt.");
1673    LOG.info("Old hbase:meta is moved into " + backupDir);
1674    return true;
1675  }
1676
1677  /**
1678   * Removes the empty Meta recovery WAL directory.
1679   * @param walFactoryId A unique identifier for WAL factory which was used by Filesystem to make a
1680   *          Meta recovery WAL directory inside WAL directory path.
1681   */
1682  private void removeHBCKMetaRecoveryWALDir(String walFactoryId) throws IOException {
1683    Path walLogDir = new Path(new Path(CommonFSUtils.getWALRootDir(getConf()),
1684          HConstants.HREGION_LOGDIR_NAME), walFactoryId);
1685    FileSystem fs = CommonFSUtils.getWALFileSystem(getConf());
1686    FileStatus[] walFiles = FSUtils.listStatus(fs, walLogDir, null);
1687    if (walFiles == null || walFiles.length == 0) {
1688      LOG.info("HBCK meta recovery WAL directory is empty, removing it now.");
1689      if (!FSUtils.deleteDirectory(fs, walLogDir)) {
1690        LOG.warn("Couldn't clear the HBCK Meta recovery WAL directory " + walLogDir);
1691      }
1692    }
1693  }
1694
1695  /**
1696   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1697   */
1698  private void logParallelMerge() {
1699    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1700      LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
1701          " false to run serially.");
1702    } else {
1703      LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to" +
1704          " true to run in parallel.");
1705    }
1706  }
1707
1708  private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1709      boolean fixOverlaps) throws IOException {
1710    LOG.info("Checking HBase region split map from HDFS data...");
1711    logParallelMerge();
1712    for (TableInfo tInfo : tablesInfo.values()) {
1713      TableIntegrityErrorHandler handler;
1714      if (fixHoles || fixOverlaps) {
1715        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1716          fixHoles, fixOverlaps);
1717      } else {
1718        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1719      }
1720      if (!tInfo.checkRegionChain(handler)) {
1721        // should dump info as well.
1722        errors.report("Found inconsistency in table " + tInfo.getName());
1723      }
1724    }
1725    return tablesInfo;
1726  }
1727
1728  private Path getSidelineDir() throws IOException {
1729    if (sidelineDir == null) {
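      // Lazily build the sideline location: a per-run directory under the hbck sideline dir,
      // named after the HBase root dir plus the hbck start time so repeated runs do not collide.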
1730      Path hbaseDir = FSUtils.getRootDir(getConf());
1731      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1732      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1733          + startMillis);
1734    }
1735    return sidelineDir;
1736  }
1737
1738  /**
1739   * Sideline a region dir (instead of deleting it)
1740   */
1741  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1742    return sidelineRegionDir(fs, null, hi);
1743  }
1744
1745  /**
1746   * Sideline a region dir (instead of deleting it)
1747   *
   * @param parentDir if specified, the region will be sidelined to a folder like
   *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
   *     regions sidelined for a similar reason, for example regions that should be bulk loaded
   *     back later on. If NULL, it is ignored.
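   *     For example, a hypothetical parentDir of "dupes" would sideline the region to
   *     {@literal .../dupes/<table name>/<region name>}.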
1752   */
1753  Path sidelineRegionDir(FileSystem fs,
1754      String parentDir, HbckInfo hi) throws IOException {
1755    TableName tableName = hi.getTableName();
1756    Path regionDir = hi.getHdfsRegionDir();
1757
1758    if (!fs.exists(regionDir)) {
1759      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1760      return null;
1761    }
1762
1763    Path rootDir = getSidelineDir();
1764    if (parentDir != null) {
1765      rootDir = new Path(rootDir, parentDir);
1766    }
    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1768    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1769    fs.mkdirs(sidelineRegionDir);
1770    boolean success = false;
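    // Move the region's contents entry by entry; any rename failure aborts with an IOException
    // so files are never silently dropped.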
1771    FileStatus[] cfs =  fs.listStatus(regionDir);
1772    if (cfs == null) {
1773      LOG.info("Region dir is empty: " + regionDir);
1774    } else {
1775      for (FileStatus cf : cfs) {
1776        Path src = cf.getPath();
1777        Path dst =  new Path(sidelineRegionDir, src.getName());
1778        if (fs.isFile(src)) {
1779          // simple file
1780          success = fs.rename(src, dst);
1781          if (!success) {
1782            String msg = "Unable to rename file " + src +  " to " + dst;
1783            LOG.error(msg);
1784            throw new IOException(msg);
1785          }
1786          continue;
1787        }
1788
1789        // is a directory.
1790        fs.mkdirs(dst);
1791
1792        LOG.info("Sidelining files from " + src + " into containing region " + dst);
        // FileSystem.rename is inconsistent with directories -- if the
        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
        // it moves the src into the dst dir resulting in (foo/a/b).  If
        // the dst does not exist and the src is a dir, then src becomes dst (foo/b).
1797        FileStatus[] hfiles = fs.listStatus(src);
1798        if (hfiles != null && hfiles.length > 0) {
1799          for (FileStatus hfile : hfiles) {
1800            success = fs.rename(hfile.getPath(), dst);
1801            if (!success) {
1802              String msg = "Unable to rename file " + src +  " to " + dst;
1803              LOG.error(msg);
1804              throw new IOException(msg);
1805            }
1806          }
1807        }
1808        LOG.debug("Sideline directory contents:");
1809        debugLsr(sidelineRegionDir);
1810      }
1811    }
1812
1813    LOG.info("Removing old region dir: " + regionDir);
1814    success = fs.delete(regionDir, true);
1815    if (!success) {
1816      String msg = "Unable to delete dir " + regionDir;
1817      LOG.error(msg);
1818      throw new IOException(msg);
1819    }
1820    return sidelineRegionDir;
1821  }
1822
  /**
   * Sideline an entire table.
   */
1826  void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1827      Path backupHbaseDir) throws IOException {
1828    Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1829    if (fs.exists(tableDir)) {
      Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
      fs.mkdirs(backupTableDir.getParent());
      boolean success = fs.rename(tableDir, backupTableDir);
      if (!success) {
        throw new IOException("Failed to move " + tableName + " from "
            + tableDir + " to " + backupTableDir);
      }
    } else {
      LOG.info("No previous " + tableName + " exists. Continuing.");
1839    }
1840  }
1841
1842  /**
1843   * @return Path to backup of original directory
1844   */
1845  Path sidelineOldMeta() throws IOException {
1846    // put current hbase:meta aside.
1847    Path hbaseDir = FSUtils.getRootDir(getConf());
1848    FileSystem fs = hbaseDir.getFileSystem(getConf());
1849    Path backupDir = getSidelineDir();
1850    fs.mkdirs(backupDir);
1851
1852    try {
1853      sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1854    } catch (IOException e) {
      LOG.error(HBaseMarkers.FATAL, "... failed to sideline meta. Currently in "
          + "inconsistent state. To restore, try to rename hbase:meta in "
          + backupDir.getName() + " to " + hbaseDir.getName() + ".", e);
1858      throw e; // throw original exception
1859    }
1860    return backupDir;
1861  }
1862
  /**
   * Load the table states from hbase:meta into the local map.
   * @throws IOException
   */
  private void loadTableStates() throws IOException {
1870    tableStates = MetaTableAccessor.getTableStates(connection);
    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled even though
    // it has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this
    // in the meantime.
1874    this.tableStates.put(TableName.META_TABLE_NAME,
1875        new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1876  }
1877
  /**
   * Check if the specified table is disabled or being disabled.
   * @param tableName table to check the status of
   */
1882  private boolean isTableDisabled(TableName tableName) {
1883    return tableStates.containsKey(tableName)
1884        && tableStates.get(tableName)
1885        .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1886  }
1887
1888  /**
1889   * Scan HDFS for all regions, recording their information into
1890   * regionInfoMap
1891   */
1892  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1893    Path rootDir = FSUtils.getRootDir(getConf());
1894    FileSystem fs = rootDir.getFileSystem(getConf());
1895
1896    // list all tables from HDFS
1897    List<FileStatus> tableDirs = Lists.newArrayList();
1898
1899    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1900
1901    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
    for (Path path : paths) {
      TableName tableName = FSUtils.getTableName(path);
      if ((!checkMetaOnly &&
          isTableIncluded(tableName)) ||
          tableName.equals(TableName.META_TABLE_NAME)) {
        tableDirs.add(fs.getFileStatus(path));
      }
    }
1910
1911    // verify that version file exists
1912    if (!foundVersionFile) {
1913      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1914          "Version file does not exist in root dir " + rootDir);
1915      if (shouldFixVersionFile()) {
1916        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1917            + " file.");
1918        setShouldRerun();
1919        FSUtils.setVersion(fs, rootDir, getConf().getInt(
1920            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1921            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1922            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1923      }
1924    }
1925
1926    // Avoid multithreading at table-level because already multithreaded internally at
1927    // region-level.  Additionally multithreading at table-level can lead to deadlock
1928    // if there are many tables in the cluster.  Since there are a limited # of threads
1929    // in the executor's thread pool and if we multithread at the table-level by putting
1930    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1931    // executor tied up solely in waiting for the tables' region-level calls to complete.
1932    // If there are enough tables then there will be no actual threads in the pool left
1933    // for the region-level callables to be serviced.
1934    for (FileStatus tableDir : tableDirs) {
1935      LOG.debug("Loading region dirs from " +tableDir.getPath());
1936      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1937      try {
1938        item.call();
1939      } catch (ExecutionException e) {
1940        LOG.warn("Could not completely load table dir " +
1941            tableDir.getPath(), e.getCause());
1942      }
1943    }
1944    errors.print("");
1945  }
1946
1947  /**
1948   * Record the location of the hbase:meta region as found in ZooKeeper.
1949   */
1950  private boolean recordMetaRegion() throws IOException {
1951    RegionLocations rl = connection.locateRegion(TableName.META_TABLE_NAME,
1952        HConstants.EMPTY_START_ROW, false, false);
1953    if (rl == null) {
1954      errors.reportError(ERROR_CODE.NULL_META_REGION,
1955          "META region was not found in ZooKeeper");
1956      return false;
1957    }
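    // hbase:meta may have read replicas; record an entry for every replica location returned.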
1958    for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1959      // Check if Meta region is valid and existing
1960      if (metaLocation == null ) {
1961        errors.reportError(ERROR_CODE.NULL_META_REGION,
1962            "META region location is null");
1963        return false;
1964      }
1965      if (metaLocation.getRegionInfo() == null) {
1966        errors.reportError(ERROR_CODE.NULL_META_REGION,
1967            "META location regionInfo is null");
1968        return false;
1969      }
1970      if (metaLocation.getHostname() == null) {
1971        errors.reportError(ERROR_CODE.NULL_META_REGION,
1972            "META location hostName is null");
1973        return false;
1974      }
1975      ServerName sn = metaLocation.getServerName();
1976      MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
1977      HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1978      if (hbckInfo == null) {
1979        regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1980      } else {
1981        hbckInfo.metaEntry = m;
1982      }
1983    }
1984    return true;
1985  }
1986
1987  private ZKWatcher createZooKeeperWatcher() throws IOException {
1988    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1989      @Override
1990      public void abort(String why, Throwable e) {
1991        LOG.error(why, e);
1992        System.exit(1);
1993      }
1994
1995      @Override
1996      public boolean isAborted() {
1997        return false;
1998      }
1999
2000    });
2001  }
2002
2003  /**
2004   * Contacts each regionserver and fetches metadata about regions.
2005   * @param regionServerList - the list of region servers to connect to
2006   * @throws IOException if a remote or network exception occurs
2007   */
2008  void processRegionServers(Collection<ServerName> regionServerList)
2009    throws IOException, InterruptedException {
2010
2011    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
2012    List<Future<Void>> workFutures;
2013
2014    // loop to contact each region server in parallel
2015    for (ServerName rsinfo: regionServerList) {
2016      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
2017    }
2018
2019    workFutures = executor.invokeAll(workItems);
2020
2021    for(int i=0; i<workFutures.size(); i++) {
2022      WorkItemRegion item = workItems.get(i);
2023      Future<Void> f = workFutures.get(i);
2024      try {
2025        f.get();
2026      } catch(ExecutionException e) {
2027        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
2028            e.getCause());
2029      }
2030    }
2031  }
2032
2033  /**
2034   * Check consistency of all regions that have been found in previous phases.
2035   */
2036  private void checkAndFixConsistency()
2037  throws IOException, KeeperException, InterruptedException {
2038    // Divide the checks in two phases. One for default/primary replicas and another
2039    // for the non-primary ones. Keeps code cleaner this way.
2040
2041    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
2042    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
2043      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2044        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
2045      }
2046    }
2047    checkRegionConsistencyConcurrently(workItems);
2048
2049    boolean prevHdfsCheck = shouldCheckHdfs();
2050    setCheckHdfs(false); //replicas don't have any hdfs data
2051    // Run a pass over the replicas and fix any assignment issues that exist on the currently
2052    // deployed/undeployed replicas.
2053    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
2054    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
2055      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2056        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
2057      }
2058    }
2059    checkRegionConsistencyConcurrently(replicaWorkItems);
2060    setCheckHdfs(prevHdfsCheck);
2061
    // If some regions are skipped during the checkRegionConsistencyConcurrently() phase, we might
    // not get an accurate state of hbase if we continue. The config here allows users to tune
    // the tolerated number of skipped regions.
    // TODO: evaluate the consequence to continue the hbck operation without config.
2066    int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
2067    int numOfSkippedRegions = skippedRegions.size();
2068    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
2069      throw new IOException(numOfSkippedRegions
2070        + " region(s) could not be checked or repaired.  See logs for detail.");
2071    }
2072
2073    if (shouldCheckHdfs()) {
2074      checkAndFixTableStates();
2075    }
2076  }
2077
  /**
   * Check consistency of all regions using multiple threads concurrently.
   */
2081  private void checkRegionConsistencyConcurrently(
2082    final List<CheckRegionConsistencyWorkItem> workItems)
2083    throws IOException, KeeperException, InterruptedException {
2084    if (workItems.isEmpty()) {
2085      return;  // nothing to check
2086    }
2087
2088    List<Future<Void>> workFutures = executor.invokeAll(workItems);
2089    for(Future<Void> f: workFutures) {
2090      try {
2091        f.get();
2092      } catch(ExecutionException e1) {
2093        LOG.warn("Could not check region consistency " , e1.getCause());
2094        if (e1.getCause() instanceof IOException) {
2095          throw (IOException)e1.getCause();
2096        } else if (e1.getCause() instanceof KeeperException) {
2097          throw (KeeperException)e1.getCause();
2098        } else if (e1.getCause() instanceof InterruptedException) {
2099          throw (InterruptedException)e1.getCause();
2100        } else {
2101          throw new IOException(e1.getCause());
2102        }
2103      }
2104    }
2105  }
2106
2107  class CheckRegionConsistencyWorkItem implements Callable<Void> {
2108    private final String key;
2109    private final HbckInfo hbi;
2110
2111    CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
2112      this.key = key;
2113      this.hbi = hbi;
2114    }
2115
2116    @Override
2117    public synchronized Void call() throws Exception {
2118      try {
2119        checkRegionConsistency(key, hbi);
2120      } catch (Exception e) {
        // If the region is a non-META region, skip it and log a warning/error message; if it is
        // the META region, we should not continue.
2123        LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
2124          + "'.", e);
2125        if (hbi.getHdfsHRI().isMetaRegion()) {
2126          throw e;
2127        }
2128        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
2129        addSkippedRegion(hbi);
2130      }
2131      return null;
2132    }
2133  }
2134
2135  private void addSkippedRegion(final HbckInfo hbi) {
2136    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
2137    if (skippedRegionNames == null) {
2138      skippedRegionNames = new HashSet<>();
2139    }
2140    skippedRegionNames.add(hbi.getRegionNameAsString());
2141    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
2142  }
2143
  /**
   * Check and fix table states; assumes full info is available:
   * - tableInfos
   * - empty tables loaded
   */
2149  private void checkAndFixTableStates() throws IOException {
2150    // first check dangling states
2151    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
2152      TableName tableName = entry.getKey();
2153      TableState tableState = entry.getValue();
2154      TableInfo tableInfo = tablesInfo.get(tableName);
2155      if (isTableIncluded(tableName)
2156          && !tableName.isSystemTable()
2157          && tableInfo == null) {
2158        if (fixMeta) {
2159          MetaTableAccessor.deleteTableState(connection, tableName);
2160          TableState state = MetaTableAccessor.getTableState(connection, tableName);
2161          if (state != null) {
2162            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2163                tableName + " unable to delete dangling table state " + tableState);
2164          }
2165        } else if (!checkMetaOnly) {
          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
          // true, tableInfo will be null as tablesInfo is not populated for all tables from hdfs
2168          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2169              tableName + " has dangling table state " + tableState);
2170        }
2171      }
2172    }
2173    // check that all tables have states
2174    for (TableName tableName : tablesInfo.keySet()) {
2175      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
2176        if (fixMeta) {
2177          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
2178          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
2179          if (newState == null) {
2180            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2181                "Unable to change state for table " + tableName + " in meta ");
2182          }
2183        } else {
2184          errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2185              tableName + " has no state in meta ");
2186        }
2187      }
2188    }
2189  }
2190
2191  private void preCheckPermission() throws IOException, AccessDeniedException {
2192    if (shouldIgnorePreCheckPermission()) {
2193      return;
2194    }
2195
2196    Path hbaseDir = FSUtils.getRootDir(getConf());
2197    FileSystem fs = hbaseDir.getFileSystem(getConf());
2198    UserProvider userProvider = UserProvider.instantiate(getConf());
2199    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
2200    FileStatus[] files = fs.listStatus(hbaseDir);
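    // hbck needs write access to everything under the HBase root dir in order to make repairs,
    // so verify WRITE permission on each top-level entry up front.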
2201    for (FileStatus file : files) {
2202      try {
2203        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
2204      } catch (AccessDeniedException ace) {
2205        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
2206        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
2207          + " does not have write perms to " + file.getPath()
2208          + ". Please rerun hbck as hdfs user " + file.getOwner());
2209        throw ace;
2210      }
2211    }
2212  }
2213
2214  /**
2215   * Deletes region from meta table
2216   */
2217  private void deleteMetaRegion(HbckInfo hi) throws IOException {
2218    deleteMetaRegion(hi.metaEntry.getRegionName());
2219  }
2220
2221  /**
2222   * Deletes region from meta table
2223   */
2224  private void deleteMetaRegion(byte[] metaKey) throws IOException {
2225    Delete d = new Delete(metaKey);
2226    meta.delete(d);
2227    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2228  }
2229
2230  /**
2231   * Reset the split parent region info in meta table
2232   */
2233  private void resetSplitParent(HbckInfo hi) throws IOException {
2234    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
2235    Delete d = new Delete(hi.metaEntry.getRegionName());
2236    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2237    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2238    mutations.add(d);
2239
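    // Re-write the region info with the split and offline flags cleared so the region is treated
    // as a normal, assignable region again.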
2240    RegionInfo hri = RegionInfoBuilder.newBuilder(hi.metaEntry)
2241        .setOffline(false)
2242        .setSplit(false)
2243        .build();
2244    Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
2245    mutations.add(p);
2246
2247    meta.mutateRow(mutations);
2248    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2249  }
2250
  /**
   * This is a backwards-compatibility wrapper for permanently offlining a region
   * that should not be alive.  If the region server does not support the
   * "offline" method, it will use the closest unassign method instead.  This
   * will basically work until one attempts to disable or delete the affected
   * table.  The problem has to do with in-memory only master state, so
   * restarting the HMaster or failing over to another should fix this.
   */
2259  private void offline(byte[] regionName) throws IOException {
2260    String regionString = Bytes.toStringBinary(regionName);
2261    if (!rsSupportsOffline) {
2262      LOG.warn("Using unassign region " + regionString
2263          + " instead of using offline method, you should"
2264          + " restart HMaster after these repairs");
2265      admin.unassign(regionName, true);
2266      return;
2267    }
2268
    // the first time through, we assume the region servers support #offline.
2270    try {
2271      LOG.info("Offlining region " + regionString);
2272      admin.offline(regionName);
2273    } catch (IOException ioe) {
2274      String notFoundMsg = "java.lang.NoSuchMethodException: " +
2275        "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2276      if (ioe.getMessage().contains(notFoundMsg)) {
2277        LOG.warn("Using unassign region " + regionString
2278            + " instead of using offline method, you should"
2279            + " restart HMaster after these repairs");
2280        rsSupportsOffline = false; // in the future just use unassign
2281        admin.unassign(regionName, true);
2282        return;
2283      }
2284      throw ioe;
2285    }
2286  }
2287
2288  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2289    undeployRegionsForHbi(hi);
2290    // undeploy replicas of the region (but only if the method is invoked for the primary)
2291    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2292      return;
2293    }
2294    int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2295    for (int i = 1; i < numReplicas; i++) {
2296      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2297      RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2298          hi.getPrimaryHRIForDeployedReplica(), i);
2299      HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2300      if (h != null) {
2301        undeployRegionsForHbi(h);
2302        //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2303        //in consistency checks
2304        h.setSkipChecks(true);
2305      }
2306    }
2307  }
2308
2309  private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2310    for (OnlineEntry rse : hi.deployedEntries) {
2311      LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2312      try {
2313        HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2314        offline(rse.hri.getRegionName());
2315      } catch (IOException ioe) {
2316        LOG.warn("Got exception when attempting to offline region "
2317            + Bytes.toString(rse.hri.getRegionName()), ioe);
2318      }
2319    }
2320  }
2321
  /**
   * Attempts to undeploy a region from a region server based on information in
   * META.  Any operations that modify the file system should make sure that
   * its corresponding region is not deployed to prevent data races.
   *
   * A separate call is required to update the master in-memory region state
   * kept in the AssignmentManager.  Because disable uses this state instead of
   * that found in META, we can't seem to cleanly disable/delete tables that
   * have been hbck fixed.  When used on a version of HBase that does not have
   * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
   * restart or failover may be required.
   */
2334  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2335    if (hi.metaEntry == null && hi.hdfsEntry == null) {
2336      undeployRegions(hi);
2337      return;
2338    }
2339
2340    // get assignment info and hregioninfo from meta.
2341    Get get = new Get(hi.getRegionName());
2342    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2343    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2344    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2345    // also get the locations of the replicas to close if the primary region is being closed
2346    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2347      int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2348      for (int i = 0; i < numReplicas; i++) {
2349        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2350        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2351      }
2352    }
2353    Result r = meta.get(get);
2354    RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2355    if (rl == null) {
2356      LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2357          " since meta does not have handle to reach it");
2358      return;
2359    }
2360    for (HRegionLocation h : rl.getRegionLocations()) {
2361      ServerName serverName = h.getServerName();
2362      if (serverName == null) {
2363        errors.reportError("Unable to close region "
2364            + hi.getRegionNameAsString() +  " because meta does not "
2365            + "have handle to reach it.");
2366        continue;
2367      }
2368      RegionInfo hri = h.getRegionInfo();
2369      if (hri == null) {
2370        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2371            + " because hbase:meta had invalid or missing "
2372            + HConstants.CATALOG_FAMILY_STR + ":"
2373            + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2374            + " qualifier value.");
2375        continue;
2376      }
2377      // close the region -- close files and remove assignment
2378      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2379    }
2380  }
2381
2382  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2383    KeeperException, InterruptedException {
2384    // If we are trying to fix the errors
2385    if (shouldFixAssignments()) {
2386      errors.print(msg);
2387      undeployRegions(hbi);
2388      setShouldRerun();
2389      RegionInfo hri = hbi.getHdfsHRI();
2390      if (hri == null) {
2391        hri = hbi.metaEntry;
2392      }
2393      HBaseFsckRepair.fixUnassigned(admin, hri);
2394      HBaseFsckRepair.waitUntilAssigned(admin, hri);
2395
2396      // also assign replicas if needed (do it only when this call operates on a primary replica)
2397      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2398      int replicationCount = admin.getDescriptor(hri.getTable()).getRegionReplication();
2399      for (int i = 1; i < replicationCount; i++) {
2400        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2401        HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2402        if (h != null) {
2403          undeployRegions(h);
2404          //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2405          //in consistency checks
2406          h.setSkipChecks(true);
2407        }
2408        HBaseFsckRepair.fixUnassigned(admin, hri);
2409        HBaseFsckRepair.waitUntilAssigned(admin, hri);
2410      }
2411
2412    }
2413  }
2414
2415  /**
2416   * Check a single region for consistency and correct deployment.
2417   */
2418  private void checkRegionConsistency(final String key, final HbckInfo hbi)
2419  throws IOException, KeeperException, InterruptedException {
2420
2421    if (hbi.isSkipChecks()) return;
2422    String descriptiveName = hbi.toString();
2423    boolean inMeta = hbi.metaEntry != null;
2424    // In case not checking HDFS, assume the region is on HDFS
2425    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2426    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2427    boolean isDeployed = !hbi.deployedOn.isEmpty();
2428    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2429    boolean deploymentMatchesMeta =
2430      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2431      hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2432    boolean splitParent =
2433        inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2434    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2435    boolean recentlyModified = inHdfs &&
2436      hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2437
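    // The flags above classify the region by its presence in hbase:meta, its presence on HDFS,
    // and its deployment state; each combination is handled by one of the branches below.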
2438    // ========== First the healthy cases =============
2439    if (hbi.containsOnlyHdfsEdits()) {
2440      return;
2441    }
2442    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2443      return;
2444    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2445      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2446        "tabled that is not deployed");
2447      return;
2448    } else if (recentlyModified) {
2449      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2450      return;
2451    }
2452    // ========== Cases where the region is not in hbase:meta =============
2453    else if (!inMeta && !inHdfs && !isDeployed) {
2454      // We shouldn't have record of this region at all then!
2455      assert false : "Entry for region with no data";
2456    } else if (!inMeta && !inHdfs && isDeployed) {
2457      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2458          + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2459          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2460      if (shouldFixAssignments()) {
2461        undeployRegions(hbi);
2462      }
2463
2464    } else if (!inMeta && inHdfs && !isDeployed) {
2465      if (hbi.isMerged()) {
2466        // This region has already been merged, the remaining hdfs file will be
2467        // cleaned by CatalogJanitor later
2468        hbi.setSkipChecks(true);
2469        LOG.info("Region " + descriptiveName
2470            + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2471        return;
2472      }
2473      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2474          + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2475          "or deployed on any region server");
2476      // restore region consistency of an adopted orphan
2477      if (shouldFixMeta()) {
2478        if (!hbi.isHdfsRegioninfoPresent()) {
2479          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2480              +  " in table integrity repair phase if -fixHdfsOrphans was" +
2481              " used.");
2482          return;
2483        }
2484
2485        RegionInfo hri = hbi.getHdfsHRI();
2486        TableInfo tableInfo = tablesInfo.get(hri.getTable());
2487
2488        for (RegionInfo region : tableInfo.getRegionsFromMeta()) {
2489          if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2490              && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2491                hri.getEndKey()) >= 0)
2492              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2493            if(region.isSplit() || region.isOffline()) continue;
2494            Path regionDir = hbi.getHdfsRegionDir();
2495            FileSystem fs = regionDir.getFileSystem(getConf());
2496            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2497            for (Path familyDir : familyDirs) {
2498              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2499              for (Path referenceFilePath : referenceFilePaths) {
2500                Path parentRegionDir =
2501                    StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2502                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2503                  LOG.warn(hri + " start and stop keys are in the range of " + region
2504                      + ". The region might not be cleaned up from hdfs when region " + region
2505                      + " split failed. Hence deleting from hdfs.");
2506                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2507                    regionDir.getParent(), hri);
2508                  return;
2509                }
2510              }
2511            }
2512          }
2513        }
2514        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2515        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2516        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2517            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2518              .getLiveServerMetrics().keySet(), numReplicas);
2519
2520        tryAssignmentRepair(hbi, "Trying to reassign region...");
2521      }
2522
2523    } else if (!inMeta && inHdfs && isDeployed) {
2524      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2525          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2526      debugLsr(hbi.getHdfsRegionDir());
2527      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2528        // for replicas, this means that we should undeploy the region (we would have
2529        // gone over the primaries and fixed meta holes in first phase under
2530        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2531        // this stage unless unwanted replica)
2532        if (shouldFixAssignments()) {
2533          undeployRegionsForHbi(hbi);
2534        }
2535      }
2536      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2537        if (!hbi.isHdfsRegioninfoPresent()) {
2538          LOG.error("This should have been repaired in table integrity repair phase");
2539          return;
2540        }
2541
2542        LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2543        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2544        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2545            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2546              .getLiveServerMetrics().keySet(), numReplicas);
2547        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2548      }
2549
2550    // ========== Cases where the region is in hbase:meta =============
2551    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2552      // check whether this is an actual error, or just transient state where parent
2553      // is not cleaned
2554      if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2555        // check that split daughters are there
2556        HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2557        HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2558        if (infoA != null && infoB != null) {
2559          // we already processed or will process daughters. Move on, nothing to see here.
2560          hbi.setSkipChecks(true);
2561          return;
2562        }
2563      }
2564
2565      // For Replica region, we need to do a similar check. If replica is not split successfully,
2566      // error is going to be reported against primary daughter region.
2567      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2568        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2569            + "and not deployed on any region server. This may be transient.");
2570        hbi.setSkipChecks(true);
2571        return;
2572      }
2573
2574      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2575          + descriptiveName + " is a split parent in META, in HDFS, "
2576          + "and not deployed on any region server. This could be transient, "
2577          + "consider running the catalog janitor first!");
2578      if (shouldFixSplitParents()) {
2579        setShouldRerun();
2580        resetSplitParent(hbi);
2581      }
2582    } else if (inMeta && !inHdfs && !isDeployed) {
2583      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2584          + descriptiveName + " found in META, but not in HDFS "
2585          + "or deployed on any region server.");
2586      if (shouldFixMeta()) {
2587        deleteMetaRegion(hbi);
2588      }
2589    } else if (inMeta && !inHdfs && isDeployed) {
2590      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2591          + " found in META, but not in HDFS, " +
2592          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2593      // We treat HDFS as ground truth.  Any information in meta is transient
2594      // and equivalent data can be regenerated.  So, let's unassign and remove
2595      // these problems from META.
2596      if (shouldFixAssignments()) {
2597        errors.print("Trying to fix unassigned region...");
2598        undeployRegions(hbi);
2599      }
2600      if (shouldFixMeta()) {
2601        // wait for it to complete
2602        deleteMetaRegion(hbi);
2603      }
2604    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2605      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2606          + " not deployed on any region server.");
2607      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2608    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2609      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2610          "Region " + descriptiveName + " should not be deployed according " +
2611          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2612      if (shouldFixAssignments()) {
2613        errors.print("Trying to close the region " + descriptiveName);
2614        setShouldRerun();
2615        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2616      }
2617    } else if (inMeta && inHdfs && isMultiplyDeployed) {
2618      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2619          + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2620          + " but is multiply assigned to region servers " +
2621          Joiner.on(", ").join(hbi.deployedOn));
2622      // If we are trying to fix the errors
2623      if (shouldFixAssignments()) {
2624        errors.print("Trying to fix assignment error...");
2625        setShouldRerun();
2626        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2627      }
2628    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2629      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2630          + descriptiveName + " listed in hbase:meta on region server " +
2631          hbi.metaEntry.regionServer + " but found on region server " +
2632          hbi.deployedOn.get(0));
2633      // If we are trying to fix the errors
2634      if (shouldFixAssignments()) {
2635        errors.print("Trying to fix assignment error...");
2636        setShouldRerun();
2637        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2638        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2639      }
2640    } else {
2641      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2642          " is in an unforeseen state:" +
2643          " inMeta=" + inMeta +
2644          " inHdfs=" + inHdfs +
2645          " isDeployed=" + isDeployed +
2646          " isMultiplyDeployed=" + isMultiplyDeployed +
2647          " deploymentMatchesMeta=" + deploymentMatchesMeta +
2648          " shouldBeDeployed=" + shouldBeDeployed);
2649    }
2650  }
2651
2652  /**
2653   * Checks table integrity. Goes over all regions and scans the tables.
2654   * Collects all the pieces for each table and checks whether there are missing,
2655   * repeated or overlapping regions.
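   * <p>
   * A minimal usage sketch (the {@code fsck} instance and its prior setup are assumed; this
   * method is package-private, so callers live in the same package):
   * <pre>{@code
   * SortedMap<TableName, TableInfo> tables = fsck.checkIntegrity();
   * for (Map.Entry<TableName, TableInfo> e : tables.entrySet()) {
   *   LOG.info("Table " + e.getKey() + " covers " + e.getValue().getNumRegions() + " regions");
   * }
   * }</pre>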
2656   * @throws IOException
2657   */
2658  SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2659    tablesInfo = new TreeMap<>();
2660    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2661    for (HbckInfo hbi : regionInfoMap.values()) {
2662      // Check only valid, working regions
2663      if (hbi.metaEntry == null) {
2664        // this assumes that consistency check has run loadMetaEntry
2665        Path p = hbi.getHdfsRegionDir();
2666        if (p == null) {
2667          errors.report("No regioninfo in Meta or HDFS. " + hbi);
2668        }
2669
2670        // TODO test.
2671        continue;
2672      }
2673      if (hbi.metaEntry.regionServer == null) {
2674        errors.detail("Skipping region because no region server: " + hbi);
2675        continue;
2676      }
2677      if (hbi.metaEntry.isOffline()) {
2678        errors.detail("Skipping region because it is offline: " + hbi);
2679        continue;
2680      }
2681      if (hbi.containsOnlyHdfsEdits()) {
2682        errors.detail("Skipping region because it only contains edits: " + hbi);
2683        continue;
2684      }
2685
2686      // Missing regionDir or over-deployment is checked elsewhere. Include
2687      // these cases in modTInfo, so we can evaluate those regions as part of
2688      // the region chain in META
2689      //if (hbi.foundRegionDir == null) continue;
2690      //if (hbi.deployedOn.size() != 1) continue;
2691      if (hbi.deployedOn.isEmpty()) continue;
2692
2693      // We should be safe here
2694      TableName tableName = hbi.metaEntry.getTable();
2695      TableInfo modTInfo = tablesInfo.get(tableName);
2696      if (modTInfo == null) {
2697        modTInfo = new TableInfo(tableName);
2698      }
2699      for (ServerName server : hbi.deployedOn) {
2700        modTInfo.addServer(server);
2701      }
2702
2703      if (!hbi.isSkipChecks()) {
2704        modTInfo.addRegionInfo(hbi);
2705      }
2706
2707      tablesInfo.put(tableName, modTInfo);
2708    }
2709
2710    loadTableInfosForTablesWithNoRegion();
2711
2712    logParallelMerge();
2713    for (TableInfo tInfo : tablesInfo.values()) {
2714      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2715      if (!tInfo.checkRegionChain(handler)) {
2716        errors.report("Found inconsistency in table " + tInfo.getName());
2717      }
2718    }
2719    return tablesInfo;
2720  }
2721
2722  /** Loads table infos for tables that may not have been included, since there are no
2723   * regions reported for the table, even though the table dir exists in hdfs.
2724   */
2725  private void loadTableInfosForTablesWithNoRegion() throws IOException {
2726    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2727    for (TableDescriptor htd : allTables.values()) {
2728      if (checkMetaOnly && !htd.isMetaTable()) {
2729        continue;
2730      }
2731
2732      TableName tableName = htd.getTableName();
2733      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2734        TableInfo tableInfo = new TableInfo(tableName);
2735        tableInfo.htds.add(htd);
2736        tablesInfo.put(htd.getTableName(), tableInfo);
2737      }
2738    }
2739  }
2740
2741  /**
2742   * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2743   * @return number of file move fixes done to merge regions.
2744   */
2745  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2746    int fileMoves = 0;
2747    String thread = Thread.currentThread().getName();
2748    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2749    debugLsr(contained.getHdfsRegionDir());
2750
2751    // rename the contained into the container.
2752    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2753    FileStatus[] dirs = null;
2754    try {
2755      dirs = fs.listStatus(contained.getHdfsRegionDir());
2756    } catch (FileNotFoundException fnfe) {
2757      // region we are attempting to merge in is not present!  Since this is a merge, there is
2758      // no harm skipping this region if it does not exist.
2759      if (!fs.exists(contained.getHdfsRegionDir())) {
2760        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2761            + " is missing. Assuming already sidelined or moved.");
2762      } else {
2763        sidelineRegionDir(fs, contained);
2764      }
2765      return fileMoves;
2766    }
2767
2768    if (dirs == null) {
2769      if (!fs.exists(contained.getHdfsRegionDir())) {
2770        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2771            + " already sidelined.");
2772      } else {
2773        sidelineRegionDir(fs, contained);
2774      }
2775      return fileMoves;
2776    }
2777
2778    for (FileStatus cf : dirs) {
2779      Path src = cf.getPath();
2780      Path dst =  new Path(targetRegionDir, src.getName());
2781
2782      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2783        // do not copy the old .regioninfo file.
2784        continue;
2785      }
2786
2787      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2788        // do not copy the .oldlogs files
2789        continue;
2790      }
2791
2792      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2793      // FileSystem.rename is inconsistent with directories -- if the
2794      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2795      // it moves the src into the dst dir, resulting in foo/a/b.  If
2796      // the dst does not exist and the src is a dir, the src is renamed to the dst (foo/b becomes foo/a).
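      // Illustrative sketch of those semantics with hypothetical paths (not executed here):
      //   rename(src=foo/b, dst=foo/a) when foo/a exists as a dir  -> contents land under foo/a/b
      //   rename(src=foo/b, dst=foo/a) when foo/a does not exist   -> foo/b is renamed to foo/a
      // This is why the loop below renames the individual store files into dst rather than
      // renaming the whole column family directory.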
2797      for (FileStatus hfile : fs.listStatus(src)) {
2798        boolean success = fs.rename(hfile.getPath(), dst);
2799        if (success) {
2800          fileMoves++;
2801        }
2802      }
2803      LOG.debug("[" + thread + "] Sideline directory contents:");
2804      debugLsr(targetRegionDir);
2805    }
2806
2807    // if all success.
2808    sidelineRegionDir(fs, contained);
2809    LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2810        getSidelineDir());
2811    debugLsr(contained.getHdfsRegionDir());
2812
2813    return fileMoves;
2814  }
2815
2816
2817  static class WorkItemOverlapMerge implements Callable<Void> {
2818    private TableIntegrityErrorHandler handler;
2819    Collection<HbckInfo> overlapgroup;
2820
2821    WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2822      this.handler = handler;
2823      this.overlapgroup = overlapgroup;
2824    }
2825
2826    @Override
2827    public Void call() throws Exception {
2828      handler.handleOverlapGroup(overlapgroup);
2829      return null;
2830    }
2831  }
2832
2833  /**
2834   * Maintain information about a particular table.
2835   */
2836  public class TableInfo {
2837    TableName tableName;
2838    TreeSet <ServerName> deployedOn;
2839
2840    // backwards regions
2841    final List<HbckInfo> backwards = new ArrayList<>();
2842
2843    // sidelined big overlapped regions
2844    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<>();
2845
2846    // region split calculator
2847    final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<>(cmp);
2848
2849    // Set of the different TableDescriptors found.  Ideally there is only one!
2850    final Set<TableDescriptor> htds = new HashSet<>();
2851
2852    // key = start split, values = set of splits in problem group
2853    final Multimap<byte[], HbckInfo> overlapGroups =
2854      TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2855
2856    // list of regions derived from meta entries.
2857    private ImmutableList<RegionInfo> regionsFromMeta = null;
2858
2859    TableInfo(TableName name) {
2860      this.tableName = name;
2861      deployedOn = new TreeSet <>();
2862    }
2863
2864    /**
2865     * @return descriptor common to all regions.  null if there are none or multiple!
2866     */
2867    private TableDescriptor getHTD() {
2868      if (htds.size() == 1) {
2869        return (TableDescriptor)htds.toArray()[0];
2870      } else {
2871        LOG.error("None/Multiple table descriptors found for table '"
2872          + tableName + "' regions: " + htds);
2873      }
2874      return null;
2875    }
2876
2877    public void addRegionInfo(HbckInfo hir) {
2878      if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2879        // end key is absolute end key, just add it.
2880        // ignore replicas other than primary for these checks
2881        if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2882        return;
2883      }
2884
2885      // if not the absolute end key, check for cycle
2886      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2887        errors.reportError(
2888            ERROR_CODE.REGION_CYCLE,
2889            String.format("The endkey for this region comes before the "
2890                + "startkey, startkey=%s, endkey=%s",
2891                Bytes.toStringBinary(hir.getStartKey()),
2892                Bytes.toStringBinary(hir.getEndKey())), this, hir);
2893        backwards.add(hir);
2894        return;
2895      }
2896
2897      // main case, add to split calculator
2898      // ignore replicas other than primary for these checks
2899      if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2900    }
2901
2902    public void addServer(ServerName server) {
2903      this.deployedOn.add(server);
2904    }
2905
2906    public TableName getName() {
2907      return tableName;
2908    }
2909
2910    public int getNumRegions() {
2911      return sc.getStarts().size() + backwards.size();
2912    }
2913
2914    public synchronized ImmutableList<RegionInfo> getRegionsFromMeta() {
2915      // lazy loaded, synchronized to ensure a single load
2916      if (regionsFromMeta == null) {
2917        List<RegionInfo> regions = new ArrayList<>();
2918        for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2919          if (tableName.equals(h.getTableName())) {
2920            if (h.metaEntry != null) {
2921              regions.add(h.metaEntry);
2922            }
2923          }
2924        }
2925        regionsFromMeta = Ordering.from(RegionInfo.COMPARATOR).immutableSortedCopy(regions);
2926      }
2927
2928      return regionsFromMeta;
2929    }
2930
2931    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2932      ErrorReporter errors;
2933
2934      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2935        this.errors = errors;
2936        setTableInfo(ti);
2937      }
2938
2939      @Override
2940      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2941        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2942            "First region should start with an empty key.  You need to "
2943            + " create a new region and regioninfo in HDFS to plug the hole.",
2944            getTableInfo(), hi);
2945      }
2946
2947      @Override
2948      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2949        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2950            "Last region should end with an empty key. You need to "
2951                + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2952      }
2953
2954      @Override
2955      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2956        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2957            "Region has the same start and end key.", getTableInfo(), hi);
2958      }
2959
2960      @Override
2961      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2962        byte[] key = r1.getStartKey();
2963        // dup start key
2964        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2965            "Multiple regions have the same startkey: "
2966            + Bytes.toStringBinary(key), getTableInfo(), r1);
2967        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2968            "Multiple regions have the same startkey: "
2969            + Bytes.toStringBinary(key), getTableInfo(), r2);
2970      }
2971
2972      @Override
2973      public void handleSplit(HbckInfo r1, HbckInfo r2) throws IOException{
2974        byte[] key = r1.getStartKey();
2975        // two regions share the same regionID
2976        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
2977          "Multiple regions have the same regionID: "
2978            + Bytes.toStringBinary(key), getTableInfo(), r1);
2979        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
2980          "Multiple regions have the same regionID: "
2981            + Bytes.toStringBinary(key), getTableInfo(), r2);
2982      }
2983
2984      @Override
2985      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2986        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2987            "There is an overlap in the region chain.",
2988            getTableInfo(), hi1, hi2);
2989      }
2990
2991      @Override
2992      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2993        errors.reportError(
2994            ERROR_CODE.HOLE_IN_REGION_CHAIN,
2995            "There is a hole in the region chain between "
2996                + Bytes.toStringBinary(holeStart) + " and "
2997                + Bytes.toStringBinary(holeStop)
2998                + ".  You need to create a new .regioninfo and region "
2999                + "dir in hdfs to plug the hole.");
3000      }
3001    }
3002
3003    /**
3004     * This handler fixes integrity errors from hdfs information.  There are
3005     * basically three classes of integrity problems: 1) holes, 2) overlaps, and
3006     * 3) invalid regions.
3007     *
3008     * This class overrides methods that fix holes and the overlap group case.
3009     * Individual cases of particular overlaps are handled by the general
3010     * overlap group merge repair case.
3011     *
3012     * If hbase is online, this forces regions offline before doing merge
3013     * operations.
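     *
     * An illustrative example with hypothetical keys: if HDFS holds regions [a,b) and [c,d),
     * the hole [b,c) is plugged by creating a new empty region covering exactly that range;
     * if it holds [a,c) and [b,d), the overlap group is merged into a single container
     * region [a,d) and the old region dirs are sidelined.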
3014     */
3015    private class HDFSIntegrityFixer extends IntegrityFixSuggester {
3016      Configuration conf;
3017
3018      boolean fixOverlaps = true;
3019
3020      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
3021          boolean fixHoles, boolean fixOverlaps) {
3022        super(ti, errors);
3023        this.conf = conf;
3024        this.fixOverlaps = fixOverlaps;
3025        // TODO properly use fixHoles
3026      }
3027
3028      /**
3029       * This is a special case hole -- when the first region of a table is
3030       * missing from META, HBase doesn't acknowledge the existence of the
3031       * table.
3032       */
3033      @Override
3034      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
3035        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
3036            "First region should start with an empty key.  Creating a new " +
3037            "region and regioninfo in HDFS to plug the hole.",
3038            getTableInfo(), next);
3039        TableDescriptor htd = getTableInfo().getHTD();
3040        // from special EMPTY_START_ROW to next region's startKey
3041        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3042            .setStartKey(HConstants.EMPTY_START_ROW)
3043            .setEndKey(next.getStartKey())
3044            .build();
3045
3046        // TODO test
3047        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3048        LOG.info("Table region start key was not empty.  Created new empty region: "
3049            + newRegion + " " +region);
3050        fixes++;
3051      }
3052
3053      @Override
3054      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
3055        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
3056            "Last region should end with an empty key.  Creating a new "
3057                + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
3058        TableDescriptor htd = getTableInfo().getHTD();
3059        // from curEndKey to EMPTY_START_ROW
3060        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3061            .setStartKey(curEndKey)
3062            .setEndKey(HConstants.EMPTY_START_ROW)
3063            .build();
3064
3065        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3066        LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
3067            + " " + region);
3068        fixes++;
3069      }
3070
3071      /**
3072       * There is a hole in the hdfs regions that violates the table integrity
3073       * rules.  Create a new empty region that patches the hole.
3074       */
3075      @Override
3076      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
3077        errors.reportError(
3078            ERROR_CODE.HOLE_IN_REGION_CHAIN,
3079            "There is a hole in the region chain between "
3080                + Bytes.toStringBinary(holeStartKey) + " and "
3081                + Bytes.toStringBinary(holeStopKey)
3082                + ".  Creating a new regioninfo and region "
3083                + "dir in hdfs to plug the hole.");
3084        TableDescriptor htd = getTableInfo().getHTD();
3085        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3086            .setStartKey(holeStartKey)
3087            .setEndKey(holeStopKey)
3088            .build();
3089        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3090        LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
3091        fixes++;
3092      }
3093
3094      /**
3095       * This takes set of overlapping regions and merges them into a single
3096       * region.  This covers cases like degenerate regions, shared start key,
3097       * general overlaps, duplicate ranges, and partial overlapping regions.
3098       *
3099       * Cases:
3100       * - Clean regions that overlap
3101       * - Groups containing only .oldlogs regions (no start/stop range can be determined)
3102       *
3103       * This is basically thread-safe, except for the {@code fixes} counter increment in mergeOverlaps.
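       *
       * Illustrative example (hypothetical keys): an overlap group { [a,c), [b,d) } that is
       * within the merge limit is handled by closing and offlining both regions, creating a
       * new empty container region [a,d), and moving their store files into it via
       * mergeRegionDirs.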
3104       */
3105      @Override
3106      public void handleOverlapGroup(Collection<HbckInfo> overlap)
3107          throws IOException {
3108        Preconditions.checkNotNull(overlap);
3109        Preconditions.checkArgument(overlap.size() > 0);
3110
3111        if (!this.fixOverlaps) {
3112          LOG.warn("Not attempting to repair overlaps.");
3113          return;
3114        }
3115
3116        if (overlap.size() > maxMerge) {
3117          LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
3118            "regions which is greater than " + maxMerge + ", the max number of regions to merge");
3119          if (sidelineBigOverlaps) {
3120            // we only sideline big overlap groups that exceed the max number of regions to merge
3121            sidelineBigOverlaps(overlap);
3122          }
3123          return;
3124        }
3125        if (shouldRemoveParents()) {
3126          removeParentsAndFixSplits(overlap);
3127        }
3128        mergeOverlaps(overlap);
3129      }
3130
3131      void removeParentsAndFixSplits(Collection<HbckInfo> overlap) throws IOException {
3132        Pair<byte[], byte[]> range = null;
3133        HbckInfo parent = null;
3134        HbckInfo daughterA = null;
3135        HbckInfo daughterB = null;
3136        Collection<HbckInfo> daughters = new ArrayList<HbckInfo>(overlap);
3137
3138        String thread = Thread.currentThread().getName();
3139        LOG.info("== [" + thread + "] Attempting to fix splits in overlap state.");
3140
3141        // we can only handle a single split per group at a time
3142        if (overlap.size() > 3) {
3143          LOG.info("Too many overlaps were found on this group, falling back to regular merge.");
3144          return;
3145        }
3146
3147        for (HbckInfo hi : overlap) {
3148          if (range == null) {
3149            range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
3150          } else {
3151            if (RegionSplitCalculator.BYTES_COMPARATOR
3152              .compare(hi.getStartKey(), range.getFirst()) < 0) {
3153              range.setFirst(hi.getStartKey());
3154            }
3155            if (RegionSplitCalculator.BYTES_COMPARATOR
3156              .compare(hi.getEndKey(), range.getSecond()) > 0) {
3157              range.setSecond(hi.getEndKey());
3158            }
3159          }
3160        }
3161
3162        LOG.info("This group range is [" + Bytes.toStringBinary(range.getFirst()) + ", "
3163          + Bytes.toStringBinary(range.getSecond()) + "]");
3164
3165        // attempt to find a possible parent for the edge case of a split
3166        for (HbckInfo hi : overlap) {
3167          if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0
3168            && Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
3169            LOG.info("This is a parent for this group: " + hi.toString());
3170            parent = hi;
3171          }
3172        }
3173
3174        // Without a parent we cannot fix the split here (the parent is dereferenced below),
3175        // so fall back to the regular merge. Otherwise remove the parent from the daughters.
3176        if (parent == null) return;
3177        daughters.remove(parent);
3178
3179        // Let's verify that the daughters share the regionID at split time and that they
3180        // were created after the parent
3181        for (HbckInfo hi : daughters) {
3182          if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0) {
3183            if (parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
3184              daughterA = hi;
3185            }
3186          }
3187          if (Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
3188            if (parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
3189              daughterB = hi;
3190            }
3191          }
3192        }
3193
3194        // both daughters must have been found and they must share the same regionID
3195        if (daughterA == null || daughterB == null
3196            || daughterA.getHdfsHRI().getRegionId() != daughterB.getHdfsHRI().getRegionId()) return;
3197
3198        FileSystem fs = FileSystem.get(conf);
3199        LOG.info("Found parent: " + parent.getRegionNameAsString());
3200        LOG.info("Found potential daughter a: " + daughterA.getRegionNameAsString());
3201        LOG.info("Found potential daughter b: " + daughterB.getRegionNameAsString());
3202        LOG.info("Trying to fix parent in overlap by removing the parent.");
3203        try {
3204          closeRegion(parent);
3205        } catch (IOException ioe) {
3206          LOG.warn("Parent region could not be closed, continuing with regular merge...", ioe);
3207          return;
3208        } catch (InterruptedException ie) {
3209          LOG.warn("Parent region could not be closed, continuing with regular merge...", ie);
3210          return;
3211        }
3212
3213        try {
3214          offline(parent.getRegionName());
3215        } catch (IOException ioe) {
3216          LOG.warn("Unable to offline parent region: " + parent.getRegionNameAsString()
3217            + ".  Just continuing with regular merge... ", ioe);
3218          return;
3219        }
3220
3221        try {
3222          HBaseFsckRepair.removeParentInMeta(conf, parent.getHdfsHRI());
3223        } catch (IOException ioe) {
3224          LOG.warn("Unable to remove parent region in META: " + parent.getRegionNameAsString()
3225            + ".  Just continuing with regular merge... ", ioe);
3226          return;
3227        }
3228
3229        sidelineRegionDir(fs, parent);
3230        LOG.info("[" + thread + "] Sidelined parent region dir "+ parent.getHdfsRegionDir() + " into " +
3231          getSidelineDir());
3232        debugLsr(parent.getHdfsRegionDir());
3233
3234        // Make sure we don't have the parents and daughters around
3235        overlap.remove(parent);
3236        overlap.remove(daughterA);
3237        overlap.remove(daughterB);
3238
3239        LOG.info("Done fixing split.");
3240
3241      }
3242
3243      void mergeOverlaps(Collection<HbckInfo> overlap)
3244          throws IOException {
3245        String thread = Thread.currentThread().getName();
3246        LOG.info("== [" + thread + "] Merging regions into one region: "
3247          + Joiner.on(",").join(overlap));
3248        // get the min / max range and close all concerned regions
3249        Pair<byte[], byte[]> range = null;
3250        for (HbckInfo hi : overlap) {
3251          if (range == null) {
3252            range = new Pair<>(hi.getStartKey(), hi.getEndKey());
3253          } else {
3254            if (RegionSplitCalculator.BYTES_COMPARATOR
3255                .compare(hi.getStartKey(), range.getFirst()) < 0) {
3256              range.setFirst(hi.getStartKey());
3257            }
3258            if (RegionSplitCalculator.BYTES_COMPARATOR
3259                .compare(hi.getEndKey(), range.getSecond()) > 0) {
3260              range.setSecond(hi.getEndKey());
3261            }
3262          }
3263          // need to close files so delete can happen.
3264          LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
3265          LOG.debug("[" + thread + "] Contained region dir before close");
3266          debugLsr(hi.getHdfsRegionDir());
3267          try {
3268            LOG.info("[" + thread + "] Closing region: " + hi);
3269            closeRegion(hi);
3270          } catch (IOException ioe) {
3271            LOG.warn("[" + thread + "] Was unable to close region " + hi
3272              + ".  Just continuing... ", ioe);
3273          } catch (InterruptedException e) {
3274            LOG.warn("[" + thread + "] Was unable to close region " + hi
3275              + ".  Just continuing... ", e);
3276          }
3277
3278          try {
3279            LOG.info("[" + thread + "] Offlining region: " + hi);
3280            offline(hi.getRegionName());
3281          } catch (IOException ioe) {
3282            LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
3283              + ".  Just continuing... ", ioe);
3284          }
3285        }
3286
3287        // create new empty container region.
3288        TableDescriptor htd = getTableInfo().getHTD();
3289        // from start key to end Key
3290        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3291            .setStartKey(range.getFirst())
3292            .setEndKey(range.getSecond())
3293            .build();
3294        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3295        LOG.info("[" + thread + "] Created new empty container region: " +
3296            newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
3297        debugLsr(region.getRegionFileSystem().getRegionDir());
3298
3299        // all target regions are closed, should be able to safely cleanup.
3300        boolean didFix = false;
3301        Path target = region.getRegionFileSystem().getRegionDir();
3302        for (HbckInfo contained : overlap) {
3303          LOG.info("[" + thread + "] Merging " + contained + " into " + target);
3304          int merges = mergeRegionDirs(target, contained);
3305          if (merges > 0) {
3306            didFix = true;
3307          }
3308        }
3309        if (didFix) {
3310          fixes++;
3311        }
3312      }
3313
3314      /**
3315       * Sideline some regions in a big overlap group so that it
3316       * will have fewer regions, and it is easier to merge them later on.
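       * For example (hypothetical numbers): with maxMerge = 5 and maxOverlapsToSideline = 2,
       * a group of 12 overlapping regions gets min(12 - 5, 2) = 2 of its largest-range regions
       * sidelined, leaving 10 regions in the group to be dealt with on a later run.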
3317       *
3318       * @param bigOverlap the overlap group with more regions than maxMerge
3319       * @throws IOException
3320       */
3321      void sidelineBigOverlaps(
3322          Collection<HbckInfo> bigOverlap) throws IOException {
3323        int overlapsToSideline = bigOverlap.size() - maxMerge;
3324        if (overlapsToSideline > maxOverlapsToSideline) {
3325          overlapsToSideline = maxOverlapsToSideline;
3326        }
3327        List<HbckInfo> regionsToSideline =
3328          RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
3329        FileSystem fs = FileSystem.get(conf);
3330        for (HbckInfo regionToSideline: regionsToSideline) {
3331          try {
3332            LOG.info("Closing region: " + regionToSideline);
3333            closeRegion(regionToSideline);
3334          } catch (IOException ioe) {
3335            LOG.warn("Was unable to close region " + regionToSideline
3336              + ".  Just continuing... ", ioe);
3337          } catch (InterruptedException e) {
3338            LOG.warn("Was unable to close region " + regionToSideline
3339              + ".  Just continuing... ", e);
3340          }
3341
3342          try {
3343            LOG.info("Offlining region: " + regionToSideline);
3344            offline(regionToSideline.getRegionName());
3345          } catch (IOException ioe) {
3346            LOG.warn("Unable to offline region from master: " + regionToSideline
3347              + ".  Just continuing... ", ioe);
3348          }
3349
3350          LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
3351          Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
3352          if (sidelineRegionDir != null) {
3353            sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3354            LOG.info("After sidelined big overlapped region: "
3355              + regionToSideline.getRegionNameAsString()
3356              + " to " + sidelineRegionDir.toString());
3357            fixes++;
3358          }
3359        }
3360      }
3361    }
3362
3363    /**
3364     * Check the region chain (from META) of this table.  We are looking for
3365     * holes, overlaps, and cycles.
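     * An illustrative example with hypothetical keys:
     * <pre>
     *   [a,b) [b,c) [c,d)   -> clean chain
     *   [a,b)       [c,d)   -> hole between b and c
     *   [a,b) [a,c)         -> duplicate start key / overlap
     * </pre>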
3366     * @return false if there are errors
3367     * @throws IOException
3368     */
3369    public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
3370      // When the table is disabled there is no need to check the region chain. If some of its
3371      // regions are accidentally deployed, the code below might report issues such as missing
3372      // start or end regions or holes in the chain, and may try to fix them, which is unwanted.
3373      if (isTableDisabled(this.tableName)) {
3374        return true;
3375      }
3376      int originalErrorsCount = errors.getErrorList().size();
3377      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
3378      SortedSet<byte[]> splits = sc.getSplits();
3379
3380      byte[] prevKey = null;
3381      byte[] problemKey = null;
3382
3383      if (splits.isEmpty()) {
3384        // no region for this table
3385        handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
3386      }
3387
3388      for (byte[] key : splits) {
3389        Collection<HbckInfo> ranges = regions.get(key);
3390        if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
3391          for (HbckInfo rng : ranges) {
3392            handler.handleRegionStartKeyNotEmpty(rng);
3393          }
3394        }
3395
3396        // check for degenerate ranges
3397        for (HbckInfo rng : ranges) {
3398          // special endkey case converts '' to null
3399          byte[] endKey = rng.getEndKey();
3400          endKey = (endKey.length == 0) ? null : endKey;
3401          if (Bytes.equals(rng.getStartKey(),endKey)) {
3402            handler.handleDegenerateRegion(rng);
3403          }
3404        }
3405
3406        if (ranges.size() == 1) {
3407          // this split key is ok -- no overlap, not a hole.
3408          if (problemKey != null) {
3409            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3410          }
3411          problemKey = null; // fell through, no more problem.
3412        } else if (ranges.size() > 1) {
3413          // set the new problem key group name; if we already have a problem key, just
3414          // keep using it.
3415          if (problemKey == null) {
3416            // only for overlap regions.
3417            LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
3418            problemKey = key;
3419          }
3420          overlapGroups.putAll(problemKey, ranges);
3421
3422          // record errors
3423          ArrayList<HbckInfo> subRange = new ArrayList<>(ranges);
3424          // this is dumb and O(n^2), but it shouldn't happen often
3425          for (HbckInfo r1 : ranges) {
3426            if (r1.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
3427            subRange.remove(r1);
3428            for (HbckInfo r2 : subRange) {
3429              if (r2.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
3430              // general case of same start key
3431              if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3432                handler.handleDuplicateStartKeys(r1,r2);
3433              } else if (Bytes.compareTo(r1.getEndKey(), r2.getStartKey())==0 &&
3434                r1.getHdfsHRI().getRegionId() == r2.getHdfsHRI().getRegionId()) {
3435                LOG.info("this is a split, log to splits");
3436                handler.handleSplit(r1, r2);
3437              } else {
3438                // overlap
3439                handler.handleOverlapInRegionChain(r1, r2);
3440              }
3441            }
3442          }
3443
3444        } else if (ranges.isEmpty()) {
3445          if (problemKey != null) {
3446            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3447          }
3448          problemKey = null;
3449
3450          byte[] holeStopKey = sc.getSplits().higher(key);
3451          // if higher key is null we reached the top.
3452          if (holeStopKey != null) {
3453            // hole
3454            handler.handleHoleInRegionChain(key, holeStopKey);
3455          }
3456        }
3457        prevKey = key;
3458      }
3459
3460      // When the last region of a table is proper and has an empty end key, 'prevKey'
3461      // will be null.
3462      if (prevKey != null) {
3463        handler.handleRegionEndKeyNotEmpty(prevKey);
3464      }
3465
3466      // TODO fold this into the TableIntegrityHandler
3467      if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3468        boolean ok = handleOverlapsParallel(handler, prevKey);
3469        if (!ok) {
3470          return false;
3471        }
3472      } else {
3473        for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3474          handler.handleOverlapGroup(overlap);
3475        }
3476      }
3477
3478      if (details) {
3479        // do full region split map dump
3480        errors.print("---- Table '"  +  this.tableName
3481            + "': region split map");
3482        dump(splits, regions);
3483        errors.print("---- Table '"  +  this.tableName
3484            + "': overlap groups");
3485        dumpOverlapProblems(overlapGroups);
3486        errors.print("There are " + overlapGroups.keySet().size()
3487            + " overlap groups with " + overlapGroups.size()
3488            + " overlapping regions");
3489      }
3490      if (!sidelinedRegions.isEmpty()) {
3491        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3492        errors.print("---- Table '"  +  this.tableName
3493            + "': sidelined big overlapped regions");
3494        dumpSidelinedRegions(sidelinedRegions);
3495      }
3496      return errors.getErrorList().size() == originalErrorsCount;
3497    }
3498
3499    private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3500        throws IOException {
3501      // we parallelize the overlap handler for the case where we have lots of groups to fix.  We can
3502      // safely assume each group is independent.
3503      List<WorkItemOverlapMerge> merges = new ArrayList<>(overlapGroups.size());
3504      List<Future<Void>> rets;
3505      for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3506        //
3507        merges.add(new WorkItemOverlapMerge(overlap, handler));
3508      }
3509      try {
3510        rets = executor.invokeAll(merges);
3511      } catch (InterruptedException e) {
3512        LOG.error("Overlap merges were interrupted", e);
3513        return false;
3514      }
3515      for(int i=0; i<merges.size(); i++) {
3516        WorkItemOverlapMerge work = merges.get(i);
3517        Future<Void> f = rets.get(i);
3518        try {
3519          f.get();
3520        } catch(ExecutionException e) {
3521          LOG.warn("Failed to merge overlap group " + work, e.getCause());
3522        } catch (InterruptedException e) {
3523          LOG.error("Waiting for overlap merges was interrupted", e);
3524          return false;
3525        }
3526      }
3527      return true;
3528    }
3529
3530    /**
3531     * Dumps the region split map in a visually reasonable way for debugging.
3532     *
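     * A hypothetical example of the output, one line per split key, each entry showing a
     * region followed by its end key:
     * <pre>
     *   row-100:    [ {region-A}, row-200]    [ {region-B}, row-250]
     * </pre>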
3533     * @param splits the split points (start keys) of this table's regions
3534     * @param regions multimap from each split point to the regions covering it
3535     */
3536    void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3537      // we display this way because the last end key should be displayed as well.
3538      StringBuilder sb = new StringBuilder();
3539      for (byte[] k : splits) {
3540        sb.setLength(0); // clear out existing buffer, if any.
3541        sb.append(Bytes.toStringBinary(k) + ":\t");
3542        for (HbckInfo r : regions.get(k)) {
3543          sb.append("[ "+ r.toString() + ", "
3544              + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3545        }
3546        errors.print(sb.toString());
3547      }
3548    }
3549  }
3550
3551  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3552    // we display this way because the last end key should be displayed as
3553    // well.
3554    for (byte[] k : regions.keySet()) {
3555      errors.print(Bytes.toStringBinary(k) + ":");
3556      for (HbckInfo r : regions.get(k)) {
3557        errors.print("[ " + r.toString() + ", "
3558            + Bytes.toStringBinary(r.getEndKey()) + "]");
3559      }
3560      errors.print("----");
3561    }
3562  }
3563
3564  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3565    for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3566      TableName tableName = entry.getValue().getTableName();
3567      Path path = entry.getKey();
3568      errors.print("This sidelined region dir should be bulk loaded: "
3569        + path.toString());
3570      errors.print("Bulk load command looks like: "
3571        + "hbase org.apache.hadoop.hbase.tool.LoadIncrementalHFiles "
3572        + path.toUri().getPath() + " "+ tableName);
3573    }
3574  }
3575
3576  public Multimap<byte[], HbckInfo> getOverlapGroups(
3577      TableName table) {
3578    TableInfo ti = tablesInfo.get(table);
3579    return ti.overlapGroups;
3580  }
3581
3582  /**
3583   * Return a list of user-space table names whose metadata have not been
3584   * modified in the last few milliseconds specified by timelag.
3585   * If none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3586   * SPLITA_QUALIFIER or SPLITB_QUALIFIER columns have changed within the last
3587   * {@code timelag} milliseconds, then the table is a candidate to be returned.
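   * For example (hypothetical numbers): with {@code timelag} = 60000 ms, a table whose first
   * region's hbase:meta row was last modified 90 seconds ago is returned, while one modified
   * 30 seconds ago is skipped and {@code numSkipped} is incremented.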
3588   * @return tables that have not been modified recently
3590   */
3591  TableDescriptor[] getTables(AtomicInteger numSkipped) {
3592    List<TableName> tableNames = new ArrayList<>();
3593    long now = EnvironmentEdgeManager.currentTime();
3594
3595    for (HbckInfo hbi : regionInfoMap.values()) {
3596      MetaEntry info = hbi.metaEntry;
3597
3598      // if the start key is empty, then we have found the first region of a table.
3599      // pick only those tables that were not modified in the last few milliseconds.
3600      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3601        if (info.modTime + timelag < now) {
3602          tableNames.add(info.getTable());
3603        } else {
3604          numSkipped.incrementAndGet(); // one more in-flux table
3605        }
3606      }
3607    }
3608    return getTableDescriptors(tableNames);
3609  }
3610
3611  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
3612    LOG.info("getTableDescriptors == tableNames => " + tableNames);
3613    try (Connection conn = ConnectionFactory.createConnection(getConf());
3614        Admin admin = conn.getAdmin()) {
3615      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
3616      return tds.toArray(new TableDescriptor[tds.size()]);
3617    } catch (IOException e) {
3618      LOG.debug("Exception getting table descriptors", e);
3619    }
3620    return new TableDescriptor[0];
3621  }
3622
3623  /**
3624   * Gets the entry in regionInfo corresponding to the given encoded
3625   * region name. If the region has not been seen yet, a new entry is added
3626   * and returned.
3627   */
3628  private synchronized HbckInfo getOrCreateInfo(String name) {
3629    HbckInfo hbi = regionInfoMap.get(name);
3630    if (hbi == null) {
3631      hbi = new HbckInfo(null);
3632      regionInfoMap.put(name, hbi);
3633    }
3634    return hbi;
3635  }
3636
3637  private void checkAndFixReplication() throws ReplicationException {
3638    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
3639    checker.checkUnDeletedQueues();
3640
3641    if (checker.hasUnDeletedQueues() && this.fixReplication) {
3642      checker.fixUnDeletedQueues();
3643      setShouldRerun();
3644    }
3645  }
3646
3647  /**
3648   * Check values in regionInfo for hbase:meta.
3649   * Check whether zero or more than one region with hbase:meta is found.
3650   * If there are inconsistencies (i.e. zero or more than one region
3651   * pretends to be holding hbase:meta) try to fix that and report an error.
3652   * @throws IOException from HBaseFsckRepair functions
3653   * @throws KeeperException
3654   * @throws InterruptedException
3655   */
3656  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3657    Map<Integer, HbckInfo> metaRegions = new HashMap<>();
3658    for (HbckInfo value : regionInfoMap.values()) {
3659      if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3660        metaRegions.put(value.getReplicaId(), value);
3661      }
3662    }
3663    int metaReplication = admin.getDescriptor(TableName.META_TABLE_NAME)
3664        .getRegionReplication();
3665    boolean noProblem = true;
3666    // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
3667    // Check the deployed servers. It should be exactly one server for each replica.
3668    for (int i = 0; i < metaReplication; i++) {
3669      HbckInfo metaHbckInfo = metaRegions.remove(i);
3670      List<ServerName> servers = new ArrayList<>();
3671      if (metaHbckInfo != null) {
3672        servers = metaHbckInfo.deployedOn;
3673      }
3674      if (servers.size() != 1) {
3675        noProblem = false;
3676        if (servers.isEmpty()) {
3677          assignMetaReplica(i);
3678        } else if (servers.size() > 1) {
3679          errors.reportError(ERROR_CODE.MULTI_META_REGION,
3680              "hbase:meta, replicaId " + metaHbckInfo.getReplicaId() +
3681              " is found on more than one region server.");
3682          if (shouldFixAssignments()) {
3683            errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3684                         metaHbckInfo.getReplicaId() +"..");
3685            setShouldRerun();
3686            // try to fix it (treat it as a dupe assignment)
3687            HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3688          }
3689        }
3690      }
3691    }
3692    // unassign whatever is remaining in metaRegions. They are excess replicas.
3693    for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3694      noProblem = false;
3695      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3696          "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3697          ", deployed " + metaRegions.size());
3698      if (shouldFixAssignments()) {
3699        errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3700            " of hbase:meta..");
3701        setShouldRerun();
3702        unassignMetaReplica(entry.getValue());
3703      }
3704    }
3705    // if noProblem is false, rerun hbck with hopefully fixed META
3706    // if noProblem is true, no errors, so continue normally
3707    return noProblem;
3708  }
3709
3710  private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3711      KeeperException {
3712    undeployRegions(hi);
3713    ZKUtil.deleteNode(zkw, zkw.getZNodePaths().getZNodeForReplica(hi.metaEntry.getReplicaId()));
3714  }
3715
3716  private void assignMetaReplica(int replicaId)
3717      throws IOException, KeeperException, InterruptedException {
3718    errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3719        replicaId +" is not found on any region.");
3720    if (shouldFixAssignments()) {
3721      errors.print("Trying to fix a problem with hbase:meta..");
3722      setShouldRerun();
3723      // try to fix it (treat it as unassigned region)
3724      RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3725          RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
3726      HBaseFsckRepair.fixUnassigned(admin, h);
3727      HBaseFsckRepair.waitUntilAssigned(admin, h);
3728    }
3729  }
3730
3731  /**
3732   * Scan hbase:meta, adding all regions found to the regionInfo map.
3733   * @throws IOException if an error is encountered
3734   */
3735  boolean loadMetaEntries() throws IOException {
3736    MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3737      int countRecord = 1;
3738
3739      // comparator to sort Cells by timestamp, so we can find the latest modification
3740      final Comparator<Cell> comp = new Comparator<Cell>() {
3741        @Override
3742        public int compare(Cell k1, Cell k2) {
3743          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3744        }
3745      };
3746
3747      @Override
3748      public boolean visit(Result result) throws IOException {
3749        try {
3750
3751          // record the latest modification of this META record
3752          long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3753          RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3754          if (rl == null) {
3755            emptyRegionInfoQualifiers.add(result);
3756            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3757              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3758            return true;
3759          }
3760          ServerName sn = null;
3761          if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null ||
3762              rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3763            emptyRegionInfoQualifiers.add(result);
3764            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3765              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3766            return true;
3767          }
3768          RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3769          if (!(isTableIncluded(hri.getTable())
3770              || hri.isMetaRegion())) {
3771            return true;
3772          }
3773          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
3774          for (HRegionLocation h : rl.getRegionLocations()) {
3775            if (h == null || h.getRegionInfo() == null) {
3776              continue;
3777            }
3778            sn = h.getServerName();
3779            hri = h.getRegionInfo();
3780
3781            MetaEntry m = null;
3782            if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
3783              m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3784            } else {
3785              m = new MetaEntry(hri, sn, ts, null, null);
3786            }
3787            HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3788            if (previous == null) {
3789              regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3790            } else if (previous.metaEntry == null) {
3791              previous.metaEntry = m;
3792            } else {
3793              throw new IOException("Two entries in hbase:meta are the same: " + previous);
3794            }
3795          }
3796          PairOfSameType<RegionInfo> mergeRegions = MetaTableAccessor.getMergeRegions(result);
3797          for (RegionInfo mergeRegion : new RegionInfo[] {
3798              mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3799            if (mergeRegion != null) {
3800              // This region has already been merged
3801              HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3802              hbInfo.setMerged(true);
3803            }
3804          }
3805
3806          // show proof of progress to the user, once for every 100 records.
3807          if (countRecord % 100 == 0) {
3808            errors.progress();
3809          }
3810          countRecord++;
3811          return true;
3812        } catch (RuntimeException e) {
3813          LOG.error("Result=" + result);
3814          throw e;
3815        }
3816      }
3817    };
3818    if (!checkMetaOnly) {
3819      // Scan hbase:meta to pick up user regions
3820      MetaTableAccessor.fullScanRegions(connection, visitor);
3821    }
3822
3823    errors.print("");
3824    return true;
3825  }
3826
3827  /**
3828   * Stores the regioninfo entries scanned from META
3829   */
3830  static class MetaEntry extends HRegionInfo {
3831    ServerName regionServer;   // server hosting this region
3832    long modTime;          // timestamp of the most recent modification of this metadata
3833    RegionInfo splitA, splitB; //split daughters
3834
3835    public MetaEntry(RegionInfo rinfo, ServerName regionServer, long modTime) {
3836      this(rinfo, regionServer, modTime, null, null);
3837    }
3838
3839    public MetaEntry(RegionInfo rinfo, ServerName regionServer, long modTime,
3840        RegionInfo splitA, RegionInfo splitB) {
3841      super(rinfo);
3842      this.regionServer = regionServer;
3843      this.modTime = modTime;
3844      this.splitA = splitA;
3845      this.splitB = splitB;
3846    }
3847
3848    @Override
3849    public boolean equals(Object o) {
3850      boolean superEq = super.equals(o);
3851      if (!superEq) {
3852        return superEq;
3853      }
3854
3855      MetaEntry me = (MetaEntry) o;
3856      if (!regionServer.equals(me.regionServer)) {
3857        return false;
3858      }
3859      return (modTime == me.modTime);
3860    }
3861
3862    @Override
3863    public int hashCode() {
3864      int hash = Arrays.hashCode(getRegionName());
3865      hash = (int) (hash ^ getRegionId());
3866      hash ^= Arrays.hashCode(getStartKey());
3867      hash ^= Arrays.hashCode(getEndKey());
3868      hash ^= Boolean.valueOf(isOffline()).hashCode();
3869      hash ^= getTable().hashCode();
3870      if (regionServer != null) {
3871        hash ^= regionServer.hashCode();
3872      }
3873      hash = (int) (hash ^ modTime);
3874      return hash;
3875    }
3876  }
3877
3878  /**
3879   * Stores the regioninfo entries from HDFS
3880   */
3881  static class HdfsEntry {
3882    RegionInfo hri;
3883    Path hdfsRegionDir = null;
3884    long hdfsRegionDirModTime  = 0;
3885    boolean hdfsRegioninfoFilePresent = false;
3886    boolean hdfsOnlyEdits = false;
3887  }
3888
3889  /**
3890   * Stores the regioninfo retrieved from Online region servers.
3891   */
3892  static class OnlineEntry {
3893    RegionInfo hri;
3894    ServerName hsa;
3895
3896    @Override
3897    public String toString() {
3898      return hsa.toString() + ";" + hri.getRegionNameAsString();
3899    }
3900  }
3901
3902  /**
3903   * Maintain information about a particular region.  It gathers information
3904   * from three places -- HDFS, META, and region servers.
3905   */
3906  public static class HbckInfo implements KeyRange {
3907    private MetaEntry metaEntry = null; // info in META
3908    private HdfsEntry hdfsEntry = null; // info in HDFS
3909    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3910    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3911    private boolean skipChecks = false; // whether to skip further checks to this region info.
3912    private boolean isMerged = false;// whether this region has already been merged into another one
3913    private int deployedReplicaId = RegionInfo.DEFAULT_REPLICA_ID;
3914    private RegionInfo primaryHRIForDeployedReplica = null;
3915
3916    HbckInfo(MetaEntry metaEntry) {
3917      this.metaEntry = metaEntry;
3918    }
3919
3920    public synchronized int getReplicaId() {
3921      return metaEntry != null ? metaEntry.getReplicaId() : deployedReplicaId;
3922    }
3923
3924    public synchronized void addServer(RegionInfo hri, ServerName server) {
3925      OnlineEntry rse = new OnlineEntry();
3926      rse.hri = hri;
3927      rse.hsa = server;
3928      this.deployedEntries.add(rse);
3929      this.deployedOn.add(server);
3930      // save the replicaId that we see deployed in the cluster
3931      this.deployedReplicaId = hri.getReplicaId();
3932      this.primaryHRIForDeployedReplica =
3933          RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3934    }
3935
3936    @Override
3937    public synchronized String toString() {
3938      StringBuilder sb = new StringBuilder();
3939      sb.append("{ meta => ");
3940      sb.append((metaEntry != null) ? metaEntry.getRegionNameAsString() : "null");
3941      sb.append(", hdfs => " + getHdfsRegionDir());
3942      sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries));
3943      sb.append(", replicaId => " + getReplicaId());
3944      sb.append(" }");
3945      return sb.toString();
3946    }
3947
3948    @Override
3949    public byte[] getStartKey() {
3950      if (this.metaEntry != null) {
3951        return this.metaEntry.getStartKey();
3952      } else if (this.hdfsEntry != null) {
3953        return this.hdfsEntry.hri.getStartKey();
3954      } else {
3955        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3956        return null;
3957      }
3958    }
3959
3960    @Override
3961    public byte[] getEndKey() {
3962      if (this.metaEntry != null) {
3963        return this.metaEntry.getEndKey();
3964      } else if (this.hdfsEntry != null) {
3965        return this.hdfsEntry.hri.getEndKey();
3966      } else {
3967        LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3968        return null;
3969      }
3970    }
3971
3972    public TableName getTableName() {
3973      if (this.metaEntry != null) {
3974        return this.metaEntry.getTable();
3975      } else if (this.hdfsEntry != null) {
3976        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3977        // so we get the name from the Path
3978        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3979        return FSUtils.getTableName(tableDir);
3980      } else {
3981        // return the info from the first online/deployed hri
3982        for (OnlineEntry e : deployedEntries) {
3983          return e.hri.getTable();
3984        }
3985        return null;
3986      }
3987    }
3988
3989    public String getRegionNameAsString() {
3990      if (metaEntry != null) {
3991        return metaEntry.getRegionNameAsString();
3992      } else if (hdfsEntry != null) {
3993        if (hdfsEntry.hri != null) {
3994          return hdfsEntry.hri.getRegionNameAsString();
3995        }
3996      } else {
3997        // return the info from the first online/deployed hri
3998        for (OnlineEntry e : deployedEntries) {
3999          return e.hri.getRegionNameAsString();
4000        }
4001      }
4002      return null;
4003    }
4004
4005    public byte[] getRegionName() {
4006      if (metaEntry != null) {
4007        return metaEntry.getRegionName();
4008      } else if (hdfsEntry != null) {
4009        return hdfsEntry.hri.getRegionName();
4010      } else {
4011        // return the info from the first online/deployed hri
4012        for (OnlineEntry e : deployedEntries) {
4013          return e.hri.getRegionName();
4014        }
4015        return null;
4016      }
4017    }
4018
4019    public RegionInfo getPrimaryHRIForDeployedReplica() {
4020      return primaryHRIForDeployedReplica;
4021    }
4022
4023    Path getHdfsRegionDir() {
4024      if (hdfsEntry == null) {
4025        return null;
4026      }
4027      return hdfsEntry.hdfsRegionDir;
4028    }
4029
4030    boolean containsOnlyHdfsEdits() {
4031      if (hdfsEntry == null) {
4032        return false;
4033      }
4034      return hdfsEntry.hdfsOnlyEdits;
4035    }
4036
4037    boolean isHdfsRegioninfoPresent() {
4038      if (hdfsEntry == null) {
4039        return false;
4040      }
4041      return hdfsEntry.hdfsRegioninfoFilePresent;
4042    }
4043
4044    long getModTime() {
4045      if (hdfsEntry == null) {
4046        return 0;
4047      }
4048      return hdfsEntry.hdfsRegionDirModTime;
4049    }
4050
4051    RegionInfo getHdfsHRI() {
4052      if (hdfsEntry == null) {
4053        return null;
4054      }
4055      return hdfsEntry.hri;
4056    }
4057
4058    public void setSkipChecks(boolean skipChecks) {
4059      this.skipChecks = skipChecks;
4060    }
4061
4062    public boolean isSkipChecks() {
4063      return skipChecks;
4064    }
4065
4066    public void setMerged(boolean isMerged) {
4067      this.isMerged = isMerged;
4068    }
4069
4070    public boolean isMerged() {
4071      return this.isMerged;
4072    }
4073  }
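
  // Illustrative sketch only (not used by hbck itself): how the three sources tracked by
  // HbckInfo are typically filled in and later reconciled. The regionInfo, serverName and
  // regionDirPath values below are placeholders, not real cluster data.
  //
  //   HbckInfo hbi = new HbckInfo(null);        // nothing known from hbase:meta yet
  //   hbi.addServer(regionInfo, serverName);    // region seen deployed on a region server
  //   HdfsEntry he = new HdfsEntry();
  //   he.hdfsRegionDir = regionDirPath;         // region directory found on HDFS
  //   hbi.hdfsEntry = he;
  //   // Accessors such as getTableName() and getStartKey() then prefer the meta entry,
  //   // falling back to the HDFS entry and finally to the deployed entries.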
4074
4075  final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
4076    @Override
4077    public int compare(HbckInfo l, HbckInfo r) {
4078      if (l == r) {
4079        // same instance
4080        return 0;
4081      }
4082
4083      int tableCompare = l.getTableName().compareTo(r.getTableName());
4084      if (tableCompare != 0) {
4085        return tableCompare;
4086      }
4087
4088      int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
4089          l.getStartKey(), r.getStartKey());
4090      if (startComparison != 0) {
4091        return startComparison;
4092      }
4093
4094      // Special case for absolute endkey
4095      byte[] endKey = r.getEndKey();
4096      endKey = (endKey.length == 0) ? null : endKey;
4097      byte[] endKey2 = l.getEndKey();
4098      endKey2 = (endKey2.length == 0) ? null : endKey2;
4099      int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
4100          endKey2,  endKey);
4101
4102      if (endComparison != 0) {
4103        return endComparison;
4104      }
4105
4106      // Use regionId as a tiebreaker.
4107      // A null hdfsEntry is considered after all possible values, so it compares as larger.
4108      if (l.hdfsEntry == null && r.hdfsEntry == null) {
4109        return 0;
4110      }
4111      if (l.hdfsEntry == null && r.hdfsEntry != null) {
4112        return 1;
4113      }
4114      // l.hdfsEntry must not be null
4115      if (r.hdfsEntry == null) {
4116        return -1;
4117      }
4118      // both l.hdfsEntry and r.hdfsEntry must not be null.
4119      return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
4120    }
4121  };
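
  // A hedged sketch of how the comparator above behaves: regions sort by table name, then
  // start key, then end key (an empty end key is normalized to null so it sorts as the
  // absolute last key), and finally by the HDFS regionId, with a missing hdfsEntry sorting
  // after a present one. For example:
  //
  //   List<HbckInfo> infos = new ArrayList<>(regionInfoMap.values());
  //   Collections.sort(infos, cmp);   // infos is now in table/start-key/end-key order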
4122
4123  /**
4124   * Prints summary of all tables found on the system.
4125   */
4126  private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
4127    StringBuilder sb = new StringBuilder();
4128    int numOfSkippedRegions;
4129    errors.print("Summary:");
4130    for (TableInfo tInfo : tablesInfo.values()) {
4131      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
4132          skippedRegions.get(tInfo.getName()).size() : 0;
4133
4134      if (errors.tableHasErrors(tInfo)) {
4135        errors.print("Table " + tInfo.getName() + " is inconsistent.");
4136      } else if (numOfSkippedRegions > 0) {
4137        errors.print("Table " + tInfo.getName() + " is okay (with "
4138          + numOfSkippedRegions + " skipped regions).");
4139      } else {
4140        errors.print("Table " + tInfo.getName() + " is okay.");
4141      }
4143      errors.print("    Number of regions: " + tInfo.getNumRegions());
4144      if (numOfSkippedRegions > 0) {
4145        Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
4146        System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
4147        System.out.println("      List of skipped regions:");
4148        for(String sr : skippedRegionStrings) {
4149          System.out.println("        " + sr);
4150        }
4151      }
4152      sb.setLength(0); // clear out existing buffer, if any.
4153      sb.append("    Deployed on: ");
4154      for (ServerName server : tInfo.deployedOn) {
4155        sb.append(" " + server.toString());
4156      }
4157      errors.print(sb.toString());
4158    }
4159  }
4160
4161  static ErrorReporter getErrorReporter(
4162      final Configuration conf) throws ClassNotFoundException {
4163    Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
4164    return ReflectionUtils.newInstance(reporter, conf);
4165  }
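
  // A minimal sketch of swapping in a custom reporter via the "hbasefsck.errorreporter"
  // configuration key read above. MyErrorReporter is a hypothetical ErrorReporter
  // implementation, not something shipped with HBase.
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setClass("hbasefsck.errorreporter", MyErrorReporter.class, ErrorReporter.class);
  //   ErrorReporter reporter = getErrorReporter(conf);   // instantiates MyErrorReporter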
4166
4167  public interface ErrorReporter {
4168    enum ERROR_CODE {
4169      UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
4170      NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
4171      NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
4172      FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
4173      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
4174      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
4175      LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR,
4176      ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE, DUPE_ENDKEYS,
4177      UNSUPPORTED_OPTION, INVALID_TABLE
4178    }
4179    void clear();
4180    void report(String message);
4181    void reportError(String message);
4182    void reportError(ERROR_CODE errorCode, String message);
4183    void reportError(ERROR_CODE errorCode, String message, TableInfo table);
4184    void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
4185    void reportError(
4186      ERROR_CODE errorCode,
4187      String message,
4188      TableInfo table,
4189      HbckInfo info1,
4190      HbckInfo info2
4191    );
4192    int summarize();
4193    void detail(String details);
4194    ArrayList<ERROR_CODE> getErrorList();
4195    void progress();
4196    void print(String message);
4197    void resetErrors();
4198    boolean tableHasErrors(TableInfo table);
4199  }
4200
4201  static class PrintingErrorReporter implements ErrorReporter {
4202    public int errorCount = 0;
4203    private int showProgress;
4204    // How frequently calls to progress() will create output
4205    private static final int progressThreshold = 100;
4206
4207    Set<TableInfo> errorTables = new HashSet<>();
4208
4209    // for use by unit tests to verify which errors were discovered
4210    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
4211
4212    @Override
4213    public void clear() {
4214      errorTables.clear();
4215      errorList.clear();
4216      errorCount = 0;
4217    }
4218
4219    @Override
4220    public synchronized void reportError(ERROR_CODE errorCode, String message) {
4221      if (errorCode == ERROR_CODE.WRONG_USAGE) {
4222        System.err.println(message);
4223        return;
4224      }
4225
4226      errorList.add(errorCode);
4227      if (!summary) {
4228        System.out.println("ERROR: " + message);
4229      }
4230      errorCount++;
4231      showProgress = 0;
4232    }
4233
4234    @Override
4235    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
4236      errorTables.add(table);
4237      reportError(errorCode, message);
4238    }
4239
4240    @Override
4241    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4242                                         HbckInfo info) {
4243      errorTables.add(table);
4244      String reference = "(region " + info.getRegionNameAsString() + ")";
4245      reportError(errorCode, reference + " " + message);
4246    }
4247
4248    @Override
4249    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4250                                         HbckInfo info1, HbckInfo info2) {
4251      errorTables.add(table);
4252      String reference = "(regions " + info1.getRegionNameAsString()
4253          + " and " + info2.getRegionNameAsString() + ")";
4254      reportError(errorCode, reference + " " + message);
4255    }
4256
4257    @Override
4258    public synchronized void reportError(String message) {
4259      reportError(ERROR_CODE.UNKNOWN, message);
4260    }
4261
4262    /**
4263     * Report error information, but do not increment the error count.  Intended for cases
4264     * where the actual error would have been reported previously.
4265     * @param message the error information to report
4266     */
4267    @Override
4268    public synchronized void report(String message) {
4269      if (! summary) {
4270        System.out.println("ERROR: " + message);
4271      }
4272      showProgress = 0;
4273    }
4274
4275    @Override
4276    public synchronized int summarize() {
4277      System.out.println(Integer.toString(errorCount) +
4278                         " inconsistencies detected.");
4279      if (errorCount == 0) {
4280        System.out.println("Status: OK");
4281        return 0;
4282      } else {
4283        System.out.println("Status: INCONSISTENT");
4284        return -1;
4285      }
4286    }
4287
4288    @Override
4289    public ArrayList<ERROR_CODE> getErrorList() {
4290      return errorList;
4291    }
4292
4293    @Override
4294    public synchronized void print(String message) {
4295      if (!summary) {
4296        System.out.println(message);
4297      }
4298    }
4299
4300    @Override
4301    public boolean tableHasErrors(TableInfo table) {
4302      return errorTables.contains(table);
4303    }
4304
4305    @Override
4306    public void resetErrors() {
4307      errorCount = 0;
4308    }
4309
4310    @Override
4311    public synchronized void detail(String message) {
4312      if (details) {
4313        System.out.println(message);
4314      }
4315      showProgress = 0;
4316    }
4317
4318    @Override
4319    public synchronized void progress() {
4320      if (showProgress++ == progressThreshold) {
4321        if (!summary) {
4322          System.out.print(".");
4323        }
4324        showProgress = 0;
4325      }
4326    }
4327  }
4328
4329  /**
4330   * Contact a region server and get all information from it
4331   */
4332  static class WorkItemRegion implements Callable<Void> {
4333    private final HBaseFsck hbck;
4334    private final ServerName rsinfo;
4335    private final ErrorReporter errors;
4336    private final ClusterConnection connection;
4337
4338    WorkItemRegion(HBaseFsck hbck, ServerName info,
4339                   ErrorReporter errors, ClusterConnection connection) {
4340      this.hbck = hbck;
4341      this.rsinfo = info;
4342      this.errors = errors;
4343      this.connection = connection;
4344    }
4345
4346    @Override
4347    public synchronized Void call() throws IOException {
4348      errors.progress();
4349      try {
4350        BlockingInterface server = connection.getAdmin(rsinfo);
4351
4352        // list all online regions from this region server
4353        List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
4354        regions = filterRegions(regions);
4355
4356        if (details) {
4357          errors.detail("RegionServer: " + rsinfo.getServerName() +
4358                           " number of regions: " + regions.size());
4359          for (RegionInfo rinfo: regions) {
4360            errors.detail("  " + rinfo.getRegionNameAsString() +
4361                             " id: " + rinfo.getRegionId() +
4362                             " encoded_name: " + rinfo.getEncodedName() +
4363                             " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
4364                             " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
4365          }
4366        }
4367
4368        // check to see if the existence of this region matches the region in META
4369        for (RegionInfo r:regions) {
4370          HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4371          hbi.addServer(r, rsinfo);
4372        }
4373      } catch (IOException e) {          // unable to connect to the region server.
4374        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
4375          " Unable to fetch region information. " + e);
4376        throw e;
4377      }
4378      return null;
4379    }
4380
4381    private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
4382      List<RegionInfo> ret = Lists.newArrayList();
4383      for (RegionInfo hri : regions) {
4384        if (hri.isMetaRegion() || (!hbck.checkMetaOnly
4385            && hbck.isTableIncluded(hri.getTable()))) {
4386          ret.add(hri);
4387        }
4388      }
4389      return ret;
4390    }
4391  }
4392
4393  /**
4394   * Contact hdfs and gather region directory information for the specified
4395   * table directory.
4396   */
4397  class WorkItemHdfsDir implements Callable<Void> {
4398    private FileStatus tableDir;
4399    private ErrorReporter errors;
4400    private FileSystem fs;
4401
4402    WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4403                    FileStatus status) {
4404      this.fs = fs;
4405      this.tableDir = status;
4406      this.errors = errors;
4407    }
4408
4409    @Override
4410    public synchronized Void call() throws InterruptedException, ExecutionException {
4411      final Vector<Exception> exceptions = new Vector<>();
4412
4413      try {
4414        final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4415        final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
4416
4417        for (final FileStatus regionDir : regionDirs) {
4418          errors.progress();
4419          final String encodedName = regionDir.getPath().getName();
4420          // ignore directories that aren't hexadecimal
4421          if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
4422            continue;
4423          }
4424
4425          if (!exceptions.isEmpty()) {
4426            break;
4427          }
4428
4429          futures.add(executor.submit(new Runnable() {
4430            @Override
4431            public void run() {
4432              try {
4433                LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
4434
4435                Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
4436                boolean regioninfoFileExists = fs.exists(regioninfoFile);
4437
4438                if (!regioninfoFileExists) {
4439                  // As tables become larger, it is more and more likely that by the time you
4440                  // reach a given region it will already be gone due to region splits/merges.
4441                  if (!fs.exists(regionDir.getPath())) {
4442                    LOG.warn("By the time we tried to process this region dir it was already gone: "
4443                        + regionDir.getPath());
4444                    return;
4445                  }
4446                }
4447
4448                HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4449                HdfsEntry he = new HdfsEntry();
4450                synchronized (hbi) {
4451                  if (hbi.getHdfsRegionDir() != null) {
4452                    errors.print("Directory " + encodedName + " duplicate?? " +
4453                                 hbi.getHdfsRegionDir());
4454                  }
4455
4456                  he.hdfsRegionDir = regionDir.getPath();
4457                  he.hdfsRegionDirModTime = regionDir.getModificationTime();
4458                  he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4459                  // we add to orphan list when we attempt to read .regioninfo
4460
4461                  // Set a flag if this region contains only edits
4462                  // This is special case if a region is left after split
4463                  he.hdfsOnlyEdits = true;
4464                  FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4465                  Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4466                  for (FileStatus subDir : subDirs) {
4467                    errors.progress();
4468                    String sdName = subDir.getPath().getName();
4469                    if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4470                      he.hdfsOnlyEdits = false;
4471                      break;
4472                    }
4473                  }
4474                  hbi.hdfsEntry = he;
4475                }
4476              } catch (Exception e) {
4477                LOG.error("Could not load region dir", e);
4478                exceptions.add(e);
4479              }
4480            }
4481          }));
4482        }
4483
4484        // Ensure all pending tasks are complete (or that we run into an exception)
4485        for (Future<?> f : futures) {
4486          if (!exceptions.isEmpty()) {
4487            break;
4488          }
4489          try {
4490            f.get();
4491          } catch (ExecutionException e) {
4492            LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
4493            // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4494          }
4495        }
4496      } catch (IOException e) {
4497        LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4498        exceptions.add(e);
4499      } finally {
4500        if (!exceptions.isEmpty()) {
4501          errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4502              + tableDir.getPath().getName()
4503              + " Unable to fetch all HDFS region information. ");
4504          // Just throw the first exception as an indication something bad happened
4505          // Don't need to propagate all the exceptions, we already logged them all anyway
4506          throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement());
4507        }
4508      }
4509      return null;
4510    }
4511  }
4512
4513  /**
4514   * Contact hdfs and load the .regioninfo file for a single region into its
4515   * HbckInfo entry.
4516   */
4517  static class WorkItemHdfsRegionInfo implements Callable<Void> {
4518    private HbckInfo hbi;
4519    private HBaseFsck hbck;
4520    private ErrorReporter errors;
4521
4522    WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4523      this.hbi = hbi;
4524      this.hbck = hbck;
4525      this.errors = errors;
4526    }
4527
4528    @Override
4529    public synchronized Void call() throws IOException {
4530      // only load entries that haven't been loaded yet.
4531      if (hbi.getHdfsHRI() == null) {
4532        try {
4533          errors.progress();
4534          hbck.loadHdfsRegioninfo(hbi);
4535        } catch (IOException ioe) {
4536          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4537              + hbi.getTableName() + " in hdfs dir "
4538              + hbi.getHdfsRegionDir()
4539              + "!  It may be an invalid format or version file.  Treating as "
4540              + "an orphaned regiondir.";
4541          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4542          try {
4543            hbck.debugLsr(hbi.getHdfsRegionDir());
4544          } catch (IOException ioe2) {
4545            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4546            throw ioe2;
4547          }
4548          hbck.orphanHdfsDirs.add(hbi);
4549          throw ioe;
4550        }
4551      }
4552      return null;
4553    }
4554  }
4555
4556  /**
4557   * Display the full report from fsck. This displays all live and dead region
4558   * servers, and all known regions.
4559   */
4560  public static void setDisplayFullReport() {
4561    details = true;
4562  }
4563
4564  /**
4565   * Set exclusive mode.
4566   */
4567  public static void setForceExclusive() {
4568    forceExclusive = true;
4569  }
4570
4571  /**
4572   * Only one instance of hbck can modify HBase at a time.
4573   */
4574  public boolean isExclusive() {
4575    return fixAny || forceExclusive;
4576  }
4577
4578  /**
4579   * Set summary mode.
4580   * Print only summary of the tables and status (OK or INCONSISTENT)
4581   */
4582  static void setSummary() {
4583    summary = true;
4584  }
4585
4586  /**
4587   * Set hbase:meta check mode.
4588   * Print only info about hbase:meta table deployment/state
4589   */
4590  void setCheckMetaOnly() {
4591    checkMetaOnly = true;
4592  }
4593
4594  /**
4595   * Set region boundaries check mode.
4596   */
4597  void setRegionBoundariesCheck() {
4598    checkRegionBoundaries = true;
4599  }
4600
4601  /**
4602   * Set replication fix mode.
4603   */
4604  public void setFixReplication(boolean shouldFix) {
4605    fixReplication = shouldFix;
4606    fixAny |= shouldFix;
4607  }
4608
4609  public void setCleanReplicationBarrier(boolean shouldClean) {
4610    cleanReplicationBarrier = shouldClean;
4611  }
4612
4613  /**
4614   * Mark that fsck should be rerun. We set this after attempting a fix so
4615   * that the fsck tool is run one more time, to verify that the fix worked
4616   * and did not introduce new inconsistencies. See also {@link #shouldRerun()}.
4618   */
4619  void setShouldRerun() {
4620    rerun = true;
4621  }
4622
4623  public boolean shouldRerun() {
4624    return rerun;
4625  }
4626
4627  /**
4628   * Fix inconsistencies found by fsck. This should try to fix errors (if any)
4629   * found by fsck utility.
4630   */
4631  public void setFixAssignments(boolean shouldFix) {
4632    fixAssignments = shouldFix;
4633    fixAny |= shouldFix;
4634  }
4635
4636  boolean shouldFixAssignments() {
4637    return fixAssignments;
4638  }
4639
4640  public void setFixMeta(boolean shouldFix) {
4641    fixMeta = shouldFix;
4642    fixAny |= shouldFix;
4643  }
4644
4645  boolean shouldFixMeta() {
4646    return fixMeta;
4647  }
4648
4649  public void setFixEmptyMetaCells(boolean shouldFix) {
4650    fixEmptyMetaCells = shouldFix;
4651    fixAny |= shouldFix;
4652  }
4653
4654  boolean shouldFixEmptyMetaCells() {
4655    return fixEmptyMetaCells;
4656  }
4657
4658  public void setCheckHdfs(boolean checking) {
4659    checkHdfs = checking;
4660  }
4661
4662  boolean shouldCheckHdfs() {
4663    return checkHdfs;
4664  }
4665
4666  public void setFixHdfsHoles(boolean shouldFix) {
4667    fixHdfsHoles = shouldFix;
4668    fixAny |= shouldFix;
4669  }
4670
4671  boolean shouldFixHdfsHoles() {
4672    return fixHdfsHoles;
4673  }
4674
4675  public void setFixTableOrphans(boolean shouldFix) {
4676    fixTableOrphans = shouldFix;
4677    fixAny |= shouldFix;
4678  }
4679
4680  boolean shouldFixTableOrphans() {
4681    return fixTableOrphans;
4682  }
4683
4684  public void setFixHdfsOverlaps(boolean shouldFix) {
4685    fixHdfsOverlaps = shouldFix;
4686    fixAny |= shouldFix;
4687  }
4688
4689  boolean shouldFixHdfsOverlaps() {
4690    return fixHdfsOverlaps;
4691  }
4692
4693  public void setFixHdfsOrphans(boolean shouldFix) {
4694    fixHdfsOrphans = shouldFix;
4695    fixAny |= shouldFix;
4696  }
4697
4698  boolean shouldFixHdfsOrphans() {
4699    return fixHdfsOrphans;
4700  }
4701
4702  public void setFixVersionFile(boolean shouldFix) {
4703    fixVersionFile = shouldFix;
4704    fixAny |= shouldFix;
4705  }
4706
4707  public boolean shouldFixVersionFile() {
4708    return fixVersionFile;
4709  }
4710
4711  public void setSidelineBigOverlaps(boolean sbo) {
4712    this.sidelineBigOverlaps = sbo;
4713  }
4714
4715  public boolean shouldSidelineBigOverlaps() {
4716    return sidelineBigOverlaps;
4717  }
4718
4719  public void setFixSplitParents(boolean shouldFix) {
4720    fixSplitParents = shouldFix;
4721    fixAny |= shouldFix;
4722  }
4723
4724  public void setRemoveParents(boolean shouldFix) {
4725    removeParents = shouldFix;
4726    fixAny |= shouldFix;
4727  }
4728
4729  boolean shouldFixSplitParents() {
4730    return fixSplitParents;
4731  }
4732
4733  boolean shouldRemoveParents() {
4734    return removeParents;
4735  }
4736
4737  public void setFixReferenceFiles(boolean shouldFix) {
4738    fixReferenceFiles = shouldFix;
4739    fixAny |= shouldFix;
4740  }
4741
4742  boolean shouldFixReferenceFiles() {
4743    return fixReferenceFiles;
4744  }
4745
4746  public void setFixHFileLinks(boolean shouldFix) {
4747    fixHFileLinks = shouldFix;
4748    fixAny |= shouldFix;
4749  }
4750
4751  boolean shouldFixHFileLinks() {
4752    return fixHFileLinks;
4753  }
4754
4755  public boolean shouldIgnorePreCheckPermission() {
4756    return !fixAny || ignorePreCheckPermission;
4757  }
4758
4759  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4760    this.ignorePreCheckPermission = ignorePreCheckPermission;
4761  }
4762
4763  /**
4764   * @param mm maximum number of regions to merge into a single region.
4765   */
4766  public void setMaxMerge(int mm) {
4767    this.maxMerge = mm;
4768  }
4769
4770  public int getMaxMerge() {
4771    return maxMerge;
4772  }
4773
4774  public void setMaxOverlapsToSideline(int mo) {
4775    this.maxOverlapsToSideline = mo;
4776  }
4777
4778  public int getMaxOverlapsToSideline() {
4779    return maxOverlapsToSideline;
4780  }
4781
4782  /**
4783   * Only check/fix tables specified by the list.
4784   * An empty list means all tables are included.
4785   */
4786  boolean isTableIncluded(TableName table) {
4787    return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
4788  }
4789
4790  public void includeTable(TableName table) {
4791    tablesIncluded.add(table);
4792  }
4793
4794  Set<TableName> getIncludedTables() {
4795    return new HashSet<>(tablesIncluded);
4796  }
4797
4798  /**
4799   * We are interested in only those tables that have not changed their state in
4800   * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
4801   * @param seconds - the time in seconds
4802   */
4803  public void setTimeLag(long seconds) {
4804    timelag = seconds * 1000; // convert to milliseconds
4805  }
4806
4807  /**
4808   * Set the HDFS directory used for sidelining data.
4809   * @param sidelineDir - HDFS path to sideline data
4810   */
4811  public void setSidelineDir(String sidelineDir) {
4812    this.sidelineDir = new Path(sidelineDir);
4813  }
4814
4815  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4816    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4817  }
4818
4819  public HFileCorruptionChecker getHFilecorruptionChecker() {
4820    return hfcc;
4821  }
4822
4823  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4824    this.hfcc = hfcc;
4825  }
4826
4827  public void setRetCode(int code) {
4828    this.retcode = code;
4829  }
4830
4831  public int getRetCode() {
4832    return retcode;
4833  }
4834
4835  protected HBaseFsck printUsageAndExit() {
4836    StringWriter sw = new StringWriter(2048);
4837    PrintWriter out = new PrintWriter(sw);
4838    out.println("");
4839    out.println("-----------------------------------------------------------------------");
4840    out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
4841    out.println("In general, all Read-Only options are supported and can be used");
4842    out.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
4843    out.println("below for details on which options are not supported.");
4844    out.println("-----------------------------------------------------------------------");
4845    out.println("");
4846    out.println("Usage: fsck [opts] {only tables}");
4847    out.println(" where [opts] are:");
4848    out.println("   -help Display help options (this)");
4849    out.println("   -details Display full report of all regions.");
4850    out.println("   -timelag <timeInSeconds>  Process only regions that " +
4851                       "have not experienced any metadata updates in the last " +
4852                       "<timeInSeconds> seconds.");
4853    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4854        " before checking if the fix worked if run with -fix");
4855    out.println("   -summary Print only summary of the tables and status.");
4856    out.println("   -metaonly Only check the state of the hbase:meta table.");
4857    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4858    out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
4859    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
4860
4861    out.println("");
4862    out.println("  Datafile Repair options: (expert features, use with caution!)");
4863    out.println("   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
4864    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
4865
4866    out.println("");
4867    out.println(" Replication options");
4868    out.println("   -fixReplication   Deletes replication queues for removed peers");
4869
4870    out.println("");
4871    out.println("  Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
4872    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4873    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4874    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
4875    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4876        + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4877    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
4878
4879    out.println("");
4880    out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
4881    out.println("");
4882    out.println("  UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
4883    out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
4884    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4885    out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4886    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4887    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4888    out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4889    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4890    out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4891    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
4892    out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4893    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4894    out.println("   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
4895    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4896        + " (empty REGIONINFO_QUALIFIER rows)");
4897
4898    out.println("");
4899    out.println("  UNSUPPORTED Metadata Repair shortcuts");
4900    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4901        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles " +
4902        "-fixHFileLinks");
4903    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4904    out.println("");
4905    out.println(" Replication options");
4906    out.println("   -fixReplication   Deletes replication queues for removed peers");
4907    out.println("   -cleanReplicationBarrier [tableName] clean the replication barriers " +
4908        "of a specified table, tableName is required");
4909    out.flush();
4910    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4911
4912    setRetCode(-2);
4913    return this;
4914  }
4915
4916  /**
4917   * Main program
4918   *
4919   * @param args the command-line arguments to hbck
4920   * @throws Exception if an error occurs while running hbck
4921   */
4922  public static void main(String[] args) throws Exception {
4923    // create a fsck object
4924    Configuration conf = HBaseConfiguration.create();
4925    Path hbasedir = FSUtils.getRootDir(conf);
4926    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4927    FSUtils.setFsDefault(conf, new Path(defaultFs));
4928    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4929    System.exit(ret);
4930  }
4931
4932  /**
4933   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4934   */
4935  static class HBaseFsckTool extends Configured implements Tool {
4936    HBaseFsckTool(Configuration conf) { super(conf); }
4937    @Override
4938    public int run(String[] args) throws Exception {
4939      HBaseFsck hbck = new HBaseFsck(getConf());
4940      hbck.exec(hbck.executor, args);
4941      hbck.close();
4942      return hbck.getRetCode();
4943    }
4944  }
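
  // Example invocations, assuming the usual "hbase hbck" launcher; "MyTable" and the
  // reporter class are placeholders. Because HBaseFsckTool runs through ToolRunner,
  // generic -D options are applied to the Configuration before hbck's own flags.
  //
  //   hbase hbck -details                  # full read-only report
  //   hbase hbck -summary MyTable          # summary restricted to one table
  //   hbase hbck -Dhbasefsck.errorreporter=com.example.MyErrorReporter -summary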
4945
4946  public HBaseFsck exec(ExecutorService exec, String[] args)
4947      throws KeeperException, IOException, InterruptedException, ReplicationException {
4948    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4949
4950    boolean checkCorruptHFiles = false;
4951    boolean sidelineCorruptHFiles = false;
4952
4953    // Process command-line args.
4954    for (int i = 0; i < args.length; i++) {
4955      String cmd = args[i];
4956      if (cmd.equals("-help") || cmd.equals("-h")) {
4957        return printUsageAndExit();
4958      } else if (cmd.equals("-details")) {
4959        setDisplayFullReport();
4960      } else if (cmd.equals("-exclusive")) {
4961        setForceExclusive();
4962      } else if (cmd.equals("-timelag")) {
4963        if (i == args.length - 1) {
4964          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4965          return printUsageAndExit();
4966        }
4967        try {
4968          long timelag = Long.parseLong(args[++i]);
4969          setTimeLag(timelag);
4970        } catch (NumberFormatException e) {
4971          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4972          return printUsageAndExit();
4973        }
4974      } else if (cmd.equals("-sleepBeforeRerun")) {
4975        if (i == args.length - 1) {
4976          errors.reportError(ERROR_CODE.WRONG_USAGE,
4977            "HBaseFsck: -sleepBeforeRerun needs a value.");
4978          return printUsageAndExit();
4979        }
4980        try {
4981          sleepBeforeRerun = Long.parseLong(args[++i]);
4982        } catch (NumberFormatException e) {
4983          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4984          return printUsageAndExit();
4985        }
4986      } else if (cmd.equals("-sidelineDir")) {
4987        if (i == args.length - 1) {
4988          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4989          return printUsageAndExit();
4990        }
4991        setSidelineDir(args[++i]);
4992      } else if (cmd.equals("-fix")) {
4993        errors.reportError(ERROR_CODE.WRONG_USAGE,
4994          "This option is deprecated, please use -fixAssignments instead.");
4995        setFixAssignments(true);
4996      } else if (cmd.equals("-fixAssignments")) {
4997        setFixAssignments(true);
4998      } else if (cmd.equals("-fixMeta")) {
4999        setFixMeta(true);
5000      } else if (cmd.equals("-noHdfsChecking")) {
5001        setCheckHdfs(false);
5002      } else if (cmd.equals("-fixHdfsHoles")) {
5003        setFixHdfsHoles(true);
5004      } else if (cmd.equals("-fixHdfsOrphans")) {
5005        setFixHdfsOrphans(true);
5006      } else if (cmd.equals("-fixTableOrphans")) {
5007        setFixTableOrphans(true);
5008      } else if (cmd.equals("-fixHdfsOverlaps")) {
5009        setFixHdfsOverlaps(true);
5010      } else if (cmd.equals("-fixVersionFile")) {
5011        setFixVersionFile(true);
5012      } else if (cmd.equals("-sidelineBigOverlaps")) {
5013        setSidelineBigOverlaps(true);
5014      } else if (cmd.equals("-fixSplitParents")) {
5015        setFixSplitParents(true);
5016      } else if (cmd.equals("-removeParents")) {
5017        setRemoveParents(true);
5018      } else if (cmd.equals("-ignorePreCheckPermission")) {
5019        setIgnorePreCheckPermission(true);
5020      } else if (cmd.equals("-checkCorruptHFiles")) {
5021        checkCorruptHFiles = true;
5022      } else if (cmd.equals("-sidelineCorruptHFiles")) {
5023        sidelineCorruptHFiles = true;
5024      } else if (cmd.equals("-fixReferenceFiles")) {
5025        setFixReferenceFiles(true);
5026      } else if (cmd.equals("-fixHFileLinks")) {
5027        setFixHFileLinks(true);
5028      } else if (cmd.equals("-fixEmptyMetaCells")) {
5029        setFixEmptyMetaCells(true);
5030      } else if (cmd.equals("-repair")) {
5031        // this attempts to merge overlapping hdfs regions, needs testing
5032        // under load
5033        setFixHdfsHoles(true);
5034        setFixHdfsOrphans(true);
5035        setFixMeta(true);
5036        setFixAssignments(true);
5037        setFixHdfsOverlaps(true);
5038        setFixVersionFile(true);
5039        setSidelineBigOverlaps(true);
5040        setFixSplitParents(false);
5041        setCheckHdfs(true);
5042        setFixReferenceFiles(true);
5043        setFixHFileLinks(true);
5044      } else if (cmd.equals("-repairHoles")) {
5045        // this will make all missing hdfs regions available but may lose data
5046        setFixHdfsHoles(true);
5047        setFixHdfsOrphans(false);
5048        setFixMeta(true);
5049        setFixAssignments(true);
5050        setFixHdfsOverlaps(false);
5051        setSidelineBigOverlaps(false);
5052        setFixSplitParents(false);
5053        setCheckHdfs(true);
5054      } else if (cmd.equals("-maxOverlapsToSideline")) {
5055        if (i == args.length - 1) {
5056          errors.reportError(ERROR_CODE.WRONG_USAGE,
5057            "-maxOverlapsToSideline needs a numeric value argument.");
5058          return printUsageAndExit();
5059        }
5060        try {
5061          int maxOverlapsToSideline = Integer.parseInt(args[++i]);
5062          setMaxOverlapsToSideline(maxOverlapsToSideline);
5063        } catch (NumberFormatException e) {
5064          errors.reportError(ERROR_CODE.WRONG_USAGE,
5065            "-maxOverlapsToSideline needs a numeric value argument.");
5066          return printUsageAndExit();
5067        }
5068      } else if (cmd.equals("-maxMerge")) {
5069        if (i == args.length - 1) {
5070          errors.reportError(ERROR_CODE.WRONG_USAGE,
5071            "-maxMerge needs a numeric value argument.");
5072          return printUsageAndExit();
5073        }
5074        try {
5075          int maxMerge = Integer.parseInt(args[++i]);
5076          setMaxMerge(maxMerge);
5077        } catch (NumberFormatException e) {
5078          errors.reportError(ERROR_CODE.WRONG_USAGE,
5079            "-maxMerge needs a numeric value argument.");
5080          return printUsageAndExit();
5081        }
5082      } else if (cmd.equals("-summary")) {
5083        setSummary();
5084      } else if (cmd.equals("-metaonly")) {
5085        setCheckMetaOnly();
5086      } else if (cmd.equals("-boundaries")) {
5087        setRegionBoundariesCheck();
5088      } else if (cmd.equals("-fixReplication")) {
5089        setFixReplication(true);
5090      } else if (cmd.equals("-cleanReplicationBarrier")) {
5091        setCleanReplicationBarrier(true);
5092        if (i == args.length - 1 || args[++i].startsWith("-")) {
5093          return printUsageAndExit();
5094        }
5095        setCleanReplicationBarrierTable(args[i]);
5096      } else if (cmd.startsWith("-")) {
5097        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
5098        return printUsageAndExit();
5099      } else {
5100        includeTable(TableName.valueOf(cmd));
5101        errors.print("Allow checking/fixes for table: " + cmd);
5102      }
5103    }
5104
5105    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
5106
5107    // pre-check current user has FS write permission or not
5108    try {
5109      preCheckPermission();
5110    } catch (AccessDeniedException ace) {
5111      Runtime.getRuntime().exit(-1);
5112    } catch (IOException ioe) {
5113      Runtime.getRuntime().exit(-1);
5114    }
5115
5116    // do the real work of hbck
5117    connect();
5118
5119    // after connecting to server above, we have server version
5120    // check if unsupported option is specified based on server version
5121    if (!isOptionsSupported(args)) {
5122      return printUsageAndExit();
5123    }
5124
5125    try {
5126      // if corrupt file mode is on, first fix them since they may be opened later
5127      if (checkCorruptHFiles || sidelineCorruptHFiles) {
5128        LOG.info("Checking all hfiles for corruption");
5129        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
5130        setHFileCorruptionChecker(hfcc); // so we can get result
5131        Collection<TableName> tables = getIncludedTables();
5132        Collection<Path> tableDirs = new ArrayList<>();
5133        Path rootdir = FSUtils.getRootDir(getConf());
5134        if (tables.size() > 0) {
5135          for (TableName t : tables) {
5136            tableDirs.add(FSUtils.getTableDir(rootdir, t));
5137          }
5138        } else {
5139          tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
5140        }
5141        hfcc.checkTables(tableDirs);
5142        hfcc.report(errors);
5143      }
5144
5145      // check and fix table integrity, region consistency.
5146      int code = onlineHbck();
5147      setRetCode(code);
5148      // If we have changed the HBase state it is better to run hbck again
5149      // to see if we haven't broken something else in the process.
5150      // We run it only once more because otherwise we can easily fall into
5151      // an infinite loop.
5152      if (shouldRerun()) {
5153        try {
5154          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
5155          Thread.sleep(sleepBeforeRerun);
5156        } catch (InterruptedException ie) {
5157          LOG.warn("Interrupted while sleeping");
5158          return this;
5159        }
5160        // Just report
5161        setFixAssignments(false);
5162        setFixMeta(false);
5163        setFixHdfsHoles(false);
5164        setFixHdfsOverlaps(false);
5165        setFixVersionFile(false);
5166        setFixTableOrphans(false);
5167        errors.resetErrors();
5168        code = onlineHbck();
5169        setRetCode(code);
5170      }
5171    } finally {
5172      IOUtils.closeQuietly(this);
5173    }
5174    return this;
5175  }
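
  // Programmatic use is essentially what HBaseFsckTool does above; a rough sketch, with
  // exception handling and configuration tuning elided:
  //
  //   HBaseFsck fsck = new HBaseFsck(HBaseConfiguration.create());
  //   fsck.exec(fsck.executor, new String[] { "-details" });
  //   int rc = fsck.getRetCode();   // non-zero indicates inconsistencies or a usage error
  //   fsck.close();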
5176
5177  private boolean isOptionsSupported(String[] args) {
5178    boolean result = true;
5179    String hbaseServerVersion = status.getHBaseVersion();
5180    if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
5181      // Process command-line args.
5182      for (String arg : args) {
5183        if (unsupportedOptionsInV2.contains(arg)) {
5184          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
5185              "option '" + arg + "' is not supported!");
5186          result = false;
5187          break;
5188        }
5189      }
5190    }
5191    return result;
5192  }
5193
5194  public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
5195    this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
5196  }
5197
5198  public void cleanReplicationBarrier() throws IOException {
5199    if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
5200      return;
5201    }
5202    if (cleanReplicationBarrierTable.isSystemTable()) {
5203      errors.reportError(ERROR_CODE.INVALID_TABLE,
5204        "invalid table: " + cleanReplicationBarrierTable);
5205      return;
5206    }
5207
5208    boolean isGlobalScope = false;
5209    try {
5210      isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
5211    } catch (TableNotFoundException e) {
5212      LOG.info("Table not found; may still need to clean erroneous replication barrier data left by earlier bugs");
5213    }
5214
5215    if (isGlobalScope) {
5216      errors.reportError(ERROR_CODE.INVALID_TABLE,
5217        "table's replication scope is global: " + cleanReplicationBarrierTable);
5218      return;
5219    }
5220    List<byte[]> regionNames = new ArrayList<>();
5221    Scan barrierScan = new Scan();
5222    barrierScan.setCaching(100);
5223    barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
5224    barrierScan
5225        .withStartRow(MetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
5226          MetaTableAccessor.QueryType.REGION))
5227        .withStopRow(MetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
5228          MetaTableAccessor.QueryType.REGION));
5229    Result result;
5230    try (ResultScanner scanner = meta.getScanner(barrierScan)) {
5231      while ((result = scanner.next()) != null) {
5232        regionNames.add(result.getRow());
5233      }
5234    }
5235    if (regionNames.size() <= 0) {
5236      errors.reportError(ERROR_CODE.INVALID_TABLE,
5237        "there are no replication barriers for this table: " + cleanReplicationBarrierTable);
5238      return;
5239    }
5240    ReplicationQueueStorage queueStorage =
5241        ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
5242    List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
5243    if (peerDescriptions != null && peerDescriptions.size() > 0) {
5244      List<String> peers = peerDescriptions.stream()
5245          .filter(peerConfig -> ReplicationUtils.contains(peerConfig.getPeerConfig(),
5246            cleanReplicationBarrierTable))
5247          .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
5248      try {
5249        List<String> batch = new ArrayList<>();
5250        for (String peer : peers) {
5251          for (byte[] regionName : regionNames) {
5252            batch.add(RegionInfo.encodeRegionName(regionName));
5253            if (batch.size() % 100 == 0) {
5254              queueStorage.removeLastSequenceIds(peer, batch);
5255              batch.clear();
5256            }
5257          }
5258          if (batch.size() > 0) {
5259            queueStorage.removeLastSequenceIds(peer, batch);
5260            batch.clear();
5261          }
5262        }
5263      } catch (ReplicationException re) {
5264        throw new IOException(re);
5265      }
5266    }
5267    for (byte[] regionName : regionNames) {
5268      meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
5269    }
5270    setShouldRerun();
5271  }
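
  // A rough usage sketch (assumed flow, mirroring the -cleanReplicationBarrier handling in
  // exec() above; "t1" is a placeholder table name). connect() is assumed to have set up
  // the admin, meta and zkw handles used by cleanReplicationBarrier().
  //
  //   setCleanReplicationBarrier(true);
  //   setCleanReplicationBarrierTable("t1");
  //   connect();
  //   cleanReplicationBarrier();   // deletes barrier cells and per-peer last sequence ids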
5272
5273  /**
5274   * ls -r for debugging purposes
5275   */
5276  void debugLsr(Path p) throws IOException {
5277    debugLsr(getConf(), p, errors);
5278  }
5279
5280  /**
5281   * ls -r for debugging purposes
5282   */
5283  public static void debugLsr(Configuration conf,
5284      Path p) throws IOException {
5285    debugLsr(conf, p, new PrintingErrorReporter());
5286  }
5287
5288  /**
5289   * ls -r for debugging purposes
5290   */
5291  public static void debugLsr(Configuration conf,
5292      Path p, ErrorReporter errors) throws IOException {
5293    if (!LOG.isDebugEnabled() || p == null) {
5294      return;
5295    }
5296    FileSystem fs = p.getFileSystem(conf);
5297
5298    if (!fs.exists(p)) {
5299      // nothing
5300      return;
5301    }
5302    errors.print(p.toString());
5303
5304    if (fs.isFile(p)) {
5305      return;
5306    }
5307
5308    if (fs.getFileStatus(p).isDirectory()) {
5309      FileStatus[] fss= fs.listStatus(p);
5310      for (FileStatus status : fss) {
5311        debugLsr(conf, status.getPath(), errors);
5312      }
5313    }
5314  }
5315}