001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.Closeable;
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.io.InterruptedIOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.net.InetAddress;
027import java.net.URI;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Collection;
031import java.util.Collections;
032import java.util.Comparator;
033import java.util.EnumSet;
034import java.util.HashMap;
035import java.util.HashSet;
036import java.util.Iterator;
037import java.util.List;
038import java.util.Locale;
039import java.util.Map;
040import java.util.Map.Entry;
041import java.util.Objects;
042import java.util.Optional;
043import java.util.Set;
044import java.util.SortedMap;
045import java.util.SortedSet;
046import java.util.TreeMap;
047import java.util.TreeSet;
048import java.util.Vector;
049import java.util.concurrent.Callable;
050import java.util.concurrent.ConcurrentSkipListMap;
051import java.util.concurrent.ExecutionException;
052import java.util.concurrent.ExecutorService;
053import java.util.concurrent.Executors;
054import java.util.concurrent.Future;
055import java.util.concurrent.FutureTask;
056import java.util.concurrent.ScheduledThreadPoolExecutor;
057import java.util.concurrent.TimeUnit;
058import java.util.concurrent.TimeoutException;
059import java.util.concurrent.atomic.AtomicBoolean;
060import java.util.concurrent.atomic.AtomicInteger;
061import java.util.stream.Collectors;
062import org.apache.commons.io.IOUtils;
063import org.apache.commons.lang3.RandomStringUtils;
064import org.apache.commons.lang3.StringUtils;
065import org.apache.hadoop.conf.Configuration;
066import org.apache.hadoop.conf.Configured;
067import org.apache.hadoop.fs.FSDataOutputStream;
068import org.apache.hadoop.fs.FileStatus;
069import org.apache.hadoop.fs.FileSystem;
070import org.apache.hadoop.fs.Path;
071import org.apache.hadoop.fs.permission.FsAction;
072import org.apache.hadoop.fs.permission.FsPermission;
073import org.apache.hadoop.hbase.Abortable;
074import org.apache.hadoop.hbase.Cell;
075import org.apache.hadoop.hbase.CellUtil;
076import org.apache.hadoop.hbase.ClusterMetrics;
077import org.apache.hadoop.hbase.ClusterMetrics.Option;
078import org.apache.hadoop.hbase.HBaseConfiguration;
079import org.apache.hadoop.hbase.HBaseInterfaceAudience;
080import org.apache.hadoop.hbase.HConstants;
081import org.apache.hadoop.hbase.HRegionInfo;
082import org.apache.hadoop.hbase.HRegionLocation;
083import org.apache.hadoop.hbase.KeyValue;
084import org.apache.hadoop.hbase.MasterNotRunningException;
085import org.apache.hadoop.hbase.MetaTableAccessor;
086import org.apache.hadoop.hbase.RegionLocations;
087import org.apache.hadoop.hbase.ServerName;
088import org.apache.hadoop.hbase.TableName;
089import org.apache.hadoop.hbase.TableNotFoundException;
090import org.apache.hadoop.hbase.ZooKeeperConnectionException;
091import org.apache.hadoop.hbase.client.Admin;
092import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
093import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
094import org.apache.hadoop.hbase.client.Connection;
095import org.apache.hadoop.hbase.client.ConnectionFactory;
096import org.apache.hadoop.hbase.client.Delete;
097import org.apache.hadoop.hbase.client.Get;
098import org.apache.hadoop.hbase.client.Put;
099import org.apache.hadoop.hbase.client.RegionInfo;
100import org.apache.hadoop.hbase.client.RegionInfoBuilder;
101import org.apache.hadoop.hbase.client.RegionLocator;
102import org.apache.hadoop.hbase.client.RegionReplicaUtil;
103import org.apache.hadoop.hbase.client.Result;
104import org.apache.hadoop.hbase.client.ResultScanner;
105import org.apache.hadoop.hbase.client.RowMutations;
106import org.apache.hadoop.hbase.client.Scan;
107import org.apache.hadoop.hbase.client.Table;
108import org.apache.hadoop.hbase.client.TableDescriptor;
109import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
110import org.apache.hadoop.hbase.client.TableState;
111import org.apache.hadoop.hbase.io.FileLink;
112import org.apache.hadoop.hbase.io.HFileLink;
113import org.apache.hadoop.hbase.io.hfile.CacheConfig;
114import org.apache.hadoop.hbase.io.hfile.HFile;
115import org.apache.hadoop.hbase.log.HBaseMarkers;
116import org.apache.hadoop.hbase.master.MasterFileSystem;
117import org.apache.hadoop.hbase.master.RegionState;
118import org.apache.hadoop.hbase.regionserver.HRegion;
119import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
120import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
121import org.apache.hadoop.hbase.replication.ReplicationException;
122import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
123import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
124import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
125import org.apache.hadoop.hbase.replication.ReplicationUtils;
126import org.apache.hadoop.hbase.security.AccessDeniedException;
127import org.apache.hadoop.hbase.security.UserProvider;
128import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
129import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
130import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
131import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
132import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
133import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
134import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
135import org.apache.hadoop.hbase.wal.WAL;
136import org.apache.hadoop.hbase.wal.WALFactory;
137import org.apache.hadoop.hbase.wal.WALSplitUtil;
138import org.apache.hadoop.hbase.zookeeper.ZKUtil;
139import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
140import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
141import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
142import org.apache.hadoop.ipc.RemoteException;
143import org.apache.hadoop.security.UserGroupInformation;
144import org.apache.hadoop.util.ReflectionUtils;
145import org.apache.hadoop.util.Tool;
146import org.apache.hadoop.util.ToolRunner;
147import org.apache.yetus.audience.InterfaceAudience;
148import org.apache.yetus.audience.InterfaceStability;
149import org.apache.zookeeper.KeeperException;
150import org.slf4j.Logger;
151import org.slf4j.LoggerFactory;
152
153import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
154import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
155import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
156import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
157import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
158import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
159import org.apache.hbase.thirdparty.com.google.common.collect.Ordering;
160import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
161import org.apache.hbase.thirdparty.com.google.common.collect.TreeMultimap;
162
163/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
 * work against hbase-2.x: it can read state, but it is not allowed to change state, i.e. effect 'repair'.
 * See HBCK2 (HBASE-19121) for an hbck tool for hbase-2.x.
168 *
169 * <p>
170 * Region consistency checks verify that hbase:meta, region deployment on region
171 * servers and the state of data in HDFS (.regioninfo files) all are in
172 * accordance.
173 * <p>
174 * Table integrity checks verify that all possible row keys resolve to exactly
175 * one region of a table.  This means there are no individual degenerate
176 * or backwards regions; no holes between regions; and that there are no
177 * overlapping regions.
178 * <p>
179 * The general repair strategy works in two phases:
180 * <ol>
181 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
182 * <li> Repair Region Consistency with hbase:meta and assignments
183 * </ol>
184 * <p>
185 * For table integrity repairs, the tables' region directories are scanned
186 * for .regioninfo files.  Each table's integrity is then verified.  If there
187 * are any orphan regions (regions with no .regioninfo files) or holes, new
188 * regions are fabricated.  Backwards regions are sidelined as well as empty
189 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
190 * a new region is created and all data is merged into the new region.
191 * <p>
192 * Table integrity repairs deal solely with HDFS and could potentially be done
193 * offline -- the hbase region servers or master do not need to be running.
194 * This phase can eventually be used to completely reconstruct the hbase:meta table in
195 * an offline fashion.
196 * <p>
 * Region consistency requires three conditions -- 1) a valid .regioninfo file
 * present in an HDFS region dir, 2) a valid row with .regioninfo data in META,
 * and 3) a region deployed only on the regionserver that it was assigned to,
 * with proper state in the master.
201 * <p>
202 * Region consistency repairs require hbase to be online so that hbck can
203 * contact the HBase master and region servers.  The hbck#connect() method must
204 * first be called successfully.  Much of the region consistency information
205 * is transient and less risky to repair.
206 * <p>
207 * If hbck is run from the command line, there are a handful of arguments that
208 * can be used to limit the kinds of repairs hbck will do.  See the code in
209 * {@link #printUsageAndExit()} for more details.
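 * <p>
 * Illustrative command-line invocations (read-only checks; the full set of accepted flags is
 * listed by {@link #printUsageAndExit()}):
 * <pre>
 * $ ./bin/hbase hbck            # report inconsistencies
 * $ ./bin/hbase hbck -details   # report inconsistencies with per-table/per-region detail
 * </pre>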
210 */
211@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
212@InterfaceStability.Evolving
213public class HBaseFsck extends Configured implements Closeable {
214  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
215  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
216  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
217  private static boolean rsSupportsOffline = true;
218  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
219  private static final int DEFAULT_MAX_MERGE = 5;
220  private static final String TO_BE_LOADED = "to_be_loaded";
  /**
   * This is the default name of the lock file that hbck1 in hbase-1.x used.
   * hbck1 puts this lock in place before it goes to write/make changes.
   */
225  @VisibleForTesting
226  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
227  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
228  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
229  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In Hadoop 2.6 and later, the Namenode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies a timeout on this operation of up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
234  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
235  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
236  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
237  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
238
239  /**********************
240   * Internal resources
241   **********************/
242  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
243  private ClusterMetrics status;
244  private Connection connection;
245  private Admin admin;
246  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
248  protected ExecutorService executor;
249  private long startMillis = EnvironmentEdgeManager.currentTime();
250  private HFileCorruptionChecker hfcc;
251  private int retcode = 0;
252  private Path HBCK_LOCK_PATH;
253  private FSDataOutputStream hbckOutFd;
  // This flag prevents the hbck lock resources from being cleaned up twice, once by the
  // ShutdownHook and once by the main code. We clean up only if the connect() was
  // successful.
257  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
258
259  // Unsupported options in HBase 2.0+
260  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
261      "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
262      "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
263      "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
264
265  /***********
266   * Options
267   ***********/
268  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this lag
270  private static boolean forceExclusive = false; // only this hbck can modify HBase
271  private boolean fixAssignments = false; // fix assignment errors?
272  private boolean fixMeta = false; // fix meta errors?
273  private boolean checkHdfs = true; // load and check fs consistency?
274  private boolean fixHdfsHoles = false; // fix fs holes?
275  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix table orphans (missing .tableinfo)
278  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
279  private boolean fixSplitParents = false; // fix lingering split parents
280  private boolean removeParents = false; // remove split parents
281  private boolean fixReferenceFiles = false; // fix lingering reference store file
282  private boolean fixHFileLinks = false; // fix lingering HFileLinks
283  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
284  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
285  private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
  private boolean fixAny = false; // Set to true if any fix option is enabled.
287
288  // limit checking/fixes to listed tables, if empty attempt to check/fix all
  // hbase:meta is always checked
290  private Set<TableName> tablesIncluded = new HashSet<>();
291  private TableName cleanReplicationBarrierTable;
292  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
293  // maximum number of overlapping regions to sideline
294  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
295  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
296  private Path sidelineDir = null;
297
298  private boolean rerun = false; // if we tried to fix something, rerun hbck
299  private static boolean summary = false; // if we want to print less output
300  private boolean checkMetaOnly = false;
301  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if true, skip the pre-check of file permissions
303
304  /*********
305   * State
306   *********/
  private final ErrorReporter errors;
308  int fixes = 0;
309
310  /**
311   * This map contains the state of all hbck items.  It maps from encoded region
312   * name to HbckInfo structure.  The information contained in HbckInfo is used
313   * to detect and correct consistency (hdfs/meta/deployment) problems.
314   */
315  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<>();
316  // Empty regioninfo qualifiers in hbase:meta
317  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
318
319  /**
   * This map from TableName -> TableInfo contains the structures necessary to
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
   * to prevent dupes.
   *
   * If tablesIncluded is empty, this map contains all tables.
   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case it contains only
   * the meta table.
328   */
329  private SortedMap<TableName, TableInfo> tablesInfo = new ConcurrentSkipListMap<>();
330
331  /**
332   * When initially looking at HDFS, we attempt to find any orphaned data.
333   */
334  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
335
336  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
337  private Map<TableName, TableState> tableStates = new HashMap<>();
338  private final RetryCounterFactory lockFileRetryCounterFactory;
339  private final RetryCounterFactory createZNodeRetryCounterFactory;
340
341  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
342
343  private ZKWatcher zkw = null;
344  private String hbckEphemeralNodePath = null;
345  private boolean hbckZodeCreated = false;
346
347  /**
348   * Constructor
349   *
350   * @param conf Configuration object
   * @throws IOException if a filesystem or ZooKeeper error occurs while setting up
   * @throws ClassNotFoundException if a class required by the tool cannot be loaded
353   */
354  public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
355    this(conf, createThreadPool(conf));
356  }
357
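  /**
   * Creates the thread pool used for hbck's parallelizable work (e.g. contacting region servers,
   * reading .regioninfo files). Pool size is taken from "hbasefsck.numthreads", defaulting to
   * {@link #MAX_NUM_THREADS}.
   */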
358  private static ExecutorService createThreadPool(Configuration conf) {
359    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
360    return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
361  }
362
  /**
   * Constructor
   *
   * @param conf Configuration object
   * @param exec executor service used for hbck's parallel tasks
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
373  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
374      ZooKeeperConnectionException, IOException, ClassNotFoundException {
375    super(conf);
376    errors = getErrorReporter(getConf());
377    this.executor = exec;
378    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
379    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
380    zkw = createZooKeeperWatcher();
381  }
382
383  /**
384   * @return A retry counter factory configured for retrying lock file creation.
385   */
386  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
387    return new RetryCounterFactory(
388        conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
389        conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
390            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
391        conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
392            DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
393  }
394
395  /**
396   * @return A retry counter factory configured for retrying znode creation.
397   */
398  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
399    return new RetryCounterFactory(
400        conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
401        conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
402            DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
403        conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
404            DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
405  }
406
407  /**
   * @return the temporary dir this tool writes to.
409   */
410  @VisibleForTesting
411  public static Path getTmpDir(Configuration conf) throws IOException {
412    return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
413  }
414
415  private static class FileLockCallable implements Callable<FSDataOutputStream> {
416    RetryCounter retryCounter;
417    private final Configuration conf;
418    private Path hbckLockPath = null;
419
420    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
421      this.retryCounter = retryCounter;
422      this.conf = conf;
423    }
424
425    /**
426     * @return Will be <code>null</code> unless you call {@link #call()}
427     */
428    Path getHbckLockPath() {
429      return this.hbckLockPath;
430    }
431
432    @Override
433    public FSDataOutputStream call() throws IOException {
434      try {
435        FileSystem fs = FSUtils.getCurrentFileSystem(this.conf);
436        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf,
437            HConstants.DATA_FILE_UMASK_KEY);
438        Path tmpDir = getTmpDir(conf);
439        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
440        fs.mkdirs(tmpDir);
441        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
442        out.writeBytes(InetAddress.getLocalHost().toString());
        // Add a note to the file explaining why hbase-2.x writes out an hbck1 lock file.
        out.writeBytes(" Written by an hbase-2.x Master to block an " +
            "hbase-1.x HBCK tool from making modifications to state. " +
446            "See 'HBCK must match HBase server version' in the hbase refguide.");
447        out.flush();
448        return out;
449      } catch(RemoteException e) {
450        if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
451          return null;
452        } else {
453          throw e;
454        }
455      }
456    }
457
458    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
459        final Path hbckLockFilePath, final FsPermission defaultPerms)
460        throws IOException {
461      IOException exception = null;
462      do {
463        try {
464          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
465        } catch (IOException ioe) {
466          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
467              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
468              + retryCounter.getMaxAttempts());
469          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
470              ioe);
471          try {
472            exception = ioe;
473            retryCounter.sleepUntilNextRetry();
474          } catch (InterruptedException ie) {
475            throw (InterruptedIOException) new InterruptedIOException(
476                "Can't create lock file " + hbckLockFilePath.getName())
477            .initCause(ie);
478          }
479        }
480      } while (retryCounter.shouldRetry());
481
482      throw exception;
483    }
484  }
485
486  /**
   * This method maintains a lock using a file. If the creation fails, the stream in the
   * returned pair will be <code>null</code>.
488   *
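   * <p>
   * Illustrative usage, mirroring what {@link #connect()} does with this method:
   * <pre>
   * Pair&lt;Path, FSDataOutputStream&gt; pair =
   *     checkAndMarkRunningHbck(conf, createLockRetryCounterFactory(conf).create());
   * if (pair.getSecond() == null) {
   *   // another hbck instance already holds (or is busy creating) the lock file
   * }
   * </pre>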
   * @return a Pair of the lock file Path and the FSDataOutputStream of the newly opened lock file
490   * @throws IOException if IO failure occurs
491   */
492  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
493      RetryCounter retryCounter) throws IOException {
494    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
495    ExecutorService executor = Executors.newFixedThreadPool(1);
496    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
497    executor.execute(futureTask);
498    final int timeoutInSeconds = conf.getInt(
499      "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
500    FSDataOutputStream stream = null;
501    try {
502      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
503    } catch (ExecutionException ee) {
504      LOG.warn("Encountered exception when opening lock file", ee);
505    } catch (InterruptedException ie) {
506      LOG.warn("Interrupted when opening lock file", ie);
507      Thread.currentThread().interrupt();
508    } catch (TimeoutException exception) {
509      // took too long to obtain lock
510      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
511      futureTask.cancel(true);
512    } finally {
513      executor.shutdownNow();
514    }
515    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
516  }
517
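  /**
   * Best-effort removal of the hbck lock file, with retries. Only attempted when this hbck
   * instance holds the exclusive lock and cleanup has not already happened.
   */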
518  private void unlockHbck() {
519    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
520      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
521      do {
522        try {
523          IOUtils.closeQuietly(hbckOutFd);
524          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
525          LOG.info("Finishing hbck");
526          return;
527        } catch (IOException ioe) {
528          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
529              + (retryCounter.getAttemptTimes() + 1) + " of "
530              + retryCounter.getMaxAttempts());
531          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
532          try {
533            retryCounter.sleepUntilNextRetry();
534          } catch (InterruptedException ie) {
535            Thread.currentThread().interrupt();
536            LOG.warn("Interrupted while deleting lock file" +
537                HBCK_LOCK_PATH);
538            return;
539          }
540        }
541      } while (retryCounter.shouldRetry());
542    }
543  }
544
545  /**
   * To repair region consistency, connect() must be called first so that hbck can reach the
   * cluster's online state.
548   */
549  public void connect() throws IOException {
550
551    if (isExclusive()) {
552      // Grab the lock
553      Pair<Path, FSDataOutputStream> pair =
554          checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
555      HBCK_LOCK_PATH = pair.getFirst();
556      this.hbckOutFd = pair.getSecond();
557      if (hbckOutFd == null) {
558        setRetCode(-1);
559        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
560            "[If you are sure no other instance is running, delete the lock file " +
561            HBCK_LOCK_PATH + " and rerun the tool]");
562        throw new IOException("Duplicate hbck - Abort");
563      }
564
565      // Make sure to cleanup the lock
566      hbckLockCleanup.set(true);
567    }
568
569
    // Add a shutdown hook, in case the user tries to kill hbck with a ctrl-c;
    // we want to clean up the lock so that it is available for further calls.
573    Runtime.getRuntime().addShutdownHook(new Thread() {
574      @Override
575      public void run() {
576        IOUtils.closeQuietly(HBaseFsck.this);
577        cleanupHbckZnode();
578        unlockHbck();
579      }
580    });
581
582    LOG.info("Launching hbck");
583
584    connection = ConnectionFactory.createConnection(getConf());
585    admin = connection.getAdmin();
586    meta = connection.getTable(TableName.META_TABLE_NAME);
587    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
588      Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
589      Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
590  }
591
592  /**
593   * Get deployed regions according to the region servers.
594   */
595  private void loadDeployedRegions() throws IOException, InterruptedException {
596    // From the master, get a list of all known live region servers
597    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
598    errors.print("Number of live region servers: " + regionServers.size());
599    if (details) {
600      for (ServerName rsinfo: regionServers) {
601        errors.print("  " + rsinfo.getServerName());
602      }
603    }
604
605    // From the master, get a list of all dead region servers
606    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
607    errors.print("Number of dead region servers: " + deadRegionServers.size());
608    if (details) {
609      for (ServerName name: deadRegionServers) {
610        errors.print("  " + name);
611      }
612    }
613
614    // Print the current master name and state
615    errors.print("Master: " + status.getMasterName());
616
617    // Print the list of all backup masters
618    Collection<ServerName> backupMasters = status.getBackupMasterNames();
619    errors.print("Number of backup masters: " + backupMasters.size());
620    if (details) {
621      for (ServerName name: backupMasters) {
622        errors.print("  " + name);
623      }
624    }
625
626    errors.print("Average load: " + status.getAverageLoad());
627    errors.print("Number of requests: " + status.getRequestCount());
628    errors.print("Number of regions: " + status.getRegionCount());
629
630    List<RegionState> rits = status.getRegionStatesInTransition();
631    errors.print("Number of regions in transition: " + rits.size());
632    if (details) {
633      for (RegionState state: rits) {
634        errors.print("  " + state.toDescriptiveString());
635      }
636    }
637
638    // Determine what's deployed
639    processRegionServers(regionServers);
640  }
641
642  /**
643   * Clear the current state of hbck.
644   */
645  private void clearState() {
646    // Make sure regionInfo is empty before starting
647    fixes = 0;
648    regionInfoMap.clear();
649    emptyRegionInfoQualifiers.clear();
650    tableStates.clear();
651    errors.clear();
652    tablesInfo.clear();
653    orphanHdfsDirs.clear();
654    skippedRegions.clear();
655  }
656
657  /**
658   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
659   * the table integrity rules.  HBase doesn't need to be online for this
660   * operation to work.
661   */
662  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
663    // Initial pass to fix orphans.
664    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
665        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
667      // if nothing is happening this should always complete in two iterations.
668      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
669      int curIter = 0;
670      do {
        clearState(); // clears hbck state and resets fixes to 0.
672        // repair what's on HDFS
673        restoreHdfsIntegrity();
674        curIter++;// limit the number of iterations.
675      } while (fixes > 0 && curIter <= maxIterations);
676
677      // Repairs should be done in the first iteration and verification in the second.
678      // If there are more than 2 passes, something funny has happened.
679      if (curIter > 2) {
680        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
683        } else {
684          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
685        }
686      }
687    }
688  }
689
690  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the master.  It makes each region's state consistent across HDFS,
   * hbase:meta, and its deployment.
   *
   * @return If &gt; 0, the number of errors detected; if &lt; 0 there was an unrecoverable
   *     error.  If 0, we have a clean hbase.
697   */
698  public int onlineConsistencyRepair() throws IOException, KeeperException,
699    InterruptedException {
700
701    // get regions according to what is online on each RegionServer
702    loadDeployedRegions();
703    // check whether hbase:meta is deployed and online
704    recordMetaRegion();
705    // Check if hbase:meta is found only once and in the right place
706    if (!checkMetaRegion()) {
707      String errorMsg = "hbase:meta table is not consistent. ";
708      if (shouldFixAssignments()) {
709        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
710      } else {
711        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
712      }
713      errors.reportError(errorMsg + " Exiting...");
714      return -2;
715    }
    // Do not proceed with further consistency checks for tables when hbase:meta itself is not consistent.
    LOG.info("Loading regioninfos from the hbase:meta table");
718    boolean success = loadMetaEntries();
719    if (!success) return -1;
720
721    // Empty cells in hbase:meta?
722    reportEmptyMetaCells();
723
724    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
725    if (shouldFixEmptyMetaCells()) {
726      fixEmptyMetaCells();
727    }
728
729    // get a list of all tables that have not changed recently.
730    if (!checkMetaOnly) {
731      reportTablesInFlux();
732    }
733
734    // Get disabled tables states
735    loadTableStates();
736
737    // load regiondirs and regioninfos from HDFS
738    if (shouldCheckHdfs()) {
739      LOG.info("Loading region directories from HDFS");
740      loadHdfsRegionDirs();
741      LOG.info("Loading region information from HDFS");
742      loadHdfsRegionInfos();
743    }
744
745    // fix the orphan tables
746    fixOrphanTables();
747
748    LOG.info("Checking and fixing region consistency");
749    // Check and fix consistency
750    checkAndFixConsistency();
751
752    // Check integrity (does not fix)
753    checkIntegrity();
754    return errors.getErrorList().size();
755  }
756
757  /**
   * This method maintains an ephemeral znode. If the creation fails, we return false or throw
   * an exception.
760   *
761   * @return true if creating znode succeeds; false otherwise
762   * @throws IOException if IO failure occurs
763   */
764  private boolean setMasterInMaintenanceMode() throws IOException {
765    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
766    hbckEphemeralNodePath = ZNodePaths.joinZNode(
767      zkw.getZNodePaths().masterMaintZNode,
768      "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
769    do {
770      try {
771        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
772        if (hbckZodeCreated) {
773          break;
774        }
775      } catch (KeeperException e) {
776        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
777           throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
778        }
779        // fall through and retry
780      }
781
782      LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" +
783          (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
784
785      try {
786        retryCounter.sleepUntilNextRetry();
787      } catch (InterruptedException ie) {
788        throw (InterruptedIOException) new InterruptedIOException(
789              "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
790      }
791    } while (retryCounter.shouldRetry());
792    return hbckZodeCreated;
793  }
794
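  /**
   * Best-effort removal of the ephemeral znode created by {@link #setMasterInMaintenanceMode()}.
   * A missing node (NONODE) is silently ignored.
   */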
795  private void cleanupHbckZnode() {
796    try {
797      if (zkw != null && hbckZodeCreated) {
798        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
799        hbckZodeCreated = false;
800      }
801    } catch (KeeperException e) {
802      // Ignore
803      if (!e.code().equals(KeeperException.Code.NONODE)) {
804        LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
805      }
806    }
807  }
808
809  /**
   * Contacts the master and prints out cluster-wide information.
811   * @return 0 on success, non-zero on failure
812   */
813  public int onlineHbck()
814      throws IOException, KeeperException, InterruptedException, ReplicationException {
815    // print hbase server version
816    errors.print("Version: " + status.getHBaseVersion());
817
818    // Clean start
819    clearState();
820    // Do offline check and repair first
821    offlineHdfsIntegrityRepair();
822    offlineReferenceFileRepair();
823    offlineHLinkFileRepair();
824    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
825    // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
826    // is better to set Master into maintenance mode during online hbck.
827    //
828    if (!setMasterInMaintenanceMode()) {
829      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
830        + "error.  Please run HBCK multiple times to reduce the chance of transient error.");
831    }
832
833    onlineConsistencyRepair();
834
835    if (checkRegionBoundaries) {
836      checkRegionBoundaries();
837    }
838
839    checkAndFixReplication();
840
841    cleanReplicationBarrier();
842
843    // Remove the hbck znode
844    cleanupHbckZnode();
845
846    // Remove the hbck lock
847    unlockHbck();
848
849    // Print table summary
850    printTableSummary(tablesInfo);
851    return errors.summarize();
852  }
853
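  /**
   * Extracts the row from a serialized KeyValue key: the first two bytes encode the row length,
   * followed by the row bytes themselves.
   */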
  public static byte[] keyOnly(byte[] b) {
    if (b == null) {
      return b;
    }
    int rowLength = Bytes.toShort(b, 0);
    byte[] result = new byte[rowLength];
    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowLength);
    return result;
  }
862
863  @Override
864  public void close() throws IOException {
865    try {
866      cleanupHbckZnode();
867      unlockHbck();
868    } catch (Exception io) {
869      LOG.warn(io.toString(), io);
870    } finally {
871      if (zkw != null) {
872        zkw.close();
873        zkw = null;
874      }
875      IOUtils.closeQuietly(admin);
876      IOUtils.closeQuietly(meta);
877      IOUtils.closeQuietly(connection);
878    }
879  }
880
881  private static class RegionBoundariesInformation {
882    public byte [] regionName;
883    public byte [] metaFirstKey;
884    public byte [] metaLastKey;
885    public byte [] storesFirstKey;
886    public byte [] storesLastKey;
887    @Override
    public String toString() {
889      return "regionName=" + Bytes.toStringBinary(regionName) +
890             "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
891             "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
892             "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
893             "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
894    }
895  }
896
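  /**
   * For every region in hbase:meta, checks that the start/end keys recorded in META enclose the
   * smallest and largest keys actually present in the region's store files, and reports a
   * BOUNDARIES_ERROR for any region where they do not.
   */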
897  public void checkRegionBoundaries() {
898    try {
899      ByteArrayComparator comparator = new ByteArrayComparator();
900      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
901      final RegionBoundariesInformation currentRegionBoundariesInformation =
902          new RegionBoundariesInformation();
903      Path hbaseRoot = FSUtils.getRootDir(getConf());
904      for (RegionInfo regionInfo : regions) {
905        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
906        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
907        // For each region, get the start and stop key from the META and compare them to the
908        // same information from the Stores.
909        Path path = new Path(tableDir, regionInfo.getEncodedName());
910        FileSystem fs = path.getFileSystem(getConf());
911        FileStatus[] files = fs.listStatus(path);
912        // For all the column families in this region...
913        byte[] storeFirstKey = null;
914        byte[] storeLastKey = null;
915        for (FileStatus file : files) {
916          String fileName = file.getPath().toString();
917          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
918          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
919            FileStatus[] storeFiles = fs.listStatus(file.getPath());
920            // For all the stores in this column family.
921            for (FileStatus storeFile : storeFiles) {
922              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
923                CacheConfig.DISABLED, true, getConf());
924              if ((reader.getFirstKey() != null)
925                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
926                      ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
927                storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
928              }
929              if ((reader.getLastKey() != null)
930                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
931                      ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
932                storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
933              }
934              reader.close();
935            }
936          }
937        }
938        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
939        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
940        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
941        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
        if (currentRegionBoundariesInformation.metaFirstKey.length == 0) {
          currentRegionBoundariesInformation.metaFirstKey = null;
        }
        if (currentRegionBoundariesInformation.metaLastKey.length == 0) {
          currentRegionBoundariesInformation.metaLastKey = null;
        }
946
947        // For a region to be correct, we need the META start key to be smaller or equal to the
948        // smallest start key from all the stores, and the start key from the next META entry to
949        // be bigger than the last key from all the current stores. First region start key is null;
950        // Last region end key is null; some regions can be empty and not have any store.
951
952        boolean valid = true;
953        // Checking start key.
954        if ((currentRegionBoundariesInformation.storesFirstKey != null)
955            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
956          valid = valid
957              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
958                currentRegionBoundariesInformation.metaFirstKey) >= 0;
959        }
960        // Checking stop key.
961        if ((currentRegionBoundariesInformation.storesLastKey != null)
962            && (currentRegionBoundariesInformation.metaLastKey != null)) {
963          valid = valid
964              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
965                currentRegionBoundariesInformation.metaLastKey) < 0;
966        }
967        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
969            tablesInfo.get(regionInfo.getTable()));
970          LOG.warn("Region's boundaries not aligned between stores and META for:");
971          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
972        }
973      }
974    } catch (IOException e) {
975      LOG.error(e.toString(), e);
976    }
977  }
978
979  /**
980   * Iterates through the list of all orphan/invalid regiondirs.
981   */
982  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
983    for (HbckInfo hi : orphanHdfsDirs) {
984      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
985      adoptHdfsOrphan(hi);
986    }
987  }
988
989  /**
990   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
991   * these orphans by creating a new region, and moving the column families,
   * recovered edits, WALs, into the new region dir.  We determine the region's
   * start key and end key by looking at all of the hfiles inside the column
994   * families to identify the min and max keys. The resulting region will
995   * likely violate table integrity but will be dealt with by merging
996   * overlapping regions.
997   */
998  @SuppressWarnings("deprecation")
999  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
1000    Path p = hi.getHdfsRegionDir();
1001    FileSystem fs = p.getFileSystem(getConf());
1002    FileStatus[] dirs = fs.listStatus(p);
1003    if (dirs == null) {
1004      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
1005          p + ". This dir could probably be deleted.");
1006      return ;
1007    }
1008
1009    TableName tableName = hi.getTableName();
1010    TableInfo tableInfo = tablesInfo.get(tableName);
1011    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
1012    TableDescriptor template = tableInfo.getHTD();
1013
1014    // find min and max key values
1015    Pair<byte[],byte[]> orphanRegionRange = null;
1016    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
1018      // TODO Figure out what the special dirs are
1019      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
1020
1021      FileStatus[] hfiles = fs.listStatus(cf.getPath());
1022      for (FileStatus hfile : hfiles) {
1023        byte[] start, end;
1024        HFile.Reader hf = null;
1025        try {
1026          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
1027          hf.loadFileInfo();
1028          Optional<Cell> startKv = hf.getFirstKey();
1029          start = CellUtil.cloneRow(startKv.get());
1030          Optional<Cell> endKv = hf.getLastKey();
1031          end = CellUtil.cloneRow(endKv.get());
1032        } catch (IOException ioe) {
1033          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
1034          continue;
        } catch (NullPointerException npe) {
1036          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
1037          continue;
1038        } finally {
1039          if (hf != null) {
1040            hf.close();
1041          }
1042        }
1043
1044        // expand the range to include the range of all hfiles
1045        if (orphanRegionRange == null) {
1046          // first range
1047          orphanRegionRange = new Pair<>(start, end);
1048        } else {
1049          // TODO add test
1050
1051          // expand range only if the hfile is wider.
1052          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1053            orphanRegionRange.setFirst(start);
1054          }
1055          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
1056            orphanRegionRange.setSecond(end);
1057          }
1058        }
1059      }
1060    }
1061    if (orphanRegionRange == null) {
1062      LOG.warn("No data in dir " + p + ", sidelining data");
1063      fixes++;
1064      sidelineRegionDir(fs, hi);
1065      return;
1066    }
1067    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1068        Bytes.toString(orphanRegionRange.getSecond()) + ")");
1069
1070    // create new region on hdfs. move data into place.
1071    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1072        .setStartKey(orphanRegionRange.getFirst())
1073        .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1]))
1074        .build();
1075    LOG.info("Creating new region : " + regionInfo);
1076    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1077    Path target = region.getRegionFileSystem().getRegionDir();
1078
1079    // rename all the data to new region
1080    mergeRegionDirs(target, hi);
1081    fixes++;
1082  }
1083
1084  /**
1085   * This method determines if there are table integrity errors in HDFS.  If
1086   * there are errors and the appropriate "fix" options are enabled, the method
1087   * will first correct orphan regions making them into legit regiondirs, and
1088   * then reload to merge potentially overlapping regions.
1089   *
1090   * @return number of table integrity errors found
1091   */
1092  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1093    // Determine what's on HDFS
1094    LOG.info("Loading HBase regioninfo from HDFS...");
1095    loadHdfsRegionDirs(); // populating regioninfo table.
1096
1097    int errs = errors.getErrorList().size();
1098    // First time just get suggestions.
1099    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1100    checkHdfsIntegrity(false, false);
1101
1102    if (errors.getErrorList().size() == errs) {
1103      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1104      return 0;
1105    }
1106
1107    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1108      adoptHdfsOrphans(orphanHdfsDirs);
1109      // TODO optimize by incrementally adding instead of reloading.
1110    }
1111
1112    // Make sure there are no holes now.
1113    if (shouldFixHdfsHoles()) {
1114      clearState(); // this also resets # fixes.
1115      loadHdfsRegionDirs();
1116      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1117      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1118    }
1119
1120    // Now we fix overlaps
1121    if (shouldFixHdfsOverlaps()) {
1122      // second pass we fix overlaps.
1123      clearState(); // this also resets # fixes.
1124      loadHdfsRegionDirs();
1125      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1126      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1127    }
1128
1129    return errors.getErrorList().size();
1130  }
1131
1132  /**
1133   * Scan all the store file names to find any lingering reference files,
1134   * which refer to some none-exiting files. If "fix" option is enabled,
1135   * any lingering reference file will be sidelined if found.
1136   * <p>
1137   * Lingering reference file prevents a region from opening. It has to
1138   * be fixed before a cluster can start properly.
1139   */
1140  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1141    clearState();
1142    Configuration conf = getConf();
1143    Path hbaseRoot = FSUtils.getRootDir(conf);
1144    FileSystem fs = hbaseRoot.getFileSystem(conf);
1145    LOG.info("Computing mapping of all store files");
1146    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1147      new FSUtils.ReferenceFileFilter(fs), executor, errors);
1148    errors.print("");
1149    LOG.info("Validating mapping using HDFS state");
1150    for (Path path: allFiles.values()) {
1151      Path referredToFile = StoreFileInfo.getReferredToFile(path);
1152      if (fs.exists(referredToFile)) continue;  // good, expected
1153
1154      // Found a lingering reference file
1155      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1156        "Found lingering reference file " + path);
1157      if (!shouldFixReferenceFiles()) continue;
1158
1159      // Now, trying to fix it since requested
1160      boolean success = false;
1161      String pathStr = path.toString();
1162
1163      // A reference file path should be like
1164      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1165      // Up 5 directories to get the root folder.
1166      // So the file will be sidelined to a similar folder structure.
1167      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1168      for (int i = 0; index > 0 && i < 5; i++) {
1169        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1170      }
1171      if (index > 0) {
1172        Path rootDir = getSidelineDir();
1173        Path dst = new Path(rootDir, pathStr.substring(index + 1));
1174        fs.mkdirs(dst.getParent());
1175        LOG.info("Trying to sideline reference file "
1176          + path + " to " + dst);
1177        setShouldRerun();
1178
1179        success = fs.rename(path, dst);
1180        debugLsr(dst);
1181
1182      }
1183      if (!success) {
1184        LOG.error("Failed to sideline reference file " + path);
1185      }
1186    }
1187  }
1188
1189  /**
1190   * Scan all the store file names to find any lingering HFileLink files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering HFileLink file found will be sidelined.
1193   */
1194  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1195    Configuration conf = getConf();
1196    Path hbaseRoot = FSUtils.getRootDir(conf);
1197    FileSystem fs = hbaseRoot.getFileSystem(conf);
1198    LOG.info("Computing mapping of all link files");
1199    Map<String, Path> allFiles = FSUtils
1200        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1201    errors.print("");
1202
1203    LOG.info("Validating mapping using HDFS state");
1204    for (Path path : allFiles.values()) {
1205      // building HFileLink object to gather locations
1206      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1207      if (actualLink.exists(fs)) continue; // good, expected
1208
1209      // Found a lingering HFileLink
1210      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1211      if (!shouldFixHFileLinks()) continue;
1212
1213      // Now, trying to fix it since requested
1214      setShouldRerun();
1215
1216      // An HFileLink path should be like
1217      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1219      boolean success = sidelineFile(fs, hbaseRoot, path);
1220
1221      if (!success) {
1222        LOG.error("Failed to sideline HFileLink file " + path);
1223      }
1224
1225      // An HFileLink backreference path should be like
1226      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1228      Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1229              .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1230                  HFileLink.getReferencedRegionName(path.getName().toString()),
1231                  path.getParent().getName()),
1232          HFileLink.getReferencedHFileName(path.getName().toString()));
1233      success = sidelineFile(fs, hbaseRoot, backRefPath);
1234
1235      if (!success) {
1236        LOG.error("Failed to sideline HFileLink backreference file " + path);
1237      }
1238    }
1239  }
1240
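  /**
   * Moves the given file under the sideline directory, preserving its path relative to the HBase
   * root dir.
   * @return true if the rename succeeded, false otherwise
   */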
1241  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1242    URI uri = hbaseRoot.toUri().relativize(path.toUri());
1243    if (uri.isAbsolute()) return false;
1244    String relativePath = uri.getPath();
1245    Path rootDir = getSidelineDir();
1246    Path dst = new Path(rootDir, relativePath);
1247    boolean pathCreated = fs.mkdirs(dst.getParent());
1248    if (!pathCreated) {
1249      LOG.error("Failed to create path: " + dst.getParent());
1250      return false;
1251    }
1252    LOG.info("Trying to sideline file " + path + " to " + dst);
1253    return fs.rename(path, dst);
1254  }
1255
1256  /**
1257   * TODO -- need to add tests for this.
1258   */
1259  private void reportEmptyMetaCells() {
1260    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1261      emptyRegionInfoQualifiers.size());
1262    if (details) {
1263      for (Result r: emptyRegionInfoQualifiers) {
1264        errors.print("  " + r);
1265      }
1266    }
1267  }
1268
1269  /**
1270   * TODO -- need to add tests for this.
1271   */
1272  private void reportTablesInFlux() {
1273    AtomicInteger numSkipped = new AtomicInteger(0);
1274    TableDescriptor[] allTables = getTables(numSkipped);
1275    errors.print("Number of Tables: " + allTables.length);
1276    if (details) {
1277      if (numSkipped.get() > 0) {
1278        errors.detail("Number of Tables in flux: " + numSkipped.get());
1279      }
1280      for (TableDescriptor td : allTables) {
1281        errors.detail("  Table: " + td.getTableName() + "\t" +
1282                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
1283                            (td.isMetaRegion() ? "META" : "    ") + "\t" +
1284                           " families: " + td.getColumnFamilyCount());
1285      }
1286    }
1287  }
1288
1289  public ErrorReporter getErrors() {
1290    return errors;
1291  }
1292
1293  /**
1294   * Read the .regioninfo file from the file system.  If there is no
1295   * .regioninfo, add it to the orphan hdfs region list.
1296   */
1297  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1298    Path regionDir = hbi.getHdfsRegionDir();
1299    if (regionDir == null) {
1300      if (hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1301        // Log warning only for default/primary replica with no region dir
1302        LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1303      }
1304      return;
1305    }
1306
1307    if (hbi.hdfsEntry.hri != null) {
1308      // already loaded data
1309      return;
1310    }
1311
1312    FileSystem fs = FileSystem.get(getConf());
1313    RegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1314    LOG.debug("RegionInfo read: " + hri.toString());
1315    hbi.hdfsEntry.hri = hri;
1316  }
1317
1318  /**
1319   * Exception thrown when an integrity repair operation fails in an
1320   * unresolvable way.
1321   */
1322  public static class RegionRepairException extends IOException {
1323    private static final long serialVersionUID = 1L;
1324    final IOException ioe;
1325    public RegionRepairException(String s, IOException ioe) {
1326      super(s);
1327      this.ioe = ioe;
1328    }
1329  }
1330
1331  /**
1332   * Populate HbckInfo entries from the RegionInfos loaded from the file system.
1333   */
1334  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1335      throws IOException, InterruptedException {
1336    tablesInfo.clear(); // regenerating the data
1337    // generate region split structure
1338    Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1339
1340    // Parallelized read of .regioninfo files.
1341    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckInfos.size());
1342    List<Future<Void>> hbiFutures;
1343
1344    for (HbckInfo hbi : hbckInfos) {
1345      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1346      hbis.add(work);
1347    }
1348
1349    // Submit and wait for completion
1350    hbiFutures = executor.invokeAll(hbis);
1351
1352    for (int i = 0; i < hbiFutures.size(); i++) {
1353      WorkItemHdfsRegionInfo work = hbis.get(i);
1354      Future<Void> f = hbiFutures.get(i);
1355      try {
1356        f.get();
1357      } catch(ExecutionException e) {
1358        LOG.warn("Failed to read .regioninfo file for region " +
1359              work.hbi.getRegionNameAsString(), e.getCause());
1360      }
1361    }
1362
1363    Path hbaseRoot = FSUtils.getRootDir(getConf());
1364    FileSystem fs = hbaseRoot.getFileSystem(getConf());
1365    // serialized table info gathering.
1366    for (HbckInfo hbi: hbckInfos) {
1367
1368      if (hbi.getHdfsHRI() == null) {
1369        // was an orphan
1370        continue;
1371      }
1372
1373
1374      // get table name from hdfs, populate various HBaseFsck tables.
1375      TableName tableName = hbi.getTableName();
1376      if (tableName == null) {
1377        // There was an entry in hbase:meta not in the HDFS?
1378        LOG.warn("tableName was null for: " + hbi);
1379        continue;
1380      }
1381
1382      TableInfo modTInfo = tablesInfo.get(tableName);
1383      if (modTInfo == null) {
1384        // only executed once per table.
1385        modTInfo = new TableInfo(tableName);
1386        tablesInfo.put(tableName, modTInfo);
1387        try {
1388          TableDescriptor htd =
1389              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1390          modTInfo.htds.add(htd);
1391        } catch (IOException ioe) {
1392          if (!orphanTableDirs.containsKey(tableName)) {
1393            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1394            //should only report once for each table
1395            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1396                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1397            Set<String> columns = new HashSet<>();
1398            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1399          }
1400        }
1401      }
1402      if (!hbi.isSkipChecks()) {
1403        modTInfo.addRegionInfo(hbi);
1404      }
1405    }
1406
1407    loadTableInfosForTablesWithNoRegion();
1408    errors.print("");
1409
1410    return tablesInfo;
1411  }
1412
1413  /**
1414   * Get the column family list according to the column family dirs on HDFS.
1415   * @param columns set to populate with the column family names found
1416   * @param hbi hbck info of the region whose family dirs are listed
1417   * @return the populated set of column families
1418   * @throws IOException
1419   */
1420  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1421    Path regionDir = hbi.getHdfsRegionDir();
1422    FileSystem fs = regionDir.getFileSystem(getConf());
1423    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1424    for (FileStatus subdir : subDirs) {
1425      String columnfamily = subdir.getPath().getName();
1426      columns.add(columnfamily);
1427    }
1428    return columns;
1429  }
1430
1431  /**
1432   * Fabricate a .tableinfo file with the following contents:<br>
1433   * 1. the correct table name <br>
1434   * 2. the correct column family list<br>
1435   * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1436   * @throws IOException
1437   */
1438  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1439      Set<String> columns) throws IOException {
1440    if (columns == null || columns.isEmpty()) return false;
1441    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
1442    for (String columnFamily : columns) {
1443      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnFamily));
1444    }
1445    fstd.createTableDescriptor(builder.build(), true);
1446    return true;
1447  }
1448
1449  /**
1450   * Fix the empty REGIONINFO_QUALIFIER rows in hbase:meta.<br>
1451   * @throws IOException
1452   */
1453  public void fixEmptyMetaCells() throws IOException {
1454    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1455      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1456      for (Result region : emptyRegionInfoQualifiers) {
1457        deleteMetaRegion(region.getRow());
1458        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1459      }
1460      emptyRegionInfoQualifiers.clear();
1461    }
1462  }
1463
1464  /**
1465   * Fix orphan tables by creating a .tableinfo file under the table dir: <br>
1466   * 1. if the table descriptor is cached, recover the .tableinfo from it <br>
1467   * 2. else create a default .tableinfo file with the following items<br>
1468   * &nbsp;2.1 the correct table name <br>
1469   * &nbsp;2.2 the correct column family list<br>
1470   * &nbsp;2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1471   * @throws IOException
1472   */
1473  public void fixOrphanTables() throws IOException {
1474    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1475
1476      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1477      tmpList.addAll(orphanTableDirs.keySet());
1478      TableDescriptor[] htds = getTableDescriptors(tmpList);
1479      Iterator<Entry<TableName, Set<String>>> iter =
1480          orphanTableDirs.entrySet().iterator();
1481      int j = 0;
1482      int numFailedCase = 0;
1483      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1484      while (iter.hasNext()) {
1485        Entry<TableName, Set<String>> entry =
1486            iter.next();
1487        TableName tableName = entry.getKey();
1488        LOG.info("Trying to fix orphan table error: " + tableName);
1489        if (j < htds.length) {
1490          if (tableName.equals(htds[j].getTableName())) {
1491            TableDescriptor htd = htds[j];
1492            LOG.info("fixing orphan table: " + tableName + " from cache");
1493            fstd.createTableDescriptor(htd, true);
1494            j++;
1495            iter.remove();
1496          }
1497        } else {
1498          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1499            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1500            LOG.warn("Strongly recommend modifying the TableDescriptor if necessary for: " + tableName);
1501            iter.remove();
1502          } else {
1503            LOG.error("Unable to create default .tableinfo for " + tableName + " because column family information is missing");
1504            numFailedCase++;
1505          }
1506        }
1507        fixes++;
1508      }
1509
1510      if (orphanTableDirs.isEmpty()) {
1511        // all orphanTableDirs are luckily recovered
1512        // re-run doFsck after recovering the .tableinfo file
1513        setShouldRerun();
1514        LOG.warn("Strongly recommend manually re-running hbck after all orphan table dirs have been fixed");
1515      } else if (numFailedCase > 0) {
1516        LOG.error("Failed to fix " + numFailedCase
1517            + " OrphanTables with default .tableinfo files");
1518      }
1519
1520    }
1521    //cleanup the list
1522    orphanTableDirs.clear();
1523
1524  }
1525
1526  /**
1527   * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates its own WAL, so be
1528   * sure to close it as well as the region when you're finished.
1529   * @param walFactoryID A unique identifier for WAL factory. Filesystem implementations will use
1530   *          this ID to make a directory inside WAL directory path.
1531   * @return an open hbase:meta HRegion
1532   */
1533  private HRegion createNewMeta(String walFactoryID) throws IOException {
1534    Path rootdir = FSUtils.getRootDir(getConf());
1535    Configuration c = getConf();
1536    RegionInfo metaHRI = RegionInfoBuilder.FIRST_META_REGIONINFO;
1537    TableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1538    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1539    // The WAL subsystem will use the default rootDir rather than the passed in rootDir
1540    // unless I pass along via the conf.
1541    Configuration confForWAL = new Configuration(c);
1542    confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
1543    WAL wal = new WALFactory(confForWAL, walFactoryID).getWAL(metaHRI);
1544    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
1545    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1546    return meta;
1547  }
1548
1549  /**
1550   * Generate set of puts to add to new meta.  This expects the tables to be
1551   * clean with no overlaps or holes.  If there are any problems it returns null.
1552   *
1553   * @return An array list of puts to do in bulk, null if tables have problems
1554   */
1555  private ArrayList<Put> generatePuts(SortedMap<TableName, TableInfo> tablesInfo)
1556      throws IOException {
1557    ArrayList<Put> puts = new ArrayList<>();
1558    boolean hasProblems = false;
1559    for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1560      TableName name = e.getKey();
1561
1562      // skip "hbase:meta"
1563      if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1564        continue;
1565      }
1566
1567      TableInfo ti = e.getValue();
1568      puts.add(MetaTableAccessor.makePutFromTableState(
1569        new TableState(ti.tableName, TableState.State.ENABLED),
1570        EnvironmentEdgeManager.currentTime()));
1571      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1572          .entrySet()) {
1573        Collection<HbckInfo> his = spl.getValue();
1574        int sz = his.size();
1575        if (sz != 1) {
1576          // problem
1577          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1578              + " had " +  sz + " regions instead of exactly 1." );
1579          hasProblems = true;
1580          continue;
1581        }
1582
1583        // add the row directly to meta.
1584        HbckInfo hi = his.iterator().next();
1585        RegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1586        Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
1587        puts.add(p);
1588      }
1589    }
1590    return hasProblems ? null : puts;
1591  }
1592
1593  /**
1594   * Suggest fixes for each table
1595   */
1596  private void suggestFixes(
1597      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1598    logParallelMerge();
1599    for (TableInfo tInfo : tablesInfo.values()) {
1600      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1601      tInfo.checkRegionChain(handler);
1602    }
1603  }
1604
1605  /**
1606   * Rebuilds meta from information in hdfs/fs.  Depends on configuration settings passed into
1607   * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE.
1608   *
1609   * @param fix flag that determines if method should attempt to fix holes
1610   * @return true if successful, false if attempt failed.
1611   */
1612  public boolean rebuildMeta(boolean fix) throws IOException,
1613      InterruptedException {
1614
1615    // TODO check to make sure hbase is offline. (or at least the table
1616    // currently being worked on is offline)
1617
1618    // Determine what's on HDFS
1619    LOG.info("Loading HBase regioninfo from HDFS...");
1620    loadHdfsRegionDirs(); // populating regioninfo table.
1621
1622    int errs = errors.getErrorList().size();
1623    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1624    checkHdfsIntegrity(false, false);
1625
1626    // make sure ok.
1627    if (errors.getErrorList().size() != errs) {
1628      // While in error state, iterate until no more fixes possible
1629      while (true) {
1630        fixes = 0;
1631        suggestFixes(tablesInfo);
1632        errors.clear();
1633        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1634        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1635
1636        int errCount = errors.getErrorList().size();
1637
1638        if (fixes == 0) {
1639          if (errCount > 0) {
1640            return false; // failed to fix problems.
1641          } else {
1642            break; // no new fixes and no remaining problems; proceed with the rebuild
1643          }
1644        }
1645      }
1646    }
1647
1648    // we can rebuild, move old meta out of the way and start
1649    LOG.info("HDFS regioninfos seem good. Sidelining old hbase:meta");
1650    Path backupDir = sidelineOldMeta();
1651
1652    LOG.info("Creating new hbase:meta");
1653    String walFactoryId = "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8);
1654    HRegion meta = createNewMeta(walFactoryId);
1655
1656    // populate meta
1657    List<Put> puts = generatePuts(tablesInfo);
1658    if (puts == null) {
1659      LOG.error(HBaseMarkers.FATAL, "Problem encountered when creating new hbase:meta "
1660          + "entries. You may need to restore the previously sidelined hbase:meta");
1661      return false;
1662    }
1663    meta.batchMutate(puts.toArray(new Put[puts.size()]), HConstants.NO_NONCE, HConstants.NO_NONCE);
1664    meta.close();
1665    if (meta.getWAL() != null) {
1666      meta.getWAL().close();
1667    }
1668    // clean up the temporary hbck meta recovery WAL directory
1669    removeHBCKMetaRecoveryWALDir(walFactoryId);
1670    LOG.info("Success! hbase:meta table rebuilt.");
1671    LOG.info("Old hbase:meta is moved into " + backupDir);
1672    return true;
1673  }
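
  // A minimal usage sketch (hypothetical; assumes an offline cluster and an HBaseFsck
  // instance "fsck" constructed and connected the way the tool's normal entry point does):
  //
  //   boolean rebuilt = fsck.rebuildMeta(false); // see the javadoc above for the 'fix' flag
  //   if (!rebuilt) {
  //     // inspect the logs; the sidelined hbase:meta can be restored from the reported backup dir
  //   }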
1674
1675  /**
1676   * Removes the empty Meta recovery WAL directory.
1677   * @param walFactoryId A unique identifier for the WAL factory which was used to create the
1678   *          meta recovery WAL directory inside the WAL directory path.
1679   */
1680  private void removeHBCKMetaRecoveryWALDir(String walFactoryId) throws IOException {
1681    Path walLogDir = new Path(new Path(CommonFSUtils.getWALRootDir(getConf()),
1682          HConstants.HREGION_LOGDIR_NAME), walFactoryId);
1683    FileSystem fs = CommonFSUtils.getWALFileSystem(getConf());
1684    FileStatus[] walFiles = FSUtils.listStatus(fs, walLogDir, null);
1685    if (walFiles == null || walFiles.length == 0) {
1686      LOG.info("HBCK meta recovery WAL directory is empty, removing it now.");
1687      if (!FSUtils.deleteDirectory(fs, walLogDir)) {
1688        LOG.warn("Couldn't clear the HBCK Meta recovery WAL directory " + walLogDir);
1689      }
1690    }
1691  }
1692
1693  /**
1694   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1695   */
1696  private void logParallelMerge() {
1697    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1698      LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
1699          " false to run serially.");
1700    } else {
1701      LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
1702          " true to run in parallel.");
1703    }
1704  }
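
  // A minimal sketch (hypothetical invocation): the merge behaviour logged above is driven
  // purely by configuration, so it can be toggled without code changes, e.g.
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setBoolean("hbasefsck.overlap.merge.parallel", false); // run overlap merges serially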
1705
1706  private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1707      boolean fixOverlaps) throws IOException {
1708    LOG.info("Checking HBase region split map from HDFS data...");
1709    logParallelMerge();
1710    for (TableInfo tInfo : tablesInfo.values()) {
1711      TableIntegrityErrorHandler handler;
1712      if (fixHoles || fixOverlaps) {
1713        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1714          fixHoles, fixOverlaps);
1715      } else {
1716        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1717      }
1718      if (!tInfo.checkRegionChain(handler)) {
1719        // should dump info as well.
1720        errors.report("Found inconsistency in table " + tInfo.getName());
1721      }
1722    }
1723    return tablesInfo;
1724  }
1725
1726  private Path getSidelineDir() throws IOException {
1727    if (sidelineDir == null) {
1728      Path hbaseDir = FSUtils.getRootDir(getConf());
1729      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1730      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1731          + startMillis);
1732    }
1733    return sidelineDir;
1734  }
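
  // Illustrative sketch (hypothetical paths): the sideline directory computed above lives
  // under the cluster root dir and is named after the root dir plus hbck's start time:
  //
  //   rootdir     = hdfs://nn/hbase
  //   sidelineDir = hdfs://nn/hbase/<HBCK_SIDELINEDIR_NAME>/hbase-<startMillis>
  //
  // Sidelined regions, tables and HFileLink files all end up underneath this directory.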
1735
1736  /**
1737   * Sideline a region dir (instead of deleting it)
1738   */
1739  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1740    return sidelineRegionDir(fs, null, hi);
1741  }
1742
1743  /**
1744   * Sideline a region dir (instead of deleting it)
1745   *
1746   * @param parentDir if specified, the region will be sidelined to folder like
1747   *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1748   *     similar regions sidelined, for example, those regions should be bulk loaded back later
1749   *     on. If NULL, it is ignored.
1750   */
1751  Path sidelineRegionDir(FileSystem fs,
1752      String parentDir, HbckInfo hi) throws IOException {
1753    TableName tableName = hi.getTableName();
1754    Path regionDir = hi.getHdfsRegionDir();
1755
1756    if (!fs.exists(regionDir)) {
1757      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1758      return null;
1759    }
1760
1761    Path rootDir = getSidelineDir();
1762    if (parentDir != null) {
1763      rootDir = new Path(rootDir, parentDir);
1764    }
1765    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1766    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1767    fs.mkdirs(sidelineRegionDir);
1768    boolean success = false;
1769    FileStatus[] cfs =  fs.listStatus(regionDir);
1770    if (cfs == null) {
1771      LOG.info("Region dir is empty: " + regionDir);
1772    } else {
1773      for (FileStatus cf : cfs) {
1774        Path src = cf.getPath();
1775        Path dst =  new Path(sidelineRegionDir, src.getName());
1776        if (fs.isFile(src)) {
1777          // simple file
1778          success = fs.rename(src, dst);
1779          if (!success) {
1780            String msg = "Unable to rename file " + src +  " to " + dst;
1781            LOG.error(msg);
1782            throw new IOException(msg);
1783          }
1784          continue;
1785        }
1786
1787        // is a directory.
1788        fs.mkdirs(dst);
1789
1790        LOG.info("Sidelining files from " + src + " into containing region " + dst);
1791        // FileSystem.rename is inconsistent with directories -- if the
1792        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1793        // it moves the src into the dst dir resulting in (foo/a/b).  If
1794        // the dst does not exist and the src is a dir, src is renamed to dst (foo/a).
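        // Concrete (hypothetical) example: src = .../region/f1 and dst = <sideline>/.../region/f1.
        // Since dst was already created with fs.mkdirs(dst) above, renaming the directory
        // wholesale would nest it as <sideline>/.../region/f1/f1 -- so each file under src
        // is renamed individually into dst below.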
1795        FileStatus[] hfiles = fs.listStatus(src);
1796        if (hfiles != null && hfiles.length > 0) {
1797          for (FileStatus hfile : hfiles) {
1798            success = fs.rename(hfile.getPath(), dst);
1799            if (!success) {
1800              String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1801              LOG.error(msg);
1802              throw new IOException(msg);
1803            }
1804          }
1805        }
1806        LOG.debug("Sideline directory contents:");
1807        debugLsr(sidelineRegionDir);
1808      }
1809    }
1810
1811    LOG.info("Removing old region dir: " + regionDir);
1812    success = fs.delete(regionDir, true);
1813    if (!success) {
1814      String msg = "Unable to delete dir " + regionDir;
1815      LOG.error(msg);
1816      throw new IOException(msg);
1817    }
1818    return sidelineRegionDir;
1819  }
1820
1821  /**
1822   * Sideline an entire table.
1823   */
1824  void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1825      Path backupHbaseDir) throws IOException {
1826    Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1827    if (fs.exists(tableDir)) {
1828      Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
1829      fs.mkdirs(backupTableDir.getParent());
1830      boolean success = fs.rename(tableDir, backupTableDir);
1831      if (!success) {
1832        throw new IOException("Failed to move  " + tableName + " from "
1833            +  tableDir + " to " + backupTableDir);
1834      }
1835    } else {
1836      LOG.info("No previous " + tableName +  " exists.  Continuing.");
1837    }
1838  }
1839
1840  /**
1841   * @return Path to backup of original directory
1842   */
1843  Path sidelineOldMeta() throws IOException {
1844    // put current hbase:meta aside.
1845    Path hbaseDir = FSUtils.getRootDir(getConf());
1846    FileSystem fs = hbaseDir.getFileSystem(getConf());
1847    Path backupDir = getSidelineDir();
1848    fs.mkdirs(backupDir);
1849
1850    try {
1851      sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1852    } catch (IOException e) {
1853      LOG.error(HBaseMarkers.FATAL, "... failed to sideline meta. Currently in an "
1854          + "inconsistent state. To restore, try renaming hbase:meta in " +
1855          backupDir.getName() + " to " + hbaseDir.getName() + ".", e);
1856      throw e; // throw original exception
1857    }
1858    return backupDir;
1859  }
1860
1861  /**
1862   * Load the table states from hbase:meta into the local map. (Table state used to
1863   * live in ZooKeeper; in hbase2 it is kept in hbase:meta.)
1864   * @throws IOException if the table states cannot be read from hbase:meta
1865   */
1866  private void loadTableStates()
1867  throws IOException {
1868    tableStates = MetaTableAccessor.getTableStates(connection);
1869    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
1870    // has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this in
1871    // the meantime.
1872    this.tableStates.put(TableName.META_TABLE_NAME,
1873        new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1874  }
1875
1876  /**
1877   * Check if the specified region's table is disabled.
1878   * @param tableName table to check status of
1879   */
1880  private boolean isTableDisabled(TableName tableName) {
1881    return tableStates.containsKey(tableName)
1882        && tableStates.get(tableName)
1883        .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1884  }
1885
1886  /**
1887   * Scan HDFS for all regions, recording their information into
1888   * regionInfoMap
1889   */
1890  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1891    Path rootDir = FSUtils.getRootDir(getConf());
1892    FileSystem fs = rootDir.getFileSystem(getConf());
1893
1894    // list all tables from HDFS
1895    List<FileStatus> tableDirs = Lists.newArrayList();
1896
1897    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1898
1899    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1900    for (Path path : paths) {
1901      TableName tableName = FSUtils.getTableName(path);
1902      if ((!checkMetaOnly &&
1903          isTableIncluded(tableName)) ||
1904          tableName.equals(TableName.META_TABLE_NAME)) {
1905        tableDirs.add(fs.getFileStatus(path));
1906      }
1907    }
1908
1909    // verify that version file exists
1910    if (!foundVersionFile) {
1911      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1912          "Version file does not exist in root dir " + rootDir);
1913      if (shouldFixVersionFile()) {
1914        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1915            + " file.");
1916        setShouldRerun();
1917        FSUtils.setVersion(fs, rootDir, getConf().getInt(
1918            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1919            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1920            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1921      }
1922    }
1923
1924    // Avoid multithreading at table-level because already multithreaded internally at
1925    // region-level.  Additionally multithreading at table-level can lead to deadlock
1926    // if there are many tables in the cluster.  Since there are a limited # of threads
1927    // in the executor's thread pool and if we multithread at the table-level by putting
1928    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1929    // executor tied up solely in waiting for the tables' region-level calls to complete.
1930    // If there are enough tables then there will be no actual threads in the pool left
1931    // for the region-level callables to be serviced.
1932    for (FileStatus tableDir : tableDirs) {
1933      LOG.debug("Loading region dirs from " +tableDir.getPath());
1934      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1935      try {
1936        item.call();
1937      } catch (ExecutionException e) {
1938        LOG.warn("Could not completely load table dir " +
1939            tableDir.getPath(), e.getCause());
1940      }
1941    }
1942    errors.print("");
1943  }
1944
1945  /**
1946   * Record the location(s) of the hbase:meta region as reported by the cluster's region locator.
1947   */
1948  private boolean recordMetaRegion() throws IOException {
1949    List<HRegionLocation> locs;
1950    try (RegionLocator locator = connection.getRegionLocator(TableName.META_TABLE_NAME)) {
1951      locs = locator.getRegionLocations(HConstants.EMPTY_START_ROW, true);
1952    }
1953    if (locs == null || locs.isEmpty()) {
1954      errors.reportError(ERROR_CODE.NULL_META_REGION, "META region was not found in ZooKeeper");
1955      return false;
1956    }
1957    for (HRegionLocation metaLocation : locs) {
1958      // Check if Meta region is valid and existing
1959      if (metaLocation == null) {
1960        errors.reportError(ERROR_CODE.NULL_META_REGION, "META region location is null");
1961        return false;
1962      }
1963      if (metaLocation.getRegion() == null) {
1964        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null");
1965        return false;
1966      }
1967      if (metaLocation.getHostname() == null) {
1968        errors.reportError(ERROR_CODE.NULL_META_REGION, "META location hostName is null");
1969        return false;
1970      }
1971      ServerName sn = metaLocation.getServerName();
1972      MetaEntry m =
1973        new MetaEntry(metaLocation.getRegion(), sn, EnvironmentEdgeManager.currentTime());
1974      HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName());
1975      if (hbckInfo == null) {
1976        regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckInfo(m));
1977      } else {
1978        hbckInfo.metaEntry = m;
1979      }
1980    }
1981    return true;
1982  }
1983
1984  private ZKWatcher createZooKeeperWatcher() throws IOException {
1985    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1986      @Override
1987      public void abort(String why, Throwable e) {
1988        LOG.error(why, e);
1989        System.exit(1);
1990      }
1991
1992      @Override
1993      public boolean isAborted() {
1994        return false;
1995      }
1996
1997    });
1998  }
1999
2000  /**
2001   * Contacts each regionserver and fetches metadata about regions.
2002   * @param regionServerList - the list of region servers to connect to
2003   * @throws IOException if a remote or network exception occurs
2004   */
2005  void processRegionServers(Collection<ServerName> regionServerList)
2006    throws IOException, InterruptedException {
2007
2008    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
2009    List<Future<Void>> workFutures;
2010
2011    // loop to contact each region server in parallel
2012    for (ServerName rsinfo: regionServerList) {
2013      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
2014    }
2015
2016    workFutures = executor.invokeAll(workItems);
2017
2018    for (int i = 0; i < workFutures.size(); i++) {
2019      WorkItemRegion item = workItems.get(i);
2020      Future<Void> f = workFutures.get(i);
2021      try {
2022        f.get();
2023      } catch(ExecutionException e) {
2024        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
2025            e.getCause());
2026      }
2027    }
2028  }
2029
2030  /**
2031   * Check consistency of all regions that have been found in previous phases.
2032   */
2033  private void checkAndFixConsistency()
2034  throws IOException, KeeperException, InterruptedException {
2035    // Divide the checks into two phases: one for default/primary replicas and another
2036    // for the non-primary ones. Keeps code cleaner this way.
2037
2038    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
2039    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
2040      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2041        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
2042      }
2043    }
2044    checkRegionConsistencyConcurrently(workItems);
2045
2046    boolean prevHdfsCheck = shouldCheckHdfs();
2047    setCheckHdfs(false); //replicas don't have any hdfs data
2048    // Run a pass over the replicas and fix any assignment issues that exist on the currently
2049    // deployed/undeployed replicas.
2050    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
2051    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
2052      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2053        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
2054      }
2055    }
2056    checkRegionConsistencyConcurrently(replicaWorkItems);
2057    setCheckHdfs(prevHdfsCheck);
2058
2059    // If some regions are skipped during the checkRegionConsistencyConcurrently() phase, we might
2060    // not get an accurate state of the cluster if we continue. The config here allows users to tune
2061    // the tolerated number of skipped regions.
2062    // TODO: evaluate the consequences of continuing the hbck operation without this config.
2063    int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
2064    int numOfSkippedRegions = skippedRegions.size();
2065    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
2066      throw new IOException(numOfSkippedRegions
2067        + " region(s) could not be checked or repaired.  See logs for detail.");
2068    }
2069
2070    if (shouldCheckHdfs()) {
2071      checkAndFixTableStates();
2072    }
2073  }
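
  // A minimal sketch (hypothetical invocation): the skipped-region tolerance used above is
  // configuration driven, so a run that should survive a few unreadable regions can raise it:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setInt("hbase.hbck.skipped.regions.limit", 5); // default 0: any skipped region aborts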
2074
2075  /**
2076   * Check consistency of all regions using multiple threads concurrently.
2077   */
2078  private void checkRegionConsistencyConcurrently(
2079    final List<CheckRegionConsistencyWorkItem> workItems)
2080    throws IOException, KeeperException, InterruptedException {
2081    if (workItems.isEmpty()) {
2082      return;  // nothing to check
2083    }
2084
2085    List<Future<Void>> workFutures = executor.invokeAll(workItems);
2086    for (Future<Void> f : workFutures) {
2087      try {
2088        f.get();
2089      } catch(ExecutionException e1) {
2090        LOG.warn("Could not check region consistency", e1.getCause());
2091        if (e1.getCause() instanceof IOException) {
2092          throw (IOException)e1.getCause();
2093        } else if (e1.getCause() instanceof KeeperException) {
2094          throw (KeeperException)e1.getCause();
2095        } else if (e1.getCause() instanceof InterruptedException) {
2096          throw (InterruptedException)e1.getCause();
2097        } else {
2098          throw new IOException(e1.getCause());
2099        }
2100      }
2101    }
2102  }
2103
2104  class CheckRegionConsistencyWorkItem implements Callable<Void> {
2105    private final String key;
2106    private final HbckInfo hbi;
2107
2108    CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
2109      this.key = key;
2110      this.hbi = hbi;
2111    }
2112
2113    @Override
2114    public synchronized Void call() throws Exception {
2115      try {
2116        checkRegionConsistency(key, hbi);
2117      } catch (Exception e) {
2118        // If the region is a non-META region, skip it and log a warning/error message; if
2119        // the region is the META region, we should not continue.
2120        LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
2121          + "'.", e);
2122        if (hbi.getHdfsHRI().isMetaRegion()) {
2123          throw e;
2124        }
2125        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
2126        addSkippedRegion(hbi);
2127      }
2128      return null;
2129    }
2130  }
2131
2132  private void addSkippedRegion(final HbckInfo hbi) {
2133    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
2134    if (skippedRegionNames == null) {
2135      skippedRegionNames = new HashSet<>();
2136    }
2137    skippedRegionNames.add(hbi.getRegionNameAsString());
2138    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
2139  }
2140
2141  /**
2142   * Check and fix table states, assumes full info available:
2143   * - tableInfos
2144   * - empty tables loaded
2145   */
2146  private void checkAndFixTableStates() throws IOException {
2147    // first check dangling states
2148    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
2149      TableName tableName = entry.getKey();
2150      TableState tableState = entry.getValue();
2151      TableInfo tableInfo = tablesInfo.get(tableName);
2152      if (isTableIncluded(tableName)
2153          && !tableName.isSystemTable()
2154          && tableInfo == null) {
2155        if (fixMeta) {
2156          MetaTableAccessor.deleteTableState(connection, tableName);
2157          TableState state = MetaTableAccessor.getTableState(connection, tableName);
2158          if (state != null) {
2159            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2160                tableName + " unable to delete dangling table state " + tableState);
2161          }
2162        } else if (!checkMetaOnly) {
2163          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
2164          // true, tableInfo will be null as tablesInfo is not populated for all tables from hdfs
2165          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2166              tableName + " has dangling table state " + tableState);
2167        }
2168      }
2169    }
2170    // check that all tables have states
2171    for (TableName tableName : tablesInfo.keySet()) {
2172      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
2173        if (fixMeta) {
2174          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
2175          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
2176          if (newState == null) {
2177            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2178                "Unable to change state for table " + tableName + " in meta ");
2179          }
2180        } else {
2181          errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2182              tableName + " has no state in meta ");
2183        }
2184      }
2185    }
2186  }
2187
2188  private void preCheckPermission() throws IOException, AccessDeniedException {
2189    if (shouldIgnorePreCheckPermission()) {
2190      return;
2191    }
2192
2193    Path hbaseDir = FSUtils.getRootDir(getConf());
2194    FileSystem fs = hbaseDir.getFileSystem(getConf());
2195    UserProvider userProvider = UserProvider.instantiate(getConf());
2196    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
2197    FileStatus[] files = fs.listStatus(hbaseDir);
2198    for (FileStatus file : files) {
2199      try {
2200        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
2201      } catch (AccessDeniedException ace) {
2202        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
2203        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
2204          + " does not have write perms to " + file.getPath()
2205          + ". Please rerun hbck as hdfs user " + file.getOwner());
2206        throw ace;
2207      }
2208    }
2209  }
2210
2211  /**
2212   * Deletes region from meta table
2213   */
2214  private void deleteMetaRegion(HbckInfo hi) throws IOException {
2215    deleteMetaRegion(hi.metaEntry.getRegionName());
2216  }
2217
2218  /**
2219   * Deletes region from meta table
2220   */
2221  private void deleteMetaRegion(byte[] metaKey) throws IOException {
2222    Delete d = new Delete(metaKey);
2223    meta.delete(d);
2224    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2225  }
2226
2227  /**
2228   * Reset the split parent region info in meta table
2229   */
2230  private void resetSplitParent(HbckInfo hi) throws IOException {
2231    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
2232    Delete d = new Delete(hi.metaEntry.getRegionName());
2233    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2234    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2235    mutations.add(d);
2236
2237    RegionInfo hri = RegionInfoBuilder.newBuilder(hi.metaEntry)
2238        .setOffline(false)
2239        .setSplit(false)
2240        .build();
2241    Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
2242    mutations.add(p);
2243
2244    meta.mutateRow(mutations);
2245    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2246  }
2247
2248  /**
2249   * This is a backwards-compatibility wrapper for permanently offlining a region
2250   * that should not be alive.  If the region server does not support the
2251   * "offline" method, it will use the closest unassign method instead.  This
2252   * will basically work until one attempts to disable or delete the affected
2253   * table.  The problem has to do with in-memory only master state, so
2254   * restarting the HMaster or failing over to another should fix this.
2255   */
2256  private void offline(byte[] regionName) throws IOException {
2257    String regionString = Bytes.toStringBinary(regionName);
2258    if (!rsSupportsOffline) {
2259      LOG.warn("Using unassign region " + regionString
2260          + " instead of using offline method, you should"
2261          + " restart HMaster after these repairs");
2262      admin.unassign(regionName, true);
2263      return;
2264    }
2265
2266    // the first time through, we assume the region servers support #offline.
2267    try {
2268      LOG.info("Offlining region " + regionString);
2269      admin.offline(regionName);
2270    } catch (IOException ioe) {
2271      String notFoundMsg = "java.lang.NoSuchMethodException: " +
2272        "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2273      if (ioe.getMessage().contains(notFoundMsg)) {
2274        LOG.warn("Using unassign region " + regionString
2275            + " instead of using offline method, you should"
2276            + " restart HMaster after these repairs");
2277        rsSupportsOffline = false; // in the future just use unassign
2278        admin.unassign(regionName, true);
2279        return;
2280      }
2281      throw ioe;
2282    }
2283  }
2284
2285  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2286    undeployRegionsForHbi(hi);
2287    // undeploy replicas of the region (but only if the method is invoked for the primary)
2288    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2289      return;
2290    }
2291    int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2292    for (int i = 1; i < numReplicas; i++) {
2293      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2294      RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2295          hi.getPrimaryHRIForDeployedReplica(), i);
2296      HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2297      if (h != null) {
2298        undeployRegionsForHbi(h);
2299        //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2300        //in consistency checks
2301        h.setSkipChecks(true);
2302      }
2303    }
2304  }
2305
2306  private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2307    for (OnlineEntry rse : hi.deployedEntries) {
2308      LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2309      try {
2310        HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2311        offline(rse.hri.getRegionName());
2312      } catch (IOException ioe) {
2313        LOG.warn("Got exception when attempting to offline region "
2314            + Bytes.toString(rse.hri.getRegionName()), ioe);
2315      }
2316    }
2317  }
2318
2319  /**
2320   * Attempts to undeploy a region from a region server based on information in
2321   * META.  Any operations that modify the file system should make sure that
2322   * its corresponding region is not deployed to prevent data races.
2323   *
2324   * A separate call is required to update the master in-memory region state
2325   * kept in the AssignmentManager.  Because disable uses this state instead of
2326   * that found in META, we can't seem to cleanly disable/delete tables that
2327   * have been hbck fixed.  When used on a version of HBase that does not have
2328   * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2329   * restart or failover may be required.
2330   */
2331  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2332    if (hi.metaEntry == null && hi.hdfsEntry == null) {
2333      undeployRegions(hi);
2334      return;
2335    }
2336
2337    // get assignment info and hregioninfo from meta.
2338    Get get = new Get(hi.getRegionName());
2339    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2340    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2341    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2342    // also get the locations of the replicas to close if the primary region is being closed
2343    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2344      int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2345      for (int i = 0; i < numReplicas; i++) {
2346        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2347        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2348      }
2349    }
2350    Result r = meta.get(get);
2351    RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2352    if (rl == null) {
2353      LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2354          " since meta does not have handle to reach it");
2355      return;
2356    }
2357    for (HRegionLocation h : rl.getRegionLocations()) {
2358      ServerName serverName = h.getServerName();
2359      if (serverName == null) {
2360        errors.reportError("Unable to close region "
2361            + hi.getRegionNameAsString() +  " because meta does not "
2362            + "have handle to reach it.");
2363        continue;
2364      }
2365      RegionInfo hri = h.getRegion();
2366      if (hri == null) {
2367        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2368            + " because hbase:meta had invalid or missing "
2369            + HConstants.CATALOG_FAMILY_STR + ":"
2370            + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2371            + " qualifier value.");
2372        continue;
2373      }
2374      // close the region -- close files and remove assignment
2375      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2376    }
2377  }
2378
2379  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2380    KeeperException, InterruptedException {
2381    // If we are trying to fix the errors
2382    if (shouldFixAssignments()) {
2383      errors.print(msg);
2384      undeployRegions(hbi);
2385      setShouldRerun();
2386      RegionInfo hri = hbi.getHdfsHRI();
2387      if (hri == null) {
2388        hri = hbi.metaEntry;
2389      }
2390      HBaseFsckRepair.fixUnassigned(admin, hri);
2391      HBaseFsckRepair.waitUntilAssigned(admin, hri);
2392
2393      // also assign replicas if needed (do it only when this call operates on a primary replica)
2394      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2395      int replicationCount = admin.getDescriptor(hri.getTable()).getRegionReplication();
2396      for (int i = 1; i < replicationCount; i++) {
2397        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2398        HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2399        if (h != null) {
2400          undeployRegions(h);
2401          //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2402          //in consistency checks
2403          h.setSkipChecks(true);
2404        }
2405        HBaseFsckRepair.fixUnassigned(admin, hri);
2406        HBaseFsckRepair.waitUntilAssigned(admin, hri);
2407      }
2408
2409    }
2410  }
2411
2412  /**
2413   * Check a single region for consistency and correct deployment.
2414   */
2415  private void checkRegionConsistency(final String key, final HbckInfo hbi)
2416  throws IOException, KeeperException, InterruptedException {
2417
2418    if (hbi.isSkipChecks()) return;
2419    String descriptiveName = hbi.toString();
2420    boolean inMeta = hbi.metaEntry != null;
2421    // In case not checking HDFS, assume the region is on HDFS
2422    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2423    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2424    boolean isDeployed = !hbi.deployedOn.isEmpty();
2425    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2426    boolean deploymentMatchesMeta =
2427      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2428      hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2429    boolean splitParent =
2430        inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2431    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2432    boolean recentlyModified = inHdfs &&
2433      hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2434
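    // A compact reminder (derived from the branches below) of how the main flags combine;
    // this is only a summary, the if/else chain below is authoritative:
    //
    //   inMeta  inHdfs  isDeployed          -> outcome
    //   yes     yes     yes, matching meta  -> healthy, nothing to do
    //   no      yes     no                  -> orphan on HDFS; optionally patch hbase:meta
    //   yes     no      any                 -> stale meta entry; optionally remove from meta
    //   yes     yes     no                  -> not deployed; optionally re-assign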
2435    // ========== First the healthy cases =============
2436    if (hbi.containsOnlyHdfsEdits()) {
2437      return;
2438    }
2439    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2440      return;
2441    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2442      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2443        "table that is not deployed");
2444      return;
2445    } else if (recentlyModified) {
2446      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2447      return;
2448    }
2449    // ========== Cases where the region is not in hbase:meta =============
2450    else if (!inMeta && !inHdfs && !isDeployed) {
2451      // We shouldn't have record of this region at all then!
2452      assert false : "Entry for region with no data";
2453    } else if (!inMeta && !inHdfs && isDeployed) {
2454      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2455          + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2456          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2457      if (shouldFixAssignments()) {
2458        undeployRegions(hbi);
2459      }
2460
2461    } else if (!inMeta && inHdfs && !isDeployed) {
2462      if (hbi.isMerged()) {
2463        // This region has already been merged, the remaining hdfs file will be
2464        // cleaned by CatalogJanitor later
2465        hbi.setSkipChecks(true);
2466        LOG.info("Region " + descriptiveName
2467            + " was merged recently, its file(s) will be cleaned by CatalogJanitor later");
2468        return;
2469      }
2470      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2471          + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2472          "or deployed on any region server");
2473      // restore region consistency of an adopted orphan
2474      if (shouldFixMeta()) {
2475        if (!hbi.isHdfsRegioninfoPresent()) {
2476          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2477              +  " in table integrity repair phase if -fixHdfsOrphans was" +
2478              " used.");
2479          return;
2480        }
2481
2482        RegionInfo hri = hbi.getHdfsHRI();
2483        TableInfo tableInfo = tablesInfo.get(hri.getTable());
2484
2485        for (RegionInfo region : tableInfo.getRegionsFromMeta()) {
2486          if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2487              && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2488                hri.getEndKey()) >= 0)
2489              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2490            if(region.isSplit() || region.isOffline()) continue;
2491            Path regionDir = hbi.getHdfsRegionDir();
2492            FileSystem fs = regionDir.getFileSystem(getConf());
2493            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2494            for (Path familyDir : familyDirs) {
2495              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2496              for (Path referenceFilePath : referenceFilePaths) {
2497                Path parentRegionDir =
2498                    StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2499                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2500                  LOG.warn(hri + " start and stop keys are in the range of " + region
2501                      + ". The region might not be cleaned up from hdfs when region " + region
2502                      + " split failed. Hence deleting from hdfs.");
2503                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2504                    regionDir.getParent(), hri);
2505                  return;
2506                }
2507              }
2508            }
2509          }
2510        }
2511        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2512        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2513        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2514            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2515              .getLiveServerMetrics().keySet(), numReplicas);
2516
2517        tryAssignmentRepair(hbi, "Trying to reassign region...");
2518      }
2519
2520    } else if (!inMeta && inHdfs && isDeployed) {
2521      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2522          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2523      debugLsr(hbi.getHdfsRegionDir());
2524      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2525        // for replicas, this means that we should undeploy the region (we would have
2526        // gone over the primaries and fixed meta holes in first phase under
2527        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2528        // this stage unless unwanted replica)
2529        if (shouldFixAssignments()) {
2530          undeployRegionsForHbi(hbi);
2531        }
2532      }
2533      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2534        if (!hbi.isHdfsRegioninfoPresent()) {
2535          LOG.error("This should have been repaired in table integrity repair phase");
2536          return;
2537        }
2538
2539        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2540        int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2541        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2542            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2543              .getLiveServerMetrics().keySet(), numReplicas);
2544        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2545      }
2546
2547    // ========== Cases where the region is in hbase:meta =============
2548    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2549      // check whether this is an actual error, or just transient state where parent
2550      // is not cleaned
2551      if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2552        // check that split daughters are there
2553        HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2554        HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2555        if (infoA != null && infoB != null) {
2556          // we already processed or will process daughters. Move on, nothing to see here.
2557          hbi.setSkipChecks(true);
2558          return;
2559        }
2560      }
2561
2562      // For Replica region, we need to do a similar check. If replica is not split successfully,
2563      // error is going to be reported against primary daughter region.
2564      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2565        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2566            + "and not deployed on any region server. This may be transient.");
2567        hbi.setSkipChecks(true);
2568        return;
2569      }
2570
2571      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2572          + descriptiveName + " is a split parent in META, in HDFS, "
2573          + "and not deployed on any region server. This could be transient; "
2574          + "consider running the catalog janitor first!");
2575      if (shouldFixSplitParents()) {
2576        setShouldRerun();
2577        resetSplitParent(hbi);
2578      }
2579    } else if (inMeta && !inHdfs && !isDeployed) {
2580      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2581          + descriptiveName + " found in META, but not in HDFS "
2582          + "or deployed on any region server.");
2583      if (shouldFixMeta()) {
2584        deleteMetaRegion(hbi);
2585      }
2586    } else if (inMeta && !inHdfs && isDeployed) {
2587      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2588          + " found in META, but not in HDFS, " +
2589          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2590      // We treat HDFS as ground truth.  Any information in meta is transient
2591      // and equivalent data can be regenerated.  So, let's unassign and remove
2592      // these problems from META.
2593      if (shouldFixAssignments()) {
2594        errors.print("Trying to fix unassigned region...");
2595        undeployRegions(hbi);
2596      }
2597      if (shouldFixMeta()) {
2598        // wait for it to complete
2599        deleteMetaRegion(hbi);
2600      }
2601    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2602      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2603          + " not deployed on any region server.");
2604      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2605    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2606      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2607          "Region " + descriptiveName + " should not be deployed according " +
2608          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2609      if (shouldFixAssignments()) {
2610        errors.print("Trying to close the region " + descriptiveName);
2611        setShouldRerun();
2612        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2613      }
2614    } else if (inMeta && inHdfs && isMultiplyDeployed) {
2615      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2616          + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2617          + " but is multiply assigned to region servers " +
2618          Joiner.on(", ").join(hbi.deployedOn));
2619      // If we are trying to fix the errors
2620      if (shouldFixAssignments()) {
2621        errors.print("Trying to fix assignment error...");
2622        setShouldRerun();
2623        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2624      }
2625    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2626      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2627          + descriptiveName + " listed in hbase:meta on region server " +
2628          hbi.metaEntry.regionServer + " but found on region server " +
2629          hbi.deployedOn.get(0));
2630      // If we are trying to fix the errors
2631      if (shouldFixAssignments()) {
2632        errors.print("Trying to fix assignment error...");
2633        setShouldRerun();
2634        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2635        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2636      }
2637    } else {
2638      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2639          " is in an unforeseen state:" +
2640          " inMeta=" + inMeta +
2641          " inHdfs=" + inHdfs +
2642          " isDeployed=" + isDeployed +
2643          " isMultiplyDeployed=" + isMultiplyDeployed +
2644          " deploymentMatchesMeta=" + deploymentMatchesMeta +
2645          " shouldBeDeployed=" + shouldBeDeployed);
2646    }
2647  }
2648
2649  /**
2650   * Checks table integrity. Goes over all regions and scans the tables.
2651   * Collects all the pieces for each table and checks if there are missing,
2652   * repeated or overlapping ones.
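   * <p>A hedged usage sketch (assumes the earlier meta/HDFS scans in this tool have
   * already populated {@code regionInfoMap}; variable names are illustrative):
   * <pre>{@code
   *   SortedMap<TableName, TableInfo> tables = checkIntegrity();
   *   for (TableInfo ti : tables.values()) {
   *     LOG.info(ti.getName() + " has " + ti.getNumRegions() + " regions");
   *   }
   * }</pre>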
2653   * @throws IOException
2654   */
2655  SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2656    tablesInfo = new TreeMap<>();
2657    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2658    for (HbckInfo hbi : regionInfoMap.values()) {
2659      // Check only valid, working regions
2660      if (hbi.metaEntry == null) {
2661        // this assumes that consistency check has run loadMetaEntry
2662        Path p = hbi.getHdfsRegionDir();
2663        if (p == null) {
2664          errors.report("No regioninfo in Meta or HDFS. " + hbi);
2665        }
2666
2667        // TODO test.
2668        continue;
2669      }
2670      if (hbi.metaEntry.regionServer == null) {
2671        errors.detail("Skipping region because no region server: " + hbi);
2672        continue;
2673      }
2674      if (hbi.metaEntry.isOffline()) {
2675        errors.detail("Skipping region because it is offline: " + hbi);
2676        continue;
2677      }
2678      if (hbi.containsOnlyHdfsEdits()) {
2679        errors.detail("Skipping region because it only contains edits: " + hbi);
2680        continue;
2681      }
2682
2683      // Missing regionDir or over-deployment is checked elsewhere. Include
2684      // these cases in modTInfo, so we can evaluate those regions as part of
2685      // the region chain in META
2686      //if (hbi.foundRegionDir == null) continue;
2687      //if (hbi.deployedOn.size() != 1) continue;
2688      if (hbi.deployedOn.isEmpty()) continue;
2689
2690      // We should be safe here
2691      TableName tableName = hbi.metaEntry.getTable();
2692      TableInfo modTInfo = tablesInfo.get(tableName);
2693      if (modTInfo == null) {
2694        modTInfo = new TableInfo(tableName);
2695      }
2696      for (ServerName server : hbi.deployedOn) {
2697        modTInfo.addServer(server);
2698      }
2699
2700      if (!hbi.isSkipChecks()) {
2701        modTInfo.addRegionInfo(hbi);
2702      }
2703
2704      tablesInfo.put(tableName, modTInfo);
2705    }
2706
2707    loadTableInfosForTablesWithNoRegion();
2708
2709    logParallelMerge();
2710    for (TableInfo tInfo : tablesInfo.values()) {
2711      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2712      if (!tInfo.checkRegionChain(handler)) {
2713        errors.report("Found inconsistency in table " + tInfo.getName());
2714      }
2715    }
2716    return tablesInfo;
2717  }
2718
2719  /** Loads table infos for tables that may not have been included, since there are no
2720   * regions reported for the table, but the table dir is present in HDFS.
2721   */
2722  private void loadTableInfosForTablesWithNoRegion() throws IOException {
2723    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2724    for (TableDescriptor htd : allTables.values()) {
2725      if (checkMetaOnly && !htd.isMetaTable()) {
2726        continue;
2727      }
2728
2729      TableName tableName = htd.getTableName();
2730      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2731        TableInfo tableInfo = new TableInfo(tableName);
2732        tableInfo.htds.add(htd);
2733        tablesInfo.put(htd.getTableName(), tableInfo);
2734      }
2735    }
2736  }
2737
2738  /**
2739   * Merge HDFS data by moving files from the contained HbckInfo's region dir into targetRegionDir.
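   * <p>Illustrative sketch (paths are hypothetical): family dirs such as
   * {@code .../table/containedRegion/cf/} have their files renamed into
   * {@code targetRegionDir/cf/}, the old {@code .regioninfo} and {@code .oldlogs}
   * entries are skipped, and the contained region dir is then sidelined.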
2740   * @return number of file move fixes done to merge regions.
2741   */
2742  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2743    int fileMoves = 0;
2744    String thread = Thread.currentThread().getName();
2745    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2746    debugLsr(contained.getHdfsRegionDir());
2747
2748    // rename the contained into the container.
2749    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2750    FileStatus[] dirs = null;
2751    try {
2752      dirs = fs.listStatus(contained.getHdfsRegionDir());
2753    } catch (FileNotFoundException fnfe) {
2754      // region we are attempting to merge in is not present!  Since this is a merge, there is
2755      // no harm skipping this region if it does not exist.
2756      if (!fs.exists(contained.getHdfsRegionDir())) {
2757        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2758            + " is missing. Assuming already sidelined or moved.");
2759      } else {
2760        sidelineRegionDir(fs, contained);
2761      }
2762      return fileMoves;
2763    }
2764
2765    if (dirs == null) {
2766      if (!fs.exists(contained.getHdfsRegionDir())) {
2767        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2768            + " already sidelined.");
2769      } else {
2770        sidelineRegionDir(fs, contained);
2771      }
2772      return fileMoves;
2773    }
2774
2775    for (FileStatus cf : dirs) {
2776      Path src = cf.getPath();
2777      Path dst =  new Path(targetRegionDir, src.getName());
2778
2779      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2780        // do not copy the old .regioninfo file.
2781        continue;
2782      }
2783
2784      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2785        // do not copy the .oldlogs files
2786        continue;
2787      }
2788
2789      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2790      // FileSystem.rename is inconsistent with directories -- if the
2791      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2792      // it moves the src into the dst dir resulting in (foo/a/b).  If
2793      // the dst does not exist, and the src is a dir, src becomes dst. (foo/b)
2794      for (FileStatus hfile : fs.listStatus(src)) {
2795        boolean success = fs.rename(hfile.getPath(), dst);
2796        if (success) {
2797          fileMoves++;
2798        }
2799      }
2800      LOG.debug("[" + thread + "] Sideline directory contents:");
2801      debugLsr(targetRegionDir);
2802    }
2803
2804    // if all succeeded, sideline the now-empty contained region dir.
2805    sidelineRegionDir(fs, contained);
2806    LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2807        getSidelineDir());
2808    debugLsr(contained.getHdfsRegionDir());
2809
2810    return fileMoves;
2811  }
2812
2813
2814  static class WorkItemOverlapMerge implements Callable<Void> {
2815    private TableIntegrityErrorHandler handler;
2816    Collection<HbckInfo> overlapgroup;
2817
2818    WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2819      this.handler = handler;
2820      this.overlapgroup = overlapgroup;
2821    }
2822
2823    @Override
2824    public Void call() throws Exception {
2825      handler.handleOverlapGroup(overlapgroup);
2826      return null;
2827    }
2828  }
2829
2830  /**
2831   * Maintain information about a particular table.
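   * <p>A minimal, hedged sketch of how the integrity check feeds this class
   * (variable names are illustrative; see checkIntegrity() for the real wiring):
   * <pre>{@code
   *   TableInfo info = new TableInfo(tableName);
   *   info.addServer(serverName);
   *   info.addRegionInfo(hbi); // only default replicas feed the split calculator
   *   boolean consistent = info.checkRegionChain(info.new IntegrityFixSuggester(info, errors));
   * }</pre>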
2832   */
2833  public class TableInfo {
2834    TableName tableName;
2835    TreeSet <ServerName> deployedOn;
2836
2837    // backwards regions
2838    final List<HbckInfo> backwards = new ArrayList<>();
2839
2840    // sidelined big overlapped regions
2841    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<>();
2842
2843    // region split calculator
2844    final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<>(cmp);
2845
2846    // Histogram of different TableDescriptors found.  Ideally there is only one!
2847    final Set<TableDescriptor> htds = new HashSet<>();
2848
2849    // key = start split, values = set of splits in problem group
2850    final Multimap<byte[], HbckInfo> overlapGroups =
2851      TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2852
2853    // list of regions derived from meta entries.
2854    private ImmutableList<RegionInfo> regionsFromMeta = null;
2855
2856    TableInfo(TableName name) {
2857      this.tableName = name;
2858      deployedOn = new TreeSet <>();
2859    }
2860
2861    /**
2862     * @return descriptor common to all regions; null if there are none or multiple.
2863     */
2864    private TableDescriptor getHTD() {
2865      if (htds.size() == 1) {
2866        return (TableDescriptor)htds.toArray()[0];
2867      } else {
2868        LOG.error("None/Multiple table descriptors found for table '"
2869          + tableName + "' regions: " + htds);
2870      }
2871      return null;
2872    }
2873
2874    public void addRegionInfo(HbckInfo hir) {
2875      if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2876        // end key is absolute end key, just add it.
2877        // ignore replicas other than primary for these checks
2878        if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2879        return;
2880      }
2881
2882      // if not the absolute end key, check for cycle
2883      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2884        errors.reportError(
2885            ERROR_CODE.REGION_CYCLE,
2886            String.format("The endkey for this region comes before the "
2887                + "startkey, startkey=%s, endkey=%s",
2888                Bytes.toStringBinary(hir.getStartKey()),
2889                Bytes.toStringBinary(hir.getEndKey())), this, hir);
2890        backwards.add(hir);
2891        return;
2892      }
2893
2894      // main case, add to split calculator
2895      // ignore replicas other than primary for these checks
2896      if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2897    }
2898
2899    public void addServer(ServerName server) {
2900      this.deployedOn.add(server);
2901    }
2902
2903    public TableName getName() {
2904      return tableName;
2905    }
2906
2907    public int getNumRegions() {
2908      return sc.getStarts().size() + backwards.size();
2909    }
2910
2911    public synchronized ImmutableList<RegionInfo> getRegionsFromMeta() {
2912      // lazy loaded, synchronized to ensure a single load
2913      if (regionsFromMeta == null) {
2914        List<RegionInfo> regions = new ArrayList<>();
2915        for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2916          if (tableName.equals(h.getTableName())) {
2917            if (h.metaEntry != null) {
2918              regions.add(h.metaEntry);
2919            }
2920          }
2921        }
2922        regionsFromMeta = Ordering.from(RegionInfo.COMPARATOR).immutableSortedCopy(regions);
2923      }
2924
2925      return regionsFromMeta;
2926    }
2927
2928    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2929      ErrorReporter errors;
2930
2931      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2932        this.errors = errors;
2933        setTableInfo(ti);
2934      }
2935
2936      @Override
2937      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2938        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2939            "First region should start with an empty key.  You need to "
2940            + "create a new region and regioninfo in HDFS to plug the hole.",
2941            getTableInfo(), hi);
2942      }
2943
2944      @Override
2945      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2946        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2947            "Last region should end with an empty key. You need to "
2948                + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2949      }
2950
2951      @Override
2952      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2953        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2954            "Region has the same start and end key.", getTableInfo(), hi);
2955      }
2956
2957      @Override
2958      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2959        byte[] key = r1.getStartKey();
2960        // dup start key
2961        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2962            "Multiple regions have the same startkey: "
2963            + Bytes.toStringBinary(key), getTableInfo(), r1);
2964        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2965            "Multiple regions have the same startkey: "
2966            + Bytes.toStringBinary(key), getTableInfo(), r2);
2967      }
2968
2969      @Override
2970      public void handleSplit(HbckInfo r1, HbckInfo r2) throws IOException{
2971        byte[] key = r1.getStartKey();
2972        // a split pair: the two daughters share the same region id
2973        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
2974          "Multiple regions have the same regionID: "
2975            + Bytes.toStringBinary(key), getTableInfo(), r1);
2976        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
2977          "Multiple regions have the same regionID: "
2978            + Bytes.toStringBinary(key), getTableInfo(), r2);
2979      }
2980
2981      @Override
2982      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2983        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2984            "There is an overlap in the region chain.",
2985            getTableInfo(), hi1, hi2);
2986      }
2987
2988      @Override
2989      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2990        errors.reportError(
2991            ERROR_CODE.HOLE_IN_REGION_CHAIN,
2992            "There is a hole in the region chain between "
2993                + Bytes.toStringBinary(holeStart) + " and "
2994                + Bytes.toStringBinary(holeStop)
2995                + ".  You need to create a new .regioninfo and region "
2996                + "dir in hdfs to plug the hole.");
2997      }
2998    }
2999
3000    /**
3001     * This handler fixes integrity errors from hdfs information.  There are
3002     * basically three classes of integrity problems: 1) holes, 2) overlaps, and
3003     * 3) invalid regions.
3004     *
3005     * This class overrides methods that fix holes and the overlap group case.
3006     * Individual cases of particular overlaps are handled by the general
3007     * overlap group merge repair case.
3008     *
3009     * If hbase is online, this forces regions offline before doing merge
3010     * operations.
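     * <p>A hedged construction sketch (the real wiring happens elsewhere in this tool;
     * the boolean flags shown are illustrative, not defaults):
     * <pre>{@code
     *   TableIntegrityErrorHandler fixer =
     *       tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(), true, true);
     *   tInfo.checkRegionChain(fixer);
     * }</pre>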
3011     */
3012    private class HDFSIntegrityFixer extends IntegrityFixSuggester {
3013      Configuration conf;
3014
3015      boolean fixOverlaps = true;
3016
3017      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
3018          boolean fixHoles, boolean fixOverlaps) {
3019        super(ti, errors);
3020        this.conf = conf;
3021        this.fixOverlaps = fixOverlaps;
3022        // TODO properly use fixHoles
3023      }
3024
3025      /**
3026       * This is a special case hole -- when the first region of a table is
3027       * missing from META, HBase doesn't acknowledge the existence of the
3028       * table.
3029       */
3030      @Override
3031      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
3032        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
3033            "First region should start with an empty key.  Creating a new " +
3034            "region and regioninfo in HDFS to plug the hole.",
3035            getTableInfo(), next);
3036        TableDescriptor htd = getTableInfo().getHTD();
3037        // from special EMPTY_START_ROW to next region's startKey
3038        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3039            .setStartKey(HConstants.EMPTY_START_ROW)
3040            .setEndKey(next.getStartKey())
3041            .build();
3042
3043        // TODO test
3044        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3045        LOG.info("Table region start key was not empty.  Created new empty region: "
3046            + newRegion + " " +region);
3047        fixes++;
3048      }
3049
3050      @Override
3051      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
3052        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
3053            "Last region should end with an empty key.  Creating a new "
3054                + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
3055        TableDescriptor htd = getTableInfo().getHTD();
3056        // from curEndKey to EMPTY_START_ROW
3057        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3058            .setStartKey(curEndKey)
3059            .setEndKey(HConstants.EMPTY_START_ROW)
3060            .build();
3061
3062        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3063        LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
3064            + " " + region);
3065        fixes++;
3066      }
3067
3068      /**
3069       * There is a hole in the hdfs regions that violates the table integrity
3070       * rules.  Create a new empty region that patches the hole.
3071       */
3072      @Override
3073      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
3074        errors.reportError(
3075            ERROR_CODE.HOLE_IN_REGION_CHAIN,
3076            "There is a hole in the region chain between "
3077                + Bytes.toStringBinary(holeStartKey) + " and "
3078                + Bytes.toStringBinary(holeStopKey)
3079                + ".  Creating a new regioninfo and region "
3080                + "dir in hdfs to plug the hole.");
3081        TableDescriptor htd = getTableInfo().getHTD();
3082        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3083            .setStartKey(holeStartKey)
3084            .setEndKey(holeStopKey)
3085            .build();
3086        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3087        LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
3088        fixes++;
3089      }
3090
3091      /**
3092       * This takes set of overlapping regions and merges them into a single
3093       * region.  This covers cases like degenerate regions, shared start key,
3094       * general overlaps, duplicate ranges, and partial overlapping regions.
3095       *
3096       * Cases:
3097       * - Clean regions that overlap
3098       * - Regions with only .oldlogs dirs (start/stop range cannot be determined)
3099       *
3100       * This is basically threadsafe, except for the increment of the fixes counter in mergeOverlaps.
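       * <p>For illustration (the threshold is configurable; the numbers here are
       * assumptions, not defaults): with maxMerge = 5, a group of 3 overlapping
       * regions is merged into a single container region, while a group of 8 is
       * left alone or, if sidelineBigOverlaps is enabled, partially sidelined.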
3101       */
3102      @Override
3103      public void handleOverlapGroup(Collection<HbckInfo> overlap)
3104          throws IOException {
3105        Preconditions.checkNotNull(overlap);
3106        Preconditions.checkArgument(overlap.size() >0);
3107
3108        if (!this.fixOverlaps) {
3109          LOG.warn("Not attempting to repair overlaps.");
3110          return;
3111        }
3112
3113        if (overlap.size() > maxMerge) {
3114          LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
3115            "regions which is greater than " + maxMerge + ", the max number of regions to merge");
3116          if (sidelineBigOverlaps) {
3117            // we only sideline big overlap groups that exceed the max number of regions to merge
3118            sidelineBigOverlaps(overlap);
3119          }
3120          return;
3121        }
3122        if (shouldRemoveParents()) {
3123          removeParentsAndFixSplits(overlap);
3124        }
3125        mergeOverlaps(overlap);
3126      }
3127
3128      void removeParentsAndFixSplits(Collection<HbckInfo> overlap) throws IOException {
3129        Pair<byte[], byte[]> range = null;
3130        HbckInfo parent = null;
3131        HbckInfo daughterA = null;
3132        HbckInfo daughterB = null;
3133        Collection<HbckInfo> daughters = new ArrayList<HbckInfo>(overlap);
3134
3135        String thread = Thread.currentThread().getName();
3136        LOG.info("== [" + thread + "] Attempting fix splits in overlap state.");
3137
3138        // we can only handle a single split per group at a time
3139        if (overlap.size() > 3) {
3140          LOG.info("Too many overlaps were found on this group, falling back to regular merge.");
3141          return;
3142        }
3143
3144        for (HbckInfo hi : overlap) {
3145          if (range == null) {
3146            range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
3147          } else {
3148            if (RegionSplitCalculator.BYTES_COMPARATOR
3149              .compare(hi.getStartKey(), range.getFirst()) < 0) {
3150              range.setFirst(hi.getStartKey());
3151            }
3152            if (RegionSplitCalculator.BYTES_COMPARATOR
3153              .compare(hi.getEndKey(), range.getSecond()) > 0) {
3154              range.setSecond(hi.getEndKey());
3155            }
3156          }
3157        }
3158
3159        LOG.info("This group range is [" + Bytes.toStringBinary(range.getFirst()) + ", "
3160          + Bytes.toStringBinary(range.getSecond()) + "]");
3161
3162        // attempt to find a possible parent for the edge case of a split
3163        for (HbckInfo hi : overlap) {
3164          if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0
3165            && Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
3166            LOG.info("This is a parent for this group: " + hi.toString());
3167            parent = hi;
3168          }
3169        }
3170
3171        // Remove parent regions from daughters collection
3172        if (parent != null) {
3173          daughters.remove(parent);
3174        }
3175
3176        // Let's verify that the daughters share the regionID at split time and that they
3177        // were created after the parent
3178        for (HbckInfo hi : daughters) {
3179          if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0) {
3180            if (parent != null && parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
3181              daughterA = hi;
3182            }
3183          }
3184          if (Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
3185            if (parent != null && parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
3186              daughterB = hi;
3187            }
3188          }
3189        }
3190
3191        // we need a parent and two daughters that share the same regionID; otherwise bail out
3192        if (parent == null || daughterA == null || daughterB == null
3193          || daughterA.getHdfsHRI().getRegionId() != daughterB.getHdfsHRI().getRegionId()) return;
3194
3195        FileSystem fs = FileSystem.get(conf);
3196        LOG.info("Found parent: " + parent.getRegionNameAsString());
3197        LOG.info("Found potential daughter a: " + daughterA.getRegionNameAsString());
3198        LOG.info("Found potential daughter b: " + daughterB.getRegionNameAsString());
3199        LOG.info("Trying to fix parent in overlap by removing the parent.");
3200        try {
3201          closeRegion(parent);
3202        } catch (IOException ioe) {
3203          LOG.warn("Parent region could not be closed, continuing with regular merge...", ioe);
3204          return;
3205        } catch (InterruptedException ie) {
3206          LOG.warn("Parent region could not be closed, continuing with regular merge...", ie);
3207          return;
3208        }
3209
3210        try {
3211          offline(parent.getRegionName());
3212        } catch (IOException ioe) {
3213          LOG.warn("Unable to offline parent region: " + parent.getRegionNameAsString()
3214            + ".  Just continuing with regular merge... ", ioe);
3215          return;
3216        }
3217
3218        try {
3219          HBaseFsckRepair.removeParentInMeta(conf, parent.getHdfsHRI());
3220        } catch (IOException ioe) {
3221          LOG.warn("Unable to remove parent region in META: " + parent.getRegionNameAsString()
3222            + ".  Just continuing with regular merge... ", ioe);
3223          return;
3224        }
3225
3226        sidelineRegionDir(fs, parent);
3227        LOG.info("[" + thread + "] Sidelined parent region dir "+ parent.getHdfsRegionDir() + " into " +
3228          getSidelineDir());
3229        debugLsr(parent.getHdfsRegionDir());
3230
3231        // Make sure we don't have the parents and daughters around
3232        overlap.remove(parent);
3233        overlap.remove(daughterA);
3234        overlap.remove(daughterB);
3235
3236        LOG.info("Done fixing split.");
3237
3238      }
3239
3240      void mergeOverlaps(Collection<HbckInfo> overlap)
3241          throws IOException {
3242        String thread = Thread.currentThread().getName();
3243        LOG.info("== [" + thread + "] Merging regions into one region: "
3244          + Joiner.on(",").join(overlap));
3245        // get the min / max range and close all concerned regions
3246        Pair<byte[], byte[]> range = null;
3247        for (HbckInfo hi : overlap) {
3248          if (range == null) {
3249            range = new Pair<>(hi.getStartKey(), hi.getEndKey());
3250          } else {
3251            if (RegionSplitCalculator.BYTES_COMPARATOR
3252                .compare(hi.getStartKey(), range.getFirst()) < 0) {
3253              range.setFirst(hi.getStartKey());
3254            }
3255            if (RegionSplitCalculator.BYTES_COMPARATOR
3256                .compare(hi.getEndKey(), range.getSecond()) > 0) {
3257              range.setSecond(hi.getEndKey());
3258            }
3259          }
3260          // need to close files so delete can happen.
3261          LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
3262          LOG.debug("[" + thread + "] Contained region dir before close");
3263          debugLsr(hi.getHdfsRegionDir());
3264          try {
3265            LOG.info("[" + thread + "] Closing region: " + hi);
3266            closeRegion(hi);
3267          } catch (IOException ioe) {
3268            LOG.warn("[" + thread + "] Was unable to close region " + hi
3269              + ".  Just continuing... ", ioe);
3270          } catch (InterruptedException e) {
3271            LOG.warn("[" + thread + "] Was unable to close region " + hi
3272              + ".  Just continuing... ", e);
3273          }
3274
3275          try {
3276            LOG.info("[" + thread + "] Offlining region: " + hi);
3277            offline(hi.getRegionName());
3278          } catch (IOException ioe) {
3279            LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
3280              + ".  Just continuing... ", ioe);
3281          }
3282        }
3283
3284        // create new empty container region.
3285        TableDescriptor htd = getTableInfo().getHTD();
3286        // from start key to end Key
3287        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3288            .setStartKey(range.getFirst())
3289            .setEndKey(range.getSecond())
3290            .build();
3291        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3292        LOG.info("[" + thread + "] Created new empty container region: " +
3293            newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
3294        debugLsr(region.getRegionFileSystem().getRegionDir());
3295
3296        // all target regions are closed, should be able to safely cleanup.
3297        boolean didFix= false;
3298        Path target = region.getRegionFileSystem().getRegionDir();
3299        for (HbckInfo contained : overlap) {
3300          LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
3301          int merges = mergeRegionDirs(target, contained);
3302          if (merges > 0) {
3303            didFix = true;
3304          }
3305        }
3306        if (didFix) {
3307          fixes++;
3308        }
3309      }
3310
3311      /**
3312       * Sideline some regions in a big overlap group so that it
3313       * will have fewer regions, and it is easier to merge them later on.
3314       *
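       * <p>Worked example (values are hypothetical): with maxMerge = 5 and
       * maxOverlapsToSideline = 2, a group of 9 regions would want 9 - 5 = 4
       * regions sidelined, but the cap limits that to the 2 regions chosen by
       * RegionSplitCalculator.findBigRanges().
       *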
3315       * @param bigOverlap the overlapped group with regions more than maxMerge
3316       * @throws IOException
3317       */
3318      void sidelineBigOverlaps(
3319          Collection<HbckInfo> bigOverlap) throws IOException {
3320        int overlapsToSideline = bigOverlap.size() - maxMerge;
3321        if (overlapsToSideline > maxOverlapsToSideline) {
3322          overlapsToSideline = maxOverlapsToSideline;
3323        }
3324        List<HbckInfo> regionsToSideline =
3325          RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
3326        FileSystem fs = FileSystem.get(conf);
3327        for (HbckInfo regionToSideline: regionsToSideline) {
3328          try {
3329            LOG.info("Closing region: " + regionToSideline);
3330            closeRegion(regionToSideline);
3331          } catch (IOException ioe) {
3332            LOG.warn("Was unable to close region " + regionToSideline
3333              + ".  Just continuing... ", ioe);
3334          } catch (InterruptedException e) {
3335            LOG.warn("Was unable to close region " + regionToSideline
3336              + ".  Just continuing... ", e);
3337          }
3338
3339          try {
3340            LOG.info("Offlining region: " + regionToSideline);
3341            offline(regionToSideline.getRegionName());
3342          } catch (IOException ioe) {
3343            LOG.warn("Unable to offline region from master: " + regionToSideline
3344              + ".  Just continuing... ", ioe);
3345          }
3346
3347          LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
3348          Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
3349          if (sidelineRegionDir != null) {
3350            sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3351            LOG.info("After sidelined big overlapped region: "
3352              + regionToSideline.getRegionNameAsString()
3353              + " to " + sidelineRegionDir.toString());
3354            fixes++;
3355          }
3356        }
3357      }
3358    }
3359
3360    /**
3361     * Check the region chain (from META) of this table.  We are looking for
3362     * holes, overlaps, and cycles.
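     * <p>Illustrative example (keys are hypothetical): regions covering
     * {@code [, b)}, {@code [b, c)} and {@code [d, )} leave a hole {@code [c, d)},
     * while an extra region {@code [b, e)} would overlap both {@code [b, c)} and
     * {@code [d, )}.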
3363     * @return false if there are errors
3364     * @throws IOException
3365     */
3366    public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
3367      // When the table is disabled there is no need to check the region chain. If some
3368      // regions are accidentally deployed, the code below might report issues such as a missing
3369      // start or end region or a hole in the chain, and might try to fix them, which is unwanted.
3370      if (isTableDisabled(this.tableName)) {
3371        return true;
3372      }
3373      int originalErrorsCount = errors.getErrorList().size();
3374      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
3375      SortedSet<byte[]> splits = sc.getSplits();
3376
3377      byte[] prevKey = null;
3378      byte[] problemKey = null;
3379
3380      if (splits.isEmpty()) {
3381        // no region for this table
3382        handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
3383      }
3384
3385      for (byte[] key : splits) {
3386        Collection<HbckInfo> ranges = regions.get(key);
3387        if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
3388          for (HbckInfo rng : ranges) {
3389            handler.handleRegionStartKeyNotEmpty(rng);
3390          }
3391        }
3392
3393        // check for degenerate ranges
3394        for (HbckInfo rng : ranges) {
3395          // special endkey case converts '' to null
3396          byte[] endKey = rng.getEndKey();
3397          endKey = (endKey.length == 0) ? null : endKey;
3398          if (Bytes.equals(rng.getStartKey(),endKey)) {
3399            handler.handleDegenerateRegion(rng);
3400          }
3401        }
3402
3403        if (ranges.size() == 1) {
3404          // this split key is ok -- no overlap, not a hole.
3405          if (problemKey != null) {
3406            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3407          }
3408          problemKey = null; // fell through, no more problem.
3409        } else if (ranges.size() > 1) {
3410          // set the new problem key group name; if we already have a problem key, just
3411          // keep using it.
3412          if (problemKey == null) {
3413            // only for overlap regions.
3414            LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
3415            problemKey = key;
3416          }
3417          overlapGroups.putAll(problemKey, ranges);
3418
3419          // record errors
3420          ArrayList<HbckInfo> subRange = new ArrayList<>(ranges);
3421          // this is dumb and O(n^2), but it shouldn't happen often
3422          for (HbckInfo r1 : ranges) {
3423            if (r1.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
3424            subRange.remove(r1);
3425            for (HbckInfo r2 : subRange) {
3426              if (r2.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
3427              // general case of same start key
3428              if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3429                handler.handleDuplicateStartKeys(r1,r2);
3430              } else if (Bytes.compareTo(r1.getEndKey(), r2.getStartKey())==0 &&
3431                r1.getHdfsHRI().getRegionId() == r2.getHdfsHRI().getRegionId()) {
3432                LOG.info("this is a split, log to splits");
3433                handler.handleSplit(r1, r2);
3434              } else {
3435                // overlap
3436                handler.handleOverlapInRegionChain(r1, r2);
3437              }
3438            }
3439          }
3440
3441        } else if (ranges.isEmpty()) {
3442          if (problemKey != null) {
3443            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3444          }
3445          problemKey = null;
3446
3447          byte[] holeStopKey = sc.getSplits().higher(key);
3448          // if higher key is null we reached the top.
3449          if (holeStopKey != null) {
3450            // hole
3451            handler.handleHoleInRegionChain(key, holeStopKey);
3452          }
3453        }
3454        prevKey = key;
3455      }
3456
3457      // When the last region of a table is proper and has an empty end key, 'prevKey'
3458      // will be null.
3459      if (prevKey != null) {
3460        handler.handleRegionEndKeyNotEmpty(prevKey);
3461      }
3462
3463      // TODO fold this into the TableIntegrityHandler
3464      if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3465        boolean ok = handleOverlapsParallel(handler, prevKey);
3466        if (!ok) {
3467          return false;
3468        }
3469      } else {
3470        for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3471          handler.handleOverlapGroup(overlap);
3472        }
3473      }
3474
3475      if (details) {
3476        // do full region split map dump
3477        errors.print("---- Table '"  +  this.tableName
3478            + "': region split map");
3479        dump(splits, regions);
3480        errors.print("---- Table '"  +  this.tableName
3481            + "': overlap groups");
3482        dumpOverlapProblems(overlapGroups);
3483        errors.print("There are " + overlapGroups.keySet().size()
3484            + " overlap groups with " + overlapGroups.size()
3485            + " overlapping regions");
3486      }
3487      if (!sidelinedRegions.isEmpty()) {
3488        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3489        errors.print("---- Table '"  +  this.tableName
3490            + "': sidelined big overlapped regions");
3491        dumpSidelinedRegions(sidelinedRegions);
3492      }
3493      return errors.getErrorList().size() == originalErrorsCount;
3494    }
3495
3496    private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3497        throws IOException {
3498      // we parallelize the overlap handler for the case where we have lots of groups to fix.  We can
3499      // safely assume each group is independent.
3500      List<WorkItemOverlapMerge> merges = new ArrayList<>(overlapGroups.size());
3501      List<Future<Void>> rets;
3502      for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3503        //
3504        merges.add(new WorkItemOverlapMerge(overlap, handler));
3505      }
3506      try {
3507        rets = executor.invokeAll(merges);
3508      } catch (InterruptedException e) {
3509        LOG.error("Overlap merges were interrupted", e);
3510        return false;
3511      }
3512      for(int i=0; i<merges.size(); i++) {
3513        WorkItemOverlapMerge work = merges.get(i);
3514        Future<Void> f = rets.get(i);
3515        try {
3516          f.get();
3517        } catch(ExecutionException e) {
3518          LOG.warn("Failed to merge overlap group " + work, e.getCause());
3519        } catch (InterruptedException e) {
3520          LOG.error("Waiting for overlap merges was interrupted", e);
3521          return false;
3522        }
3523      }
3524      return true;
3525    }
3526
3527    /**
3528     * Dumps the region split map in a visually reasonable way for debugging.
3529     *
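     * <p>Output sketch (hedged, values hypothetical): each printed line is a split
     * key followed by tab-separated {@code [ <region>, <region end key>]} entries
     * for the regions that cover it.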
3530     * @param splits the split points (region boundary keys) for this table
3531     * @param regions coverage map from each split point to the regions that cover it
3532     */
3533    void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3534      // we display this way because the last end key should be displayed as well.
3535      StringBuilder sb = new StringBuilder();
3536      for (byte[] k : splits) {
3537        sb.setLength(0); // clear out existing buffer, if any.
3538        sb.append(Bytes.toStringBinary(k) + ":\t");
3539        for (HbckInfo r : regions.get(k)) {
3540          sb.append("[ "+ r.toString() + ", "
3541              + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3542        }
3543        errors.print(sb.toString());
3544      }
3545    }
3546  }
3547
3548  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3549    // we display this way because the last end key should be displayed as
3550    // well.
3551    for (byte[] k : regions.keySet()) {
3552      errors.print(Bytes.toStringBinary(k) + ":");
3553      for (HbckInfo r : regions.get(k)) {
3554        errors.print("[ " + r.toString() + ", "
3555            + Bytes.toStringBinary(r.getEndKey()) + "]");
3556      }
3557      errors.print("----");
3558    }
3559  }
3560
3561  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3562    for (Map.Entry<Path, HbckInfo> entry : regions.entrySet()) {
3563      TableName tableName = entry.getValue().getTableName();
3564      Path path = entry.getKey();
3565      errors.print("This sidelined region dir should be bulk loaded: " + path.toString());
3566      errors.print("Bulk load command looks like: " + BulkLoadHFilesTool.NAME + " " +
3567        path.toUri().getPath() + " " + tableName);
3568    }
3569  }
3570
3571  public Multimap<byte[], HbckInfo> getOverlapGroups(
3572      TableName table) {
3573    TableInfo ti = tablesInfo.get(table);
3574    return ti.overlapGroups;
3575  }
3576
3577  /**
3578   * Return descriptors of user-space tables whose metadata has not been
3579   * modified in the last few milliseconds specified by timelag:
3580   * if none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3581   * SPLITA_QUALIFIER or SPLITB_QUALIFIER columns has changed in the last
3582   * 'timelag' milliseconds, the table is a candidate to be returned.
3583   * @param numSkipped incremented once for each table skipped because it is still in flux
3584   * @return descriptors of tables that have not been modified recently
3585   */
3586  TableDescriptor[] getTables(AtomicInteger numSkipped) {
3587    List<TableName> tableNames = new ArrayList<>();
3588    long now = EnvironmentEdgeManager.currentTime();
3589
3590    for (HbckInfo hbi : regionInfoMap.values()) {
3591      MetaEntry info = hbi.metaEntry;
3592
3593      // if the start key is empty, then we have found the first region of a table.
3594      // pick only those tables that were not modified in the last few milliseconds.
3595      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3596        if (info.modTime + timelag < now) {
3597          tableNames.add(info.getTable());
3598        } else {
3599          numSkipped.incrementAndGet(); // one more in-flux table
3600        }
3601      }
3602    }
3603    return getTableDescriptors(tableNames);
3604  }
3605
3606  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
3607    LOG.info("getTableDescriptors == tableNames => " + tableNames);
3608    try (Connection conn = ConnectionFactory.createConnection(getConf());
3609        Admin admin = conn.getAdmin()) {
3610      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
3611      return tds.toArray(new TableDescriptor[tds.size()]);
3612    } catch (IOException e) {
3613      LOG.debug("Exception getting table descriptors", e);
3614    }
3615    return new TableDescriptor[0];
3616  }
3617
3618  /**
3619   * Gets the entry in regionInfo corresponding to the given encoded
3620   * region name. If the region has not been seen yet, a new entry is added
3621   * and returned.
3622   */
3623  private synchronized HbckInfo getOrCreateInfo(String name) {
3624    HbckInfo hbi = regionInfoMap.get(name);
3625    if (hbi == null) {
3626      hbi = new HbckInfo(null);
3627      regionInfoMap.put(name, hbi);
3628    }
3629    return hbi;
3630  }
3631
3632  private void checkAndFixReplication() throws ReplicationException {
3633    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
3634    checker.checkUnDeletedQueues();
3635
3636    if (checker.hasUnDeletedQueues() && this.fixReplication) {
3637      checker.fixUnDeletedQueues();
3638      setShouldRerun();
3639    }
3640  }
3641
3642  /**
3643   * Check values in regionInfo for hbase:meta.
3644   * Checks whether zero or more than one region claims to be hbase:meta.
3645   * If there are inconsistencies (i.e. zero or more than one region claims
3646   * to be holding hbase:meta), try to fix that and report an error.
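   * <p>Worked example (replica counts are illustrative): with hbase:meta region
   * replication of 3, a replica deployed on no server gets assigned, a replica
   * deployed on two servers is treated as a double assignment and fixed, and a
   * fourth, excess replica entry is undeployed.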
3647   * @throws IOException from HBaseFsckRepair functions
3648   * @throws KeeperException
3649   * @throws InterruptedException
3650   */
3651  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3652    Map<Integer, HbckInfo> metaRegions = new HashMap<>();
3653    for (HbckInfo value : regionInfoMap.values()) {
3654      if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3655        metaRegions.put(value.getReplicaId(), value);
3656      }
3657    }
3658    int metaReplication = admin.getDescriptor(TableName.META_TABLE_NAME)
3659        .getRegionReplication();
3660    boolean noProblem = true;
3661    // There will always be entries in regionInfoMap corresponding to hbase:meta and its replicas
3662    // Check the deployed servers. It should be exactly one server for each replica.
3663    for (int i = 0; i < metaReplication; i++) {
3664      HbckInfo metaHbckInfo = metaRegions.remove(i);
3665      List<ServerName> servers = new ArrayList<>();
3666      if (metaHbckInfo != null) {
3667        servers = metaHbckInfo.deployedOn;
3668      }
3669      if (servers.size() != 1) {
3670        noProblem = false;
3671        if (servers.isEmpty()) {
3672          assignMetaReplica(i);
3673        } else if (servers.size() > 1) {
3674          errors.reportError(ERROR_CODE.MULTI_META_REGION,
3675              "hbase:meta, replicaId " + metaHbckInfo.getReplicaId() +
3676              " is found on more than one region server.");
3677          if (shouldFixAssignments()) {
3678            errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3679                         metaHbckInfo.getReplicaId() +"..");
3680            setShouldRerun();
3681            // try to fix it (treat it as a dupe assignment)
3682            HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3683          }
3684        }
3685      }
3686    }
3687    // unassign whatever is remaining in metaRegions. They are excess replicas.
3688    for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3689      noProblem = false;
3690      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3691          "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3692          ", deployed " + metaRegions.size());
3693      if (shouldFixAssignments()) {
3694        errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3695            " of hbase:meta..");
3696        setShouldRerun();
3697        unassignMetaReplica(entry.getValue());
3698      }
3699    }
3700    // if noProblem is false, rerun hbck with hopefully fixed META
3701    // if noProblem is true, no errors, so continue normally
3702    return noProblem;
3703  }
3704
3705  private void unassignMetaReplica(HbckInfo hi)
3706      throws IOException, InterruptedException, KeeperException {
3707    undeployRegions(hi);
3708    ZKUtil.deleteNode(zkw, zkw.getZNodePaths().getZNodeForReplica(hi.metaEntry.getReplicaId()));
3709  }
3710
3711  private void assignMetaReplica(int replicaId)
3712      throws IOException, KeeperException, InterruptedException {
3713    errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3714        replicaId +" is not found on any region.");
3715    if (shouldFixAssignments()) {
3716      errors.print("Trying to fix a problem with hbase:meta..");
3717      setShouldRerun();
3718      // try to fix it (treat it as unassigned region)
3719      RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3720          RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
3721      HBaseFsckRepair.fixUnassigned(admin, h);
3722      HBaseFsckRepair.waitUntilAssigned(admin, h);
3723    }
3724  }
3725
3726  /**
3727   * Scan hbase:meta, adding all regions found to the regionInfo map.
3728   * @throws IOException if an error is encountered
3729   */
3730  boolean loadMetaEntries() throws IOException {
3731    MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3732      int countRecord = 1;
3733
3734      // comparator that orders cells by timestamp; used to find the latest modification
3735      final Comparator<Cell> comp = new Comparator<Cell>() {
3736        @Override
3737        public int compare(Cell k1, Cell k2) {
3738          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3739        }
3740      };
3741
3742      @Override
3743      public boolean visit(Result result) throws IOException {
3744        try {
3745
3746          // record the latest modification of this META record
3747          long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3748          RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3749          if (rl == null) {
3750            emptyRegionInfoQualifiers.add(result);
3751            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3752              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3753            return true;
3754          }
3755          ServerName sn = null;
3756          if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null ||
3757              rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion() == null) {
3758            emptyRegionInfoQualifiers.add(result);
3759            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3760              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3761            return true;
3762          }
3763          RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion();
3764          if (!(isTableIncluded(hri.getTable())
3765              || hri.isMetaRegion())) {
3766            return true;
3767          }
3768          PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
3769          for (HRegionLocation h : rl.getRegionLocations()) {
3770            if (h == null || h.getRegion() == null) {
3771              continue;
3772            }
3773            sn = h.getServerName();
3774            hri = h.getRegion();
3775
3776            MetaEntry m = null;
3777            if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
3778              m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3779            } else {
3780              m = new MetaEntry(hri, sn, ts, null, null);
3781            }
3782            HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3783            if (previous == null) {
3784              regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3785            } else if (previous.metaEntry == null) {
3786              previous.metaEntry = m;
3787            } else {
3788              throw new IOException("Two entries in hbase:meta are the same " + previous);
3789            }
3790          }
3791          PairOfSameType<RegionInfo> mergeRegions = MetaTableAccessor.getMergeRegions(result);
3792          for (RegionInfo mergeRegion : new RegionInfo[] {
3793              mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3794            if (mergeRegion != null) {
3795              // This region has already been merged
3796              HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3797              hbInfo.setMerged(true);
3798            }
3799          }
3800
3801          // show proof of progress to the user, once for every 100 records.
3802          if (countRecord % 100 == 0) {
3803            errors.progress();
3804          }
3805          countRecord++;
3806          return true;
3807        } catch (RuntimeException e) {
3808          LOG.error("Result=" + result);
3809          throw e;
3810        }
3811      }
3812    };
3813    if (!checkMetaOnly) {
3814      // Scan hbase:meta to pick up user regions
3815      MetaTableAccessor.fullScanRegions(connection, visitor);
3816    }
3817
3818    errors.print("");
3819    return true;
3820  }
3821
3822  /**
3823   * Stores the regioninfo entries scanned from META
3824   */
3825  static class MetaEntry extends HRegionInfo {
3826    ServerName regionServer;   // server hosting this region
3827    long modTime;          // timestamp of the most recent modification of this metadata
3828    RegionInfo splitA, splitB; //split daughters
3829
3830    public MetaEntry(RegionInfo rinfo, ServerName regionServer, long modTime) {
3831      this(rinfo, regionServer, modTime, null, null);
3832    }
3833
3834    public MetaEntry(RegionInfo rinfo, ServerName regionServer, long modTime,
3835        RegionInfo splitA, RegionInfo splitB) {
3836      super(rinfo);
3837      this.regionServer = regionServer;
3838      this.modTime = modTime;
3839      this.splitA = splitA;
3840      this.splitB = splitB;
3841    }
3842
3843    @Override
3844    public boolean equals(Object o) {
3845      boolean superEq = super.equals(o);
3846      if (!superEq) {
3847        return superEq;
3848      }
3849
3850      MetaEntry me = (MetaEntry) o;
3851      if (!regionServer.equals(me.regionServer)) {
3852        return false;
3853      }
3854      return (modTime == me.modTime);
3855    }
3856
3857    @Override
3858    public int hashCode() {
3859      int hash = Arrays.hashCode(getRegionName());
3860      hash = (int) (hash ^ getRegionId());
3861      hash ^= Arrays.hashCode(getStartKey());
3862      hash ^= Arrays.hashCode(getEndKey());
3863      hash ^= Boolean.valueOf(isOffline()).hashCode();
3864      hash ^= getTable().hashCode();
3865      if (regionServer != null) {
3866        hash ^= regionServer.hashCode();
3867      }
3868      hash = (int) (hash ^ modTime);
3869      return hash;
3870    }
3871  }
3872
3873  /**
3874   * Stores the regioninfo entries from HDFS
3875   */
3876  static class HdfsEntry {
3877    RegionInfo hri;
3878    Path hdfsRegionDir = null;
3879    long hdfsRegionDirModTime  = 0;
3880    boolean hdfsRegioninfoFilePresent = false;
3881    boolean hdfsOnlyEdits = false;
3882  }
3883
3884  /**
3885   * Stores the regioninfo retrieved from Online region servers.
3886   */
3887  static class OnlineEntry {
3888    RegionInfo hri;
3889    ServerName hsa;
3890
3891    @Override
3892    public String toString() {
3893      return hsa.toString() + ";" + hri.getRegionNameAsString();
3894    }
3895  }
3896
3897  /**
3898   * Maintain information about a particular region.  It gathers information
3899   * from three places -- HDFS, META, and region servers.
3900   */
3901  public static class HbckInfo implements KeyRange {
3902    private MetaEntry metaEntry = null; // info in META
3903    private HdfsEntry hdfsEntry = null; // info in HDFS
3904    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3905    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3906    private boolean skipChecks = false; // whether to skip further checks to this region info.
3907    private boolean isMerged = false;// whether this region has already been merged into another one
3908    private int deployedReplicaId = RegionInfo.DEFAULT_REPLICA_ID;
3909    private RegionInfo primaryHRIForDeployedReplica = null;
3910
3911    HbckInfo(MetaEntry metaEntry) {
3912      this.metaEntry = metaEntry;
3913    }
3914
3915    public synchronized int getReplicaId() {
      return metaEntry != null ? metaEntry.getReplicaId() : deployedReplicaId;
3917    }
3918
3919    public synchronized void addServer(RegionInfo hri, ServerName server) {
      OnlineEntry rse = new OnlineEntry();
3921      rse.hri = hri;
3922      rse.hsa = server;
3923      this.deployedEntries.add(rse);
3924      this.deployedOn.add(server);
3925      // save the replicaId that we see deployed in the cluster
3926      this.deployedReplicaId = hri.getReplicaId();
3927      this.primaryHRIForDeployedReplica =
3928          RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3929    }
3930
3931    @Override
3932    public synchronized String toString() {
3933      StringBuilder sb = new StringBuilder();
3934      sb.append("{ meta => ");
      sb.append((metaEntry != null) ? metaEntry.getRegionNameAsString() : "null");
      sb.append(", hdfs => " + getHdfsRegionDir());
      sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries));
      sb.append(", replicaId => " + getReplicaId());
3939      sb.append(" }");
3940      return sb.toString();
3941    }
3942
3943    @Override
3944    public byte[] getStartKey() {
3945      if (this.metaEntry != null) {
3946        return this.metaEntry.getStartKey();
3947      } else if (this.hdfsEntry != null) {
3948        return this.hdfsEntry.hri.getStartKey();
3949      } else {
3950        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3951        return null;
3952      }
3953    }
3954
3955    @Override
3956    public byte[] getEndKey() {
3957      if (this.metaEntry != null) {
3958        return this.metaEntry.getEndKey();
3959      } else if (this.hdfsEntry != null) {
3960        return this.hdfsEntry.hri.getEndKey();
3961      } else {
        LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3963        return null;
3964      }
3965    }
3966
3967    public TableName getTableName() {
3968      if (this.metaEntry != null) {
3969        return this.metaEntry.getTable();
3970      } else if (this.hdfsEntry != null) {
3971        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3972        // so we get the name from the Path
3973        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3974        return FSUtils.getTableName(tableDir);
3975      } else {
3976        // return the info from the first online/deployed hri
3977        for (OnlineEntry e : deployedEntries) {
3978          return e.hri.getTable();
3979        }
3980        return null;
3981      }
3982    }
3983
3984    public String getRegionNameAsString() {
3985      if (metaEntry != null) {
3986        return metaEntry.getRegionNameAsString();
3987      } else if (hdfsEntry != null) {
3988        if (hdfsEntry.hri != null) {
3989          return hdfsEntry.hri.getRegionNameAsString();
3990        }
3991      } else {
3992        // return the info from the first online/deployed hri
3993        for (OnlineEntry e : deployedEntries) {
3994          return e.hri.getRegionNameAsString();
3995        }
3996      }
3997      return null;
3998    }
3999
4000    public byte[] getRegionName() {
4001      if (metaEntry != null) {
4002        return metaEntry.getRegionName();
4003      } else if (hdfsEntry != null) {
4004        return hdfsEntry.hri.getRegionName();
4005      } else {
4006        // return the info from the first online/deployed hri
4007        for (OnlineEntry e : deployedEntries) {
4008          return e.hri.getRegionName();
4009        }
4010        return null;
4011      }
4012    }
4013
4014    public RegionInfo getPrimaryHRIForDeployedReplica() {
4015      return primaryHRIForDeployedReplica;
4016    }
4017
4018    Path getHdfsRegionDir() {
4019      if (hdfsEntry == null) {
4020        return null;
4021      }
4022      return hdfsEntry.hdfsRegionDir;
4023    }
4024
4025    boolean containsOnlyHdfsEdits() {
4026      if (hdfsEntry == null) {
4027        return false;
4028      }
4029      return hdfsEntry.hdfsOnlyEdits;
4030    }
4031
4032    boolean isHdfsRegioninfoPresent() {
4033      if (hdfsEntry == null) {
4034        return false;
4035      }
4036      return hdfsEntry.hdfsRegioninfoFilePresent;
4037    }
4038
4039    long getModTime() {
4040      if (hdfsEntry == null) {
4041        return 0;
4042      }
4043      return hdfsEntry.hdfsRegionDirModTime;
4044    }
4045
4046    RegionInfo getHdfsHRI() {
4047      if (hdfsEntry == null) {
4048        return null;
4049      }
4050      return hdfsEntry.hri;
4051    }
4052
4053    public void setSkipChecks(boolean skipChecks) {
4054      this.skipChecks = skipChecks;
4055    }
4056
4057    public boolean isSkipChecks() {
4058      return skipChecks;
4059    }
4060
4061    public void setMerged(boolean isMerged) {
4062      this.isMerged = isMerged;
4063    }
4064
4065    public boolean isMerged() {
4066      return this.isMerged;
4067    }
4068  }
4069
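  /**
   * Orders HbckInfos by table name, then start key, then end key (an empty end key sorts
   * last), falling back to the regionId from the HDFS entry as a tiebreaker; an entry with
   * no HDFS info sorts after one that has it.
   */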
  static final Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
4071    @Override
4072    public int compare(HbckInfo l, HbckInfo r) {
4073      if (l == r) {
4074        // same instance
4075        return 0;
4076      }
4077
4078      int tableCompare = l.getTableName().compareTo(r.getTableName());
4079      if (tableCompare != 0) {
4080        return tableCompare;
4081      }
4082
4083      int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
4084          l.getStartKey(), r.getStartKey());
4085      if (startComparison != 0) {
4086        return startComparison;
4087      }
4088
4089      // Special case for absolute endkey
4090      byte[] endKey = r.getEndKey();
4091      endKey = (endKey.length == 0) ? null : endKey;
4092      byte[] endKey2 = l.getEndKey();
4093      endKey2 = (endKey2.length == 0) ? null : endKey2;
4094      int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
4095          endKey2,  endKey);
4096
4097      if (endComparison != 0) {
4098        return endComparison;
4099      }
4100
      // Use regionId as a tiebreaker.
      // A null hdfsEntry sorts after all non-null values, so treat it as larger.
4103      if (l.hdfsEntry == null && r.hdfsEntry == null) {
4104        return 0;
4105      }
4106      if (l.hdfsEntry == null && r.hdfsEntry != null) {
4107        return 1;
4108      }
4109      // l.hdfsEntry must not be null
4110      if (r.hdfsEntry == null) {
4111        return -1;
4112      }
4113      // both l.hdfsEntry and r.hdfsEntry must not be null.
4114      return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
4115    }
4116  };
4117
4118  /**
4119   * Prints summary of all tables found on the system.
4120   */
4121  private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
4122    StringBuilder sb = new StringBuilder();
4123    int numOfSkippedRegions;
4124    errors.print("Summary:");
4125    for (TableInfo tInfo : tablesInfo.values()) {
4126      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
4127          skippedRegions.get(tInfo.getName()).size() : 0;
4128
      if (errors.tableHasErrors(tInfo)) {
        errors.print("Table " + tInfo.getName() + " is inconsistent.");
      } else if (numOfSkippedRegions > 0) {
        errors.print("Table " + tInfo.getName() + " is okay (with "
          + numOfSkippedRegions + " skipped regions).");
      } else {
        errors.print("Table " + tInfo.getName() + " is okay.");
      }
4138      errors.print("    Number of regions: " + tInfo.getNumRegions());
4139      if (numOfSkippedRegions > 0) {
4140        Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
4141        System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
4142        System.out.println("      List of skipped regions:");
4143        for(String sr : skippedRegionStrings) {
4144          System.out.println("        " + sr);
4145        }
4146      }
4147      sb.setLength(0); // clear out existing buffer, if any.
4148      sb.append("    Deployed on: ");
4149      for (ServerName server : tInfo.deployedOn) {
4150        sb.append(" " + server.toString());
4151      }
4152      errors.print(sb.toString());
4153    }
4154  }
4155
4156  static ErrorReporter getErrorReporter(
4157      final Configuration conf) throws ClassNotFoundException {
    Class<? extends ErrorReporter> reporter =
        conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
4159    return ReflectionUtils.newInstance(reporter, conf);
4160  }
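  // Illustrative sketch only: the reporter used by the method above is pluggable through the
  // "hbasefsck.errorreporter" configuration key, so a hypothetical custom implementation
  // (here com.example.MyReporter, which would implement ErrorReporter) could be wired in as:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setClass("hbasefsck.errorreporter", MyReporter.class, ErrorReporter.class);
  //   ErrorReporter reporter = HBaseFsck.getErrorReporter(conf);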
4161
4162  public interface ErrorReporter {
4163    enum ERROR_CODE {
4164      UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
4165      NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
4166      NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
4167      FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
4168      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
4169      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
4170      LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR,
4171      ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE, DUPE_ENDKEYS,
4172      UNSUPPORTED_OPTION, INVALID_TABLE
4173    }
4174    void clear();
4175    void report(String message);
4176    void reportError(String message);
4177    void reportError(ERROR_CODE errorCode, String message);
4178    void reportError(ERROR_CODE errorCode, String message, TableInfo table);
4179    void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
4180    void reportError(
4181      ERROR_CODE errorCode,
4182      String message,
4183      TableInfo table,
4184      HbckInfo info1,
4185      HbckInfo info2
4186    );
4187    int summarize();
4188    void detail(String details);
4189    ArrayList<ERROR_CODE> getErrorList();
4190    void progress();
4191    void print(String message);
4192    void resetErrors();
4193    boolean tableHasErrors(TableInfo table);
4194  }
4195
4196  static class PrintingErrorReporter implements ErrorReporter {
4197    public int errorCount = 0;
4198    private int showProgress;
4199    // How frequently calls to progress() will create output
4200    private static final int progressThreshold = 100;
4201
4202    Set<TableInfo> errorTables = new HashSet<>();
4203
4204    // for use by unit tests to verify which errors were discovered
4205    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
4206
4207    @Override
4208    public void clear() {
4209      errorTables.clear();
4210      errorList.clear();
4211      errorCount = 0;
4212    }
4213
4214    @Override
4215    public synchronized void reportError(ERROR_CODE errorCode, String message) {
4216      if (errorCode == ERROR_CODE.WRONG_USAGE) {
4217        System.err.println(message);
4218        return;
4219      }
4220
4221      errorList.add(errorCode);
4222      if (!summary) {
4223        System.out.println("ERROR: " + message);
4224      }
4225      errorCount++;
4226      showProgress = 0;
4227    }
4228
4229    @Override
4230    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
4231      errorTables.add(table);
4232      reportError(errorCode, message);
4233    }
4234
4235    @Override
4236    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4237                                         HbckInfo info) {
4238      errorTables.add(table);
4239      String reference = "(region " + info.getRegionNameAsString() + ")";
4240      reportError(errorCode, reference + " " + message);
4241    }
4242
4243    @Override
4244    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4245                                         HbckInfo info1, HbckInfo info2) {
4246      errorTables.add(table);
4247      String reference = "(regions " + info1.getRegionNameAsString()
4248          + " and " + info2.getRegionNameAsString() + ")";
4249      reportError(errorCode, reference + " " + message);
4250    }
4251
4252    @Override
4253    public synchronized void reportError(String message) {
4254      reportError(ERROR_CODE.UNKNOWN, message);
4255    }
4256
    /**
     * Report error information, but do not increment the error count.  Intended for cases
     * where the actual error would have been reported previously.
     * @param message the message to report
     */
4262    @Override
4263    public synchronized void report(String message) {
4264      if (! summary) {
4265        System.out.println("ERROR: " + message);
4266      }
4267      showProgress = 0;
4268    }
4269
4270    @Override
4271    public synchronized int summarize() {
4272      System.out.println(Integer.toString(errorCount) +
4273                         " inconsistencies detected.");
4274      if (errorCount == 0) {
4275        System.out.println("Status: OK");
4276        return 0;
4277      } else {
4278        System.out.println("Status: INCONSISTENT");
4279        return -1;
4280      }
4281    }
4282
4283    @Override
4284    public ArrayList<ERROR_CODE> getErrorList() {
4285      return errorList;
4286    }
4287
4288    @Override
4289    public synchronized void print(String message) {
4290      if (!summary) {
4291        System.out.println(message);
4292      }
4293    }
4294
4295    @Override
4296    public boolean tableHasErrors(TableInfo table) {
4297      return errorTables.contains(table);
4298    }
4299
4300    @Override
4301    public void resetErrors() {
4302      errorCount = 0;
4303    }
4304
4305    @Override
4306    public synchronized void detail(String message) {
4307      if (details) {
4308        System.out.println(message);
4309      }
4310      showProgress = 0;
4311    }
4312
4313    @Override
4314    public synchronized void progress() {
4315      if (showProgress++ == progressThreshold) {
4316        if (!summary) {
4317          System.out.print(".");
4318        }
4319        showProgress = 0;
4320      }
4321    }
4322  }
4323
4324  /**
4325   * Contact a region server and get all information from it
4326   */
4327  static class WorkItemRegion implements Callable<Void> {
4328    private final HBaseFsck hbck;
4329    private final ServerName rsinfo;
4330    private final ErrorReporter errors;
4331    private final Connection connection;
4332
4333    WorkItemRegion(HBaseFsck hbck, ServerName info,
4334                   ErrorReporter errors, Connection connection) {
4335      this.hbck = hbck;
4336      this.rsinfo = info;
4337      this.errors = errors;
4338      this.connection = connection;
4339    }
4340
4341    @Override
4342    public synchronized Void call() throws IOException {
4343      errors.progress();
4344      try {
4345        // list all online regions from this region server
4346        List<RegionInfo> regions = connection.getAdmin().getRegions(rsinfo);
4347        regions = filterRegions(regions);
4348
4349        if (details) {
4350          errors.detail(
4351            "RegionServer: " + rsinfo.getServerName() + " number of regions: " + regions.size());
4352          for (RegionInfo rinfo : regions) {
4353            errors.detail("  " + rinfo.getRegionNameAsString() + " id: " + rinfo.getRegionId() +
4354              " encoded_name: " + rinfo.getEncodedName() + " start: " +
4355              Bytes.toStringBinary(rinfo.getStartKey()) + " end: " +
4356              Bytes.toStringBinary(rinfo.getEndKey()));
4357          }
4358        }
4359
4360        // check to see if the existence of this region matches the region in META
4361        for (RegionInfo r : regions) {
4362          HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4363          hbi.addServer(r, rsinfo);
4364        }
4365      } catch (IOException e) { // unable to connect to the region server.
4366        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE,
4367          "RegionServer: " + rsinfo.getServerName() + " Unable to fetch region information. " + e);
4368        throw e;
4369      }
4370      return null;
4371    }
4372
4373    private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
4374      List<RegionInfo> ret = Lists.newArrayList();
4375      for (RegionInfo hri : regions) {
4376        if (hri.isMetaRegion() || (!hbck.checkMetaOnly
4377            && hbck.isTableIncluded(hri.getTable()))) {
4378          ret.add(hri);
4379        }
4380      }
4381      return ret;
4382    }
4383  }
4384
4385  /**
   * Contact hdfs and gather all information about the region directories under the
   * specified table directory into the regioninfo list.
4388   */
4389  class WorkItemHdfsDir implements Callable<Void> {
4390    private FileStatus tableDir;
4391    private ErrorReporter errors;
4392    private FileSystem fs;
4393
4394    WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4395                    FileStatus status) {
4396      this.fs = fs;
4397      this.tableDir = status;
4398      this.errors = errors;
4399    }
4400
4401    @Override
4402    public synchronized Void call() throws InterruptedException, ExecutionException {
4403      final Vector<Exception> exceptions = new Vector<>();
4404
4405      try {
4406        final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4407        final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
4408
4409        for (final FileStatus regionDir : regionDirs) {
4410          errors.progress();
4411          final String encodedName = regionDir.getPath().getName();
4412          // ignore directories that aren't hexadecimal
4413          if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
4414            continue;
4415          }
4416
4417          if (!exceptions.isEmpty()) {
4418            break;
4419          }
4420
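          // Inspect each region directory on the shared executor; the parent call() waits on
          // these futures below and surfaces the first failure, if any.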
4421          futures.add(executor.submit(new Runnable() {
4422            @Override
4423            public void run() {
4424              try {
                LOG.debug("Loading region info from hdfs:" + regionDir.getPath());

                Path regioninfoFile =
                    new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
4428                boolean regioninfoFileExists = fs.exists(regioninfoFile);
4429
4430                if (!regioninfoFileExists) {
                  // As tables become larger it is more and more likely that by the time you
                  // reach a given region it will already be gone due to region splits/merges.
4433                  if (!fs.exists(regionDir.getPath())) {
4434                    LOG.warn("By the time we tried to process this region dir it was already gone: "
4435                        + regionDir.getPath());
4436                    return;
4437                  }
4438                }
4439
4440                HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4441                HdfsEntry he = new HdfsEntry();
4442                synchronized (hbi) {
4443                  if (hbi.getHdfsRegionDir() != null) {
                    errors.print("Directory " + encodedName + " duplicate?? " +
                                 hbi.getHdfsRegionDir());
4446                  }
4447
4448                  he.hdfsRegionDir = regionDir.getPath();
4449                  he.hdfsRegionDirModTime = regionDir.getModificationTime();
4450                  he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4451                  // we add to orphan list when we attempt to read .regioninfo
4452
                  // Set a flag if this region contains only edits
                  // This is a special case left behind after a region split
4455                  he.hdfsOnlyEdits = true;
4456                  FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4457                  Path ePath = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
4458                  for (FileStatus subDir : subDirs) {
4459                    errors.progress();
4460                    String sdName = subDir.getPath().getName();
4461                    if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4462                      he.hdfsOnlyEdits = false;
4463                      break;
4464                    }
4465                  }
4466                  hbi.hdfsEntry = he;
4467                }
4468              } catch (Exception e) {
4469                LOG.error("Could not load region dir", e);
4470                exceptions.add(e);
4471              }
4472            }
4473          }));
4474        }
4475
4476        // Ensure all pending tasks are complete (or that we run into an exception)
4477        for (Future<?> f : futures) {
4478          if (!exceptions.isEmpty()) {
4479            break;
4480          }
4481          try {
4482            f.get();
4483          } catch (ExecutionException e) {
4484            LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
4485            // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4486          }
4487        }
4488      } catch (IOException e) {
4489        LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4490        exceptions.add(e);
4491      } finally {
4492        if (!exceptions.isEmpty()) {
4493          errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4494              + tableDir.getPath().getName()
4495              + " Unable to fetch all HDFS region information. ");
4496          // Just throw the first exception as an indication something bad happened
4497          // Don't need to propagate all the exceptions, we already logged them all anyway
          throw new ExecutionException("First exception in WorkItemHdfsDir",
              exceptions.firstElement());
4499        }
4500      }
4501      return null;
4502    }
4503  }
4504
4505  /**
   * Contact hdfs and load the .regioninfo file for the region tracked by the given HbckInfo,
   * reporting an orphaned region directory if it cannot be read.
4508   */
4509  static class WorkItemHdfsRegionInfo implements Callable<Void> {
4510    private HbckInfo hbi;
4511    private HBaseFsck hbck;
4512    private ErrorReporter errors;
4513
4514    WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4515      this.hbi = hbi;
4516      this.hbck = hbck;
4517      this.errors = errors;
4518    }
4519
4520    @Override
4521    public synchronized Void call() throws IOException {
4522      // only load entries that haven't been loaded yet.
4523      if (hbi.getHdfsHRI() == null) {
4524        try {
4525          errors.progress();
4526          hbck.loadHdfsRegioninfo(hbi);
4527        } catch (IOException ioe) {
4528          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4529              + hbi.getTableName() + " in hdfs dir "
4530              + hbi.getHdfsRegionDir()
4531              + "!  It may be an invalid format or version file.  Treating as "
4532              + "an orphaned regiondir.";
4533          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4534          try {
4535            hbck.debugLsr(hbi.getHdfsRegionDir());
4536          } catch (IOException ioe2) {
4537            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4538            throw ioe2;
4539          }
4540          hbck.orphanHdfsDirs.add(hbi);
4541          throw ioe;
4542        }
4543      }
4544      return null;
4545    }
4546  }
4547
4548  /**
4549   * Display the full report from fsck. This displays all live and dead region
4550   * servers, and all known regions.
4551   */
4552  public static void setDisplayFullReport() {
4553    details = true;
4554  }
4555
4556  /**
4557   * Set exclusive mode.
4558   */
4559  public static void setForceExclusive() {
4560    forceExclusive = true;
4561  }
4562
4563  /**
4564   * Only one instance of hbck can modify HBase at a time.
4565   */
4566  public boolean isExclusive() {
4567    return fixAny || forceExclusive;
4568  }
4569
4570  /**
4571   * Set summary mode.
4572   * Print only summary of the tables and status (OK or INCONSISTENT)
4573   */
4574  static void setSummary() {
4575    summary = true;
4576  }
4577
4578  /**
4579   * Set hbase:meta check mode.
4580   * Print only info about hbase:meta table deployment/state
4581   */
4582  void setCheckMetaOnly() {
4583    checkMetaOnly = true;
4584  }
4585
4586  /**
4587   * Set region boundaries check mode.
4588   */
4589  void setRegionBoundariesCheck() {
4590    checkRegionBoundaries = true;
4591  }
4592
4593  /**
4594   * Set replication fix mode.
4595   */
4596  public void setFixReplication(boolean shouldFix) {
4597    fixReplication = shouldFix;
4598    fixAny |= shouldFix;
4599  }
4600
4601  public void setCleanReplicationBarrier(boolean shouldClean) {
4602    cleanReplicationBarrier = shouldClean;
4603  }
4604
  /**
   * Mark that hbck should be rerun.  Called after we have tried to fix something so the
   * tool can be run once more to verify that the fix worked.
   */
4611  void setShouldRerun() {
4612    rerun = true;
4613  }
4614
4615  public boolean shouldRerun() {
4616    return rerun;
4617  }
4618
4619  /**
4620   * Fix inconsistencies found by fsck. This should try to fix errors (if any)
4621   * found by fsck utility.
4622   */
4623  public void setFixAssignments(boolean shouldFix) {
4624    fixAssignments = shouldFix;
4625    fixAny |= shouldFix;
4626  }
4627
4628  boolean shouldFixAssignments() {
4629    return fixAssignments;
4630  }
4631
4632  public void setFixMeta(boolean shouldFix) {
4633    fixMeta = shouldFix;
4634    fixAny |= shouldFix;
4635  }
4636
4637  boolean shouldFixMeta() {
4638    return fixMeta;
4639  }
4640
4641  public void setFixEmptyMetaCells(boolean shouldFix) {
4642    fixEmptyMetaCells = shouldFix;
4643    fixAny |= shouldFix;
4644  }
4645
4646  boolean shouldFixEmptyMetaCells() {
4647    return fixEmptyMetaCells;
4648  }
4649
4650  public void setCheckHdfs(boolean checking) {
4651    checkHdfs = checking;
4652  }
4653
4654  boolean shouldCheckHdfs() {
4655    return checkHdfs;
4656  }
4657
4658  public void setFixHdfsHoles(boolean shouldFix) {
4659    fixHdfsHoles = shouldFix;
4660    fixAny |= shouldFix;
4661  }
4662
4663  boolean shouldFixHdfsHoles() {
4664    return fixHdfsHoles;
4665  }
4666
4667  public void setFixTableOrphans(boolean shouldFix) {
4668    fixTableOrphans = shouldFix;
4669    fixAny |= shouldFix;
4670  }
4671
4672  boolean shouldFixTableOrphans() {
4673    return fixTableOrphans;
4674  }
4675
4676  public void setFixHdfsOverlaps(boolean shouldFix) {
4677    fixHdfsOverlaps = shouldFix;
4678    fixAny |= shouldFix;
4679  }
4680
4681  boolean shouldFixHdfsOverlaps() {
4682    return fixHdfsOverlaps;
4683  }
4684
4685  public void setFixHdfsOrphans(boolean shouldFix) {
4686    fixHdfsOrphans = shouldFix;
4687    fixAny |= shouldFix;
4688  }
4689
4690  boolean shouldFixHdfsOrphans() {
4691    return fixHdfsOrphans;
4692  }
4693
4694  public void setFixVersionFile(boolean shouldFix) {
4695    fixVersionFile = shouldFix;
4696    fixAny |= shouldFix;
4697  }
4698
4699  public boolean shouldFixVersionFile() {
4700    return fixVersionFile;
4701  }
4702
4703  public void setSidelineBigOverlaps(boolean sbo) {
4704    this.sidelineBigOverlaps = sbo;
4705  }
4706
4707  public boolean shouldSidelineBigOverlaps() {
4708    return sidelineBigOverlaps;
4709  }
4710
4711  public void setFixSplitParents(boolean shouldFix) {
4712    fixSplitParents = shouldFix;
4713    fixAny |= shouldFix;
4714  }
4715
4716  public void setRemoveParents(boolean shouldFix) {
4717    removeParents = shouldFix;
4718    fixAny |= shouldFix;
4719  }
4720
4721  boolean shouldFixSplitParents() {
4722    return fixSplitParents;
4723  }
4724
4725  boolean shouldRemoveParents() {
4726    return removeParents;
4727  }
4728
4729  public void setFixReferenceFiles(boolean shouldFix) {
4730    fixReferenceFiles = shouldFix;
4731    fixAny |= shouldFix;
4732  }
4733
4734  boolean shouldFixReferenceFiles() {
4735    return fixReferenceFiles;
4736  }
4737
4738  public void setFixHFileLinks(boolean shouldFix) {
4739    fixHFileLinks = shouldFix;
4740    fixAny |= shouldFix;
4741  }
4742
4743  boolean shouldFixHFileLinks() {
4744    return fixHFileLinks;
4745  }
4746
4747  public boolean shouldIgnorePreCheckPermission() {
4748    return !fixAny || ignorePreCheckPermission;
4749  }
4750
4751  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4752    this.ignorePreCheckPermission = ignorePreCheckPermission;
4753  }
4754
4755  /**
4756   * @param mm maximum number of regions to merge into a single region.
4757   */
4758  public void setMaxMerge(int mm) {
4759    this.maxMerge = mm;
4760  }
4761
4762  public int getMaxMerge() {
4763    return maxMerge;
4764  }
4765
4766  public void setMaxOverlapsToSideline(int mo) {
4767    this.maxOverlapsToSideline = mo;
4768  }
4769
4770  public int getMaxOverlapsToSideline() {
4771    return maxOverlapsToSideline;
4772  }
4773
4774  /**
   * Only check/fix tables specified by the list.
   * An empty list means all tables are included.
4777   */
4778  boolean isTableIncluded(TableName table) {
4779    return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
4780  }
4781
4782  public void includeTable(TableName table) {
4783    tablesIncluded.add(table);
4784  }
4785
4786  Set<TableName> getIncludedTables() {
4787    return new HashSet<>(tablesIncluded);
4788  }
4789
4790  /**
4791   * We are interested in only those tables that have not changed their state in
4792   * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
   * @param seconds the time lag, in seconds
4794   */
4795  public void setTimeLag(long seconds) {
4796    timelag = seconds * 1000; // convert to milliseconds
4797  }
4798
4799  /**
   * @param sidelineDir HDFS path to sideline data
4802   */
4803  public void setSidelineDir(String sidelineDir) {
4804    this.sidelineDir = new Path(sidelineDir);
4805  }
4806
  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
      throws IOException {
4808    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4809  }
4810
4811  public HFileCorruptionChecker getHFilecorruptionChecker() {
4812    return hfcc;
4813  }
4814
4815  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4816    this.hfcc = hfcc;
4817  }
4818
4819  public void setRetCode(int code) {
4820    this.retcode = code;
4821  }
4822
4823  public int getRetCode() {
4824    return retcode;
4825  }
4826
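  /**
   * Emits the usage text through the error reporter as a WRONG_USAGE error, sets the return
   * code to -2 and returns this instance.
   */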
4827  protected HBaseFsck printUsageAndExit() {
4828    StringWriter sw = new StringWriter(2048);
4829    PrintWriter out = new PrintWriter(sw);
4830    out.println("");
4831    out.println("-----------------------------------------------------------------------");
4832    out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
    out.println("In general, all Read-Only options are supported and can be used");
    out.println("safely. Most -fix/-repair options are NOT supported. Please see usage");
4835    out.println("below for details on which options are not supported.");
4836    out.println("-----------------------------------------------------------------------");
4837    out.println("");
4838    out.println("Usage: fsck [opts] {only tables}");
4839    out.println(" where [opts] are:");
4840    out.println("   -help Display help options (this)");
4841    out.println("   -details Display full report of all regions.");
    out.println("   -timelag <timeInSeconds>  Process only regions that " +
                       "have not experienced any metadata updates in the last " +
                       "<timeInSeconds> seconds.");
4845    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4846        " before checking if the fix worked if run with -fix");
4847    out.println("   -summary Print only summary of the tables and status.");
4848    out.println("   -metaonly Only check the state of the hbase:meta table.");
4849    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
    out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
4851    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
4852
4853    out.println("");
4854    out.println("  Datafile Repair options: (expert features, use with caution!)");
    out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
4857
4858    out.println("");
4859    out.println(" Replication options");
4860    out.println("   -fixReplication   Deletes replication queues for removed peers");
4861
4862    out.println("");
4863    out.println("  Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
4864    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4865    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4866    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
4867    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4868        + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4869    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
4870
4871    out.println("");
4872    out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
4873    out.println("");
4874    out.println("  UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
    out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
4876    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4877    out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4878    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4879    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4880    out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4881    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
    out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE + " by default)");
    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow sidelining big overlaps");
    out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE + " by default)");
4885    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4886    out.println("   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
4887    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4888        + " (empty REGIONINFO_QUALIFIER rows)");
4889
4890    out.println("");
4891    out.println("  UNSUPPORTED Metadata Repair shortcuts");
    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles " +
        "-fixHFileLinks");
4895    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4896    out.println("");
4897    out.println(" Replication options");
4898    out.println("   -fixReplication   Deletes replication queues for removed peers");
    out.println("   -cleanReplicationBarrier [tableName] clean the replication barriers " +
4900        "of a specified table, tableName is required");
4901    out.flush();
4902    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4903
4904    setRetCode(-2);
4905    return this;
4906  }
4907
4908  /**
   * Main program.
   *
   * @param args command-line arguments
   * @throws Exception if the hbck run fails
4913   */
4914  public static void main(String[] args) throws Exception {
4915    // create a fsck object
4916    Configuration conf = HBaseConfiguration.create();
4917    Path hbasedir = FSUtils.getRootDir(conf);
4918    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4919    FSUtils.setFsDefault(conf, new Path(defaultFs));
4920    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4921    System.exit(ret);
4922  }
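  // A few typical read-only invocations (illustrative; see printUsageAndExit for the full list,
  // and note that most -fix options are unsupported against 2.0+ servers):
  //
  //   hbase hbck                      report inconsistencies across all tables
  //   hbase hbck -details             include a per-region report
  //   hbase hbck -metaonly            only check the state of hbase:meta
  //   hbase hbck -summary TableFoo    summary for one table (TableFoo is a made-up name)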
4923
4924  /**
4925   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4926   */
4927  static class HBaseFsckTool extends Configured implements Tool {
4928    HBaseFsckTool(Configuration conf) { super(conf); }
4929    @Override
4930    public int run(String[] args) throws Exception {
4931      HBaseFsck hbck = new HBaseFsck(getConf());
4932      hbck.exec(hbck.executor, args);
4933      hbck.close();
4934      return hbck.getRetCode();
4935    }
4936  }
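  // A minimal programmatic sketch (assumes a reachable cluster); ToolRunner applies the usual
  // -D generic option handling before delegating to HBaseFsckTool.run():
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   int exitCode = ToolRunner.run(conf, new HBaseFsckTool(conf), new String[] { "-details" });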
4937
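  /**
   * Parses the command-line arguments, connects to the cluster and runs the requested checks
   * and repairs, optionally re-running once to verify any fix.  The exit status is available
   * afterwards via {@link #getRetCode()}.
   */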
4938  public HBaseFsck exec(ExecutorService exec, String[] args)
4939      throws KeeperException, IOException, InterruptedException, ReplicationException {
4940    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4941
4942    boolean checkCorruptHFiles = false;
4943    boolean sidelineCorruptHFiles = false;
4944
4945    // Process command-line args.
4946    for (int i = 0; i < args.length; i++) {
4947      String cmd = args[i];
4948      if (cmd.equals("-help") || cmd.equals("-h")) {
4949        return printUsageAndExit();
4950      } else if (cmd.equals("-details")) {
4951        setDisplayFullReport();
4952      } else if (cmd.equals("-exclusive")) {
4953        setForceExclusive();
4954      } else if (cmd.equals("-timelag")) {
4955        if (i == args.length - 1) {
4956          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4957          return printUsageAndExit();
4958        }
4959        try {
4960          long timelag = Long.parseLong(args[++i]);
4961          setTimeLag(timelag);
4962        } catch (NumberFormatException e) {
4963          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4964          return printUsageAndExit();
4965        }
4966      } else if (cmd.equals("-sleepBeforeRerun")) {
4967        if (i == args.length - 1) {
4968          errors.reportError(ERROR_CODE.WRONG_USAGE,
4969            "HBaseFsck: -sleepBeforeRerun needs a value.");
4970          return printUsageAndExit();
4971        }
4972        try {
4973          sleepBeforeRerun = Long.parseLong(args[++i]);
4974        } catch (NumberFormatException e) {
4975          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4976          return printUsageAndExit();
4977        }
4978      } else if (cmd.equals("-sidelineDir")) {
4979        if (i == args.length - 1) {
4980          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4981          return printUsageAndExit();
4982        }
4983        setSidelineDir(args[++i]);
4984      } else if (cmd.equals("-fix")) {
4985        errors.reportError(ERROR_CODE.WRONG_USAGE,
          "This option is deprecated, please use -fixAssignments instead.");
4987        setFixAssignments(true);
4988      } else if (cmd.equals("-fixAssignments")) {
4989        setFixAssignments(true);
4990      } else if (cmd.equals("-fixMeta")) {
4991        setFixMeta(true);
4992      } else if (cmd.equals("-noHdfsChecking")) {
4993        setCheckHdfs(false);
4994      } else if (cmd.equals("-fixHdfsHoles")) {
4995        setFixHdfsHoles(true);
4996      } else if (cmd.equals("-fixHdfsOrphans")) {
4997        setFixHdfsOrphans(true);
4998      } else if (cmd.equals("-fixTableOrphans")) {
4999        setFixTableOrphans(true);
5000      } else if (cmd.equals("-fixHdfsOverlaps")) {
5001        setFixHdfsOverlaps(true);
5002      } else if (cmd.equals("-fixVersionFile")) {
5003        setFixVersionFile(true);
5004      } else if (cmd.equals("-sidelineBigOverlaps")) {
5005        setSidelineBigOverlaps(true);
5006      } else if (cmd.equals("-fixSplitParents")) {
5007        setFixSplitParents(true);
5008      } else if (cmd.equals("-removeParents")) {
5009        setRemoveParents(true);
5010      } else if (cmd.equals("-ignorePreCheckPermission")) {
5011        setIgnorePreCheckPermission(true);
5012      } else if (cmd.equals("-checkCorruptHFiles")) {
5013        checkCorruptHFiles = true;
5014      } else if (cmd.equals("-sidelineCorruptHFiles")) {
5015        sidelineCorruptHFiles = true;
5016      } else if (cmd.equals("-fixReferenceFiles")) {
5017        setFixReferenceFiles(true);
5018      } else if (cmd.equals("-fixHFileLinks")) {
5019        setFixHFileLinks(true);
5020      } else if (cmd.equals("-fixEmptyMetaCells")) {
5021        setFixEmptyMetaCells(true);
5022      } else if (cmd.equals("-repair")) {
5023        // this attempts to merge overlapping hdfs regions, needs testing
5024        // under load
5025        setFixHdfsHoles(true);
5026        setFixHdfsOrphans(true);
5027        setFixMeta(true);
5028        setFixAssignments(true);
5029        setFixHdfsOverlaps(true);
5030        setFixVersionFile(true);
5031        setSidelineBigOverlaps(true);
5032        setFixSplitParents(false);
5033        setCheckHdfs(true);
5034        setFixReferenceFiles(true);
5035        setFixHFileLinks(true);
5036      } else if (cmd.equals("-repairHoles")) {
5037        // this will make all missing hdfs regions available but may lose data
5038        setFixHdfsHoles(true);
5039        setFixHdfsOrphans(false);
5040        setFixMeta(true);
5041        setFixAssignments(true);
5042        setFixHdfsOverlaps(false);
5043        setSidelineBigOverlaps(false);
5044        setFixSplitParents(false);
5045        setCheckHdfs(true);
5046      } else if (cmd.equals("-maxOverlapsToSideline")) {
5047        if (i == args.length - 1) {
5048          errors.reportError(ERROR_CODE.WRONG_USAGE,
5049            "-maxOverlapsToSideline needs a numeric value argument.");
5050          return printUsageAndExit();
5051        }
5052        try {
5053          int maxOverlapsToSideline = Integer.parseInt(args[++i]);
5054          setMaxOverlapsToSideline(maxOverlapsToSideline);
5055        } catch (NumberFormatException e) {
5056          errors.reportError(ERROR_CODE.WRONG_USAGE,
5057            "-maxOverlapsToSideline needs a numeric value argument.");
5058          return printUsageAndExit();
5059        }
5060      } else if (cmd.equals("-maxMerge")) {
5061        if (i == args.length - 1) {
5062          errors.reportError(ERROR_CODE.WRONG_USAGE,
5063            "-maxMerge needs a numeric value argument.");
5064          return printUsageAndExit();
5065        }
5066        try {
5067          int maxMerge = Integer.parseInt(args[++i]);
5068          setMaxMerge(maxMerge);
5069        } catch (NumberFormatException e) {
5070          errors.reportError(ERROR_CODE.WRONG_USAGE,
5071            "-maxMerge needs a numeric value argument.");
5072          return printUsageAndExit();
5073        }
5074      } else if (cmd.equals("-summary")) {
5075        setSummary();
5076      } else if (cmd.equals("-metaonly")) {
5077        setCheckMetaOnly();
5078      } else if (cmd.equals("-boundaries")) {
5079        setRegionBoundariesCheck();
5080      } else if (cmd.equals("-fixReplication")) {
5081        setFixReplication(true);
5082      } else if (cmd.equals("-cleanReplicationBarrier")) {
5083        setCleanReplicationBarrier(true);
        if (i == args.length - 1 || args[++i].startsWith("-")) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
            "-cleanReplicationBarrier needs a table name argument.");
          return printUsageAndExit();
        }
5087        setCleanReplicationBarrierTable(args[i]);
5088      } else if (cmd.startsWith("-")) {
5089        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
5090        return printUsageAndExit();
5091      } else {
5092        includeTable(TableName.valueOf(cmd));
5093        errors.print("Allow checking/fixes for table: " + cmd);
5094      }
5095    }
5096
5097    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
5098
5099    // pre-check current user has FS write permission or not
5100    try {
5101      preCheckPermission();
5102    } catch (AccessDeniedException ace) {
5103      Runtime.getRuntime().exit(-1);
5104    } catch (IOException ioe) {
5105      Runtime.getRuntime().exit(-1);
5106    }
5107
5108    // do the real work of hbck
5109    connect();
5110
5111    // after connecting to server above, we have server version
5112    // check if unsupported option is specified based on server version
5113    if (!isOptionsSupported(args)) {
5114      return printUsageAndExit();
5115    }
5116
5117    try {
5118      // if corrupt file mode is on, first fix them since they may be opened later
5119      if (checkCorruptHFiles || sidelineCorruptHFiles) {
5120        LOG.info("Checking all hfiles for corruption");
5121        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
5122        setHFileCorruptionChecker(hfcc); // so we can get result
5123        Collection<TableName> tables = getIncludedTables();
5124        Collection<Path> tableDirs = new ArrayList<>();
5125        Path rootdir = FSUtils.getRootDir(getConf());
5126        if (tables.size() > 0) {
5127          for (TableName t : tables) {
5128            tableDirs.add(FSUtils.getTableDir(rootdir, t));
5129          }
5130        } else {
5131          tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
5132        }
5133        hfcc.checkTables(tableDirs);
5134        hfcc.report(errors);
5135      }
5136
5137      // check and fix table integrity, region consistency.
5138      int code = onlineHbck();
5139      setRetCode(code);
5140      // If we have changed the HBase state it is better to run hbck again
5141      // to see if we haven't broken something else in the process.
5142      // We run it only once more because otherwise we can easily fall into
5143      // an infinite loop.
5144      if (shouldRerun()) {
5145        try {
5146          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
5147          Thread.sleep(sleepBeforeRerun);
5148        } catch (InterruptedException ie) {
5149          LOG.warn("Interrupted while sleeping");
5150          return this;
5151        }
5152        // Just report
5153        setFixAssignments(false);
5154        setFixMeta(false);
5155        setFixHdfsHoles(false);
5156        setFixHdfsOverlaps(false);
5157        setFixVersionFile(false);
5158        setFixTableOrphans(false);
5159        errors.resetErrors();
5160        code = onlineHbck();
5161        setRetCode(code);
5162      }
5163    } finally {
5164      IOUtils.closeQuietly(this);
5165    }
5166    return this;
5167  }
5168
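  /**
   * Checks the supplied arguments against the options that are unsupported when the connected
   * server is 2.x or newer; reports UNSUPPORTED_OPTION and returns false if any is present.
   */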
5169  private boolean isOptionsSupported(String[] args) {
5170    boolean result = true;
5171    String hbaseServerVersion = status.getHBaseVersion();
5172    if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
5173      // Process command-line args.
5174      for (String arg : args) {
5175        if (unsupportedOptionsInV2.contains(arg)) {
          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
              "option '" + arg + "' is not supported!");
5178          result = false;
5179          break;
5180        }
5181      }
5182    }
5183    return result;
5184  }
5185
5186  public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
5187    this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
5188  }
5189
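  /**
   * Removes the replication barriers recorded in hbase:meta for the table passed with
   * -cleanReplicationBarrier, provided the table is not a system table and does not have
   * global replication scope, and clears the matching last-pushed sequence ids from the
   * replication queue storage for every peer replicating the table.
   */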
5190  public void cleanReplicationBarrier() throws IOException {
5191    if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
5192      return;
5193    }
5194    if (cleanReplicationBarrierTable.isSystemTable()) {
5195      errors.reportError(ERROR_CODE.INVALID_TABLE,
5196        "invalid table: " + cleanReplicationBarrierTable);
5197      return;
5198    }
5199
5200    boolean isGlobalScope = false;
5201    try {
5202      isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
5203    } catch (TableNotFoundException e) {
      LOG.info("Table " + cleanReplicationBarrierTable
          + " not found; cleaning any replication barriers it may have left behind due to bugs");
5205    }
5206
5207    if (isGlobalScope) {
5208      errors.reportError(ERROR_CODE.INVALID_TABLE,
5209        "table's replication scope is global: " + cleanReplicationBarrierTable);
5210      return;
5211    }
5212    List<byte[]> regionNames = new ArrayList<>();
5213    Scan barrierScan = new Scan();
5214    barrierScan.setCaching(100);
5215    barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
5216    barrierScan
5217        .withStartRow(MetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
5218          MetaTableAccessor.QueryType.REGION))
5219        .withStopRow(MetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
5220          MetaTableAccessor.QueryType.REGION));
5221    Result result;
5222    try (ResultScanner scanner = meta.getScanner(barrierScan)) {
5223      while ((result = scanner.next()) != null) {
5224        regionNames.add(result.getRow());
5225      }
5226    }
    if (regionNames.isEmpty()) {
      errors.reportError(ERROR_CODE.INVALID_TABLE,
        "there are no replication barriers for table: " + cleanReplicationBarrierTable);
5230      return;
5231    }
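    // For every peer that replicates this table, drop the last-pushed sequence ids of these
    // regions from the replication queue storage, in batches of at most 100 encoded names.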
5232    ReplicationQueueStorage queueStorage =
5233        ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
5234    List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
5235    if (peerDescriptions != null && peerDescriptions.size() > 0) {
5236      List<String> peers = peerDescriptions.stream()
5237          .filter(peerConfig -> ReplicationUtils.contains(peerConfig.getPeerConfig(),
5238            cleanReplicationBarrierTable))
5239          .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
5240      try {
5241        List<String> batch = new ArrayList<>();
5242        for (String peer : peers) {
5243          for (byte[] regionName : regionNames) {
5244            batch.add(RegionInfo.encodeRegionName(regionName));
5245            if (batch.size() % 100 == 0) {
5246              queueStorage.removeLastSequenceIds(peer, batch);
5247              batch.clear();
5248            }
5249          }
5250          if (batch.size() > 0) {
5251            queueStorage.removeLastSequenceIds(peer, batch);
5252            batch.clear();
5253          }
5254        }
5255      } catch (ReplicationException re) {
5256        throw new IOException(re);
5257      }
5258    }
5259    for (byte[] regionName : regionNames) {
5260      meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
5261    }
5262    setShouldRerun();
5263  }
5264
5265  /**
5266   * ls -r for debugging purposes
5267   */
5268  void debugLsr(Path p) throws IOException {
5269    debugLsr(getConf(), p, errors);
5270  }
5271
5272  /**
5273   * ls -r for debugging purposes
5274   */
5275  public static void debugLsr(Configuration conf,
5276      Path p) throws IOException {
5277    debugLsr(conf, p, new PrintingErrorReporter());
5278  }
5279
5280  /**
5281   * ls -r for debugging purposes
5282   */
5283  public static void debugLsr(Configuration conf,
5284      Path p, ErrorReporter errors) throws IOException {
5285    if (!LOG.isDebugEnabled() || p == null) {
5286      return;
5287    }
5288    FileSystem fs = p.getFileSystem(conf);
5289
5290    if (!fs.exists(p)) {
      // nothing to do
5292      return;
5293    }
5294    errors.print(p.toString());
5295
5296    if (fs.isFile(p)) {
5297      return;
5298    }
5299
5300    if (fs.getFileStatus(p).isDirectory()) {
      FileStatus[] fss = fs.listStatus(p);
5302      for (FileStatus status : fss) {
5303        debugLsr(conf, status.getPath(), errors);
5304      }
5305    }
5306  }
5307}