1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import com.google.common.base.Joiner;
21  import com.google.common.base.Preconditions;
22  import com.google.common.collect.ImmutableList;
23  import com.google.common.collect.Lists;
24  import com.google.common.collect.Multimap;
25  import com.google.common.collect.Ordering;
26  import com.google.common.collect.TreeMultimap;
27  import com.google.protobuf.ServiceException;
28
29  import java.io.Closeable;
30  import java.io.FileNotFoundException;
31  import java.io.IOException;
32  import java.io.InterruptedIOException;
33  import java.io.PrintWriter;
34  import java.io.StringWriter;
35  import java.net.InetAddress;
36  import java.net.URI;
37  import java.util.ArrayList;
38  import java.util.Arrays;
39  import java.util.Collection;
40  import java.util.Collections;
41  import java.util.Comparator;
42  import java.util.HashMap;
43  import java.util.HashSet;
44  import java.util.Iterator;
45  import java.util.List;
46  import java.util.Locale;
47  import java.util.Map;
48  import java.util.Map.Entry;
49  import java.util.Set;
50  import java.util.SortedMap;
51  import java.util.SortedSet;
52  import java.util.TreeMap;
53  import java.util.TreeSet;
54  import java.util.Vector;
55  import java.util.concurrent.Callable;
56  import java.util.concurrent.ConcurrentSkipListMap;
57  import java.util.concurrent.ExecutionException;
58  import java.util.concurrent.ExecutorService;
59  import java.util.concurrent.Executors;
60  import java.util.concurrent.Future;
61  import java.util.concurrent.FutureTask;
62  import java.util.concurrent.ScheduledThreadPoolExecutor;
63  import java.util.concurrent.TimeUnit;
64  import java.util.concurrent.TimeoutException;
65  import java.util.concurrent.atomic.AtomicBoolean;
66  import java.util.concurrent.atomic.AtomicInteger;
67
68  import org.apache.commons.io.IOUtils;
69  import org.apache.commons.lang.RandomStringUtils;
70  import org.apache.commons.lang.StringUtils;
71  import org.apache.commons.logging.Log;
72  import org.apache.commons.logging.LogFactory;
73  import org.apache.hadoop.conf.Configuration;
74  import org.apache.hadoop.conf.Configured;
75  import org.apache.hadoop.fs.FSDataOutputStream;
76  import org.apache.hadoop.fs.FileStatus;
77  import org.apache.hadoop.fs.FileSystem;
78  import org.apache.hadoop.fs.Path;
79  import org.apache.hadoop.fs.permission.FsAction;
80  import org.apache.hadoop.fs.permission.FsPermission;
81  import org.apache.hadoop.hbase.Abortable;
82  import org.apache.hadoop.hbase.Cell;
83  import org.apache.hadoop.hbase.CellUtil;
84  import org.apache.hadoop.hbase.ClusterStatus;
85  import org.apache.hadoop.hbase.HBaseConfiguration;
86  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
87  import org.apache.hadoop.hbase.HColumnDescriptor;
88  import org.apache.hadoop.hbase.HConstants;
89  import org.apache.hadoop.hbase.HRegionInfo;
90  import org.apache.hadoop.hbase.HRegionLocation;
91  import org.apache.hadoop.hbase.HTableDescriptor;
92  import org.apache.hadoop.hbase.KeyValue;
93  import org.apache.hadoop.hbase.MasterNotRunningException;
94  import org.apache.hadoop.hbase.MetaTableAccessor;
95  import org.apache.hadoop.hbase.RegionLocations;
96  import org.apache.hadoop.hbase.ServerName;
97  import org.apache.hadoop.hbase.TableName;
98  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
99  import org.apache.hadoop.hbase.classification.InterfaceAudience;
100 import org.apache.hadoop.hbase.classification.InterfaceStability;
101 import org.apache.hadoop.hbase.client.Admin;
102 import org.apache.hadoop.hbase.client.ClusterConnection;
103 import org.apache.hadoop.hbase.client.Connection;
104 import org.apache.hadoop.hbase.client.ConnectionFactory;
105 import org.apache.hadoop.hbase.client.Delete;
106 import org.apache.hadoop.hbase.client.Get;
107 import org.apache.hadoop.hbase.client.Put;
108 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
109 import org.apache.hadoop.hbase.client.Result;
110 import org.apache.hadoop.hbase.client.RowMutations;
111 import org.apache.hadoop.hbase.client.Table;
112 import org.apache.hadoop.hbase.client.TableState;
113 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
114 import org.apache.hadoop.hbase.io.hfile.HFile;
115 import org.apache.hadoop.hbase.master.MasterFileSystem;
116 import org.apache.hadoop.hbase.master.RegionState;
117 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
118 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
119 import org.apache.hadoop.hbase.regionserver.HRegion;
120 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
121 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
122 import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
123 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
124 import org.apache.hadoop.hbase.security.AccessDeniedException;
125 import org.apache.hadoop.hbase.security.UserProvider;
126 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
127 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
128 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
129 import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
130 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
131 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
132 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
133 import org.apache.hadoop.hbase.wal.WAL;
134 import org.apache.hadoop.hbase.wal.WALFactory;
135 import org.apache.hadoop.hbase.wal.WALSplitter;
136 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
137 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
138 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
139 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
140 import org.apache.hadoop.ipc.RemoteException;
141 import org.apache.hadoop.security.UserGroupInformation;
142 import org.apache.hadoop.util.ReflectionUtils;
143 import org.apache.hadoop.util.Tool;
144 import org.apache.hadoop.util.ToolRunner;
145 import org.apache.zookeeper.KeeperException;
146
147 /**
148  * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
149  * table integrity problems in a corrupted HBase.
150  * <p>
151  * Region consistency checks verify that hbase:meta, region deployment on region
152  * servers and the state of data in HDFS (.regioninfo files) all are in
153  * accordance.
154  * <p>
155  * Table integrity checks verify that all possible row keys resolve to exactly
156  * one region of a table.  This means there are no individual degenerate
157  * or backwards regions; no holes between regions; and that there are no
158  * overlapping regions.
159  * <p>
160  * The general repair strategy works in two phases:
161  * <ol>
162  * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
163  * <li> Repair Region Consistency with hbase:meta and assignments
164  * </ol>
165  * <p>
166  * For table integrity repairs, the tables' region directories are scanned
167  * for .regioninfo files.  Each table's integrity is then verified.  If there
168  * are any orphan regions (regions with no .regioninfo files) or holes, new
169  * regions are fabricated.  Backwards regions are sidelined as well as empty
170  * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
171  * a new region is created and all data is merged into the new region.
172  * <p>
173  * Table integrity repairs deal solely with HDFS and could potentially be done
174  * offline -- the hbase region servers or master do not need to be running.
175  * This phase can eventually be used to completely reconstruct the hbase:meta table in
176  * an offline fashion.
177  * <p>
178  * Region consistency requires three conditions -- 1) valid .regioninfo file
179  * present in an HDFS region dir,  2) valid row with .regioninfo data in META,
180  * and 3) a region is deployed only at the regionserver it was assigned to,
181  * with the proper state in the master.
182  * <p>
183  * Region consistency repairs require hbase to be online so that hbck can
184  * contact the HBase master and region servers.  The hbck#connect() method must
185  * first be called successfully.  Much of the region consistency information
186  * is transient and less risky to repair.
187  * <p>
188  * If hbck is run from the command line, there are a handful of arguments that
189  * can be used to limit the kinds of repairs hbck will do.  See the code in
190  * {@link #printUsageAndExit()} for more details.
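 * <p>
 * For example (illustrative only; the authoritative list of command-line flags lives in
 * {@link #printUsageAndExit()}):
 * <pre>
 *   $ hbase hbck                   # report inconsistencies only, repair nothing
 *   $ hbase hbck -details          # report with per-region/per-table details
 *   $ hbase hbck -fixAssignments   # also repair region assignment errors
 * </pre>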
191  */
192 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
193 @InterfaceStability.Evolving
194 public class HBaseFsck extends Configured implements Closeable {
195   public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
196   public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
197   private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
198   private static boolean rsSupportsOffline = true;
199   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
200   private static final int DEFAULT_MAX_MERGE = 5;
201   private static final String TO_BE_LOADED = "to_be_loaded";
202   private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
203   private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
204   private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
205   private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
206   // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
207   // In Hadoop 2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
208   // AlreadyBeingCreatedException, which implies a timeout on this operation of up to
209   // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
210   private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
211   private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
212   private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
213   private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
214
215   /**********************
216    * Internal resources
217    **********************/
218   private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
219   private ClusterStatus status;
220   private ClusterConnection connection;
221   private Admin admin;
222   private Table meta;
223   // threads to run parallelizable tasks: retrieving data from regionservers, handling overlapping regions
224   protected ExecutorService executor;
225   private long startMillis = EnvironmentEdgeManager.currentTime();
226   private HFileCorruptionChecker hfcc;
227   private int retcode = 0;
228   private Path HBCK_LOCK_PATH;
229   private FSDataOutputStream hbckOutFd;
230   // This lock is to prevent cleanup of balancer resources twice between
231   // ShutdownHook and the main code. We cleanup only if the connect() is
232   // successful
233   private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
234
235   /***********
236    * Options
237    ***********/
238   private static boolean details = false; // do we display the full report
239   private long timelag = DEFAULT_TIME_LAG; // tables modified more recently than this lag are skipped as "in flux"
240   private static boolean forceExclusive = false; // only this hbck can modify HBase
241   private boolean fixAssignments = false; // fix assignment errors?
242   private boolean fixMeta = false; // fix meta errors?
243   private boolean checkHdfs = true; // load and check fs consistency?
244   private boolean fixHdfsHoles = false; // fix fs holes?
245   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
246   private boolean fixHdfsOrphans = false; // fix fs orphan regions (missing .regioninfo)
247   private boolean fixTableOrphans = false; // fix fs orphan tables (missing .tableinfo)
248   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
249   private boolean fixSplitParents = false; // fix lingering split parents
250   private boolean fixReferenceFiles = false; // fix lingering reference store file
251   private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
252   private boolean fixTableLocks = false; // fix table locks which are expired
253   private boolean fixReplication = false; // fix undeleted replication queues for removed peer
254   private boolean fixAny = false; // Set to true if any fix option is enabled.
255
256   // limit checking/fixes to listed tables; if empty, attempt to check/fix all.
257   // hbase:meta is always checked
258   private Set<TableName> tablesIncluded = new HashSet<TableName>();
259   private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
260   // maximum number of overlapping regions to sideline
261   private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
262   private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
263   private Path sidelineDir = null;
264
265   private boolean rerun = false; // if we tried to fix something, rerun hbck
266   private static boolean summary = false; // if we want to print less output
267   private boolean checkMetaOnly = false;
268   private boolean checkRegionBoundaries = false;
269   private boolean ignorePreCheckPermission = false; // if true, skip the filesystem permission pre-check
270
271   /*********
272    * State
273    *********/
274   final private ErrorReporter errors;
275   int fixes = 0;
276
277   /**
278    * This map contains the state of all hbck items.  It maps from encoded region
279    * name to HbckInfo structure.  The information contained in HbckInfo is used
280    * to detect and correct consistency (hdfs/meta/deployment) problems.
281    */
282   private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
283   // Empty regioninfo qualifiers in hbase:meta
284   private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
285
286   /**
287    * This map from Tablename -> TableInfo contains the structures necessary to
288    * detect table consistency problems (holes, dupes, overlaps).  It is sorted
289    * to prevent dupes.
290    *
291    * If tablesIncluded is empty, this map contains all tables.
292    * Otherwise, it contains only meta tables and tables in tablesIncluded,
293    * unless checkMetaOnly is specified, in which case it contains only
294    * the meta table.
295    */
296   private SortedMap<TableName, TableInfo> tablesInfo =
297       new ConcurrentSkipListMap<TableName, TableInfo>();
298
299   /**
300    * When initially looking at HDFS, we attempt to find any orphaned data.
301    */
302   private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
303
304   private Map<TableName, Set<String>> orphanTableDirs =
305       new HashMap<TableName, Set<String>>();
306   private Map<TableName, TableState> tableStates =
307       new HashMap<TableName, TableState>();
308   private final RetryCounterFactory lockFileRetryCounterFactory;
309   private final RetryCounterFactory createZNodeRetryCounterFactory;
310
311   private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName, Set<String>>();
312
313   private ZooKeeperWatcher zkw = null;
314   private String hbckEphemeralNodePath = null;
315   private boolean hbckZodeCreated = false;
316
317   /**
318    * Constructor
319    *
320    * @param conf Configuration object
321    * @throws MasterNotRunningException if the master is not running
322    * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
323    */
324   public HBaseFsck(Configuration conf) throws MasterNotRunningException,
325       ZooKeeperConnectionException, IOException, ClassNotFoundException {
326     this(conf, createThreadPool(conf));
327   }
328
329   private static ExecutorService createThreadPool(Configuration conf) {
330     int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
331     return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
332   }
333
334   /**
335    * Constructor
336    *
337    * @param conf
338    *          Configuration object
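   * @param exec
   *          ExecutorService used to run parallelizable work items (e.g. contacting regionservers)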
339    * @throws MasterNotRunningException
340    *           if the master is not running
341    * @throws ZooKeeperConnectionException
342    *           if unable to connect to ZooKeeper
343    */
344   public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
345       ZooKeeperConnectionException, IOException, ClassNotFoundException {
346     super(conf);
347     errors = getErrorReporter(getConf());
348     this.executor = exec;
349     lockFileRetryCounterFactory = new RetryCounterFactory(
350       getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
351       getConf().getInt(
352         "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
353       getConf().getInt(
354         "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
355     createZNodeRetryCounterFactory = new RetryCounterFactory(
356       getConf().getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
357       getConf().getInt(
358         "hbase.hbck.createznode.attempt.sleep.interval",
359         DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
360       getConf().getInt(
361         "hbase.hbck.createznode.attempt.maxsleeptime",
362         DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
363     zkw = createZooKeeperWatcher();
364   }
365
366   private class FileLockCallable implements Callable<FSDataOutputStream> {
367     RetryCounter retryCounter;
368
369     public FileLockCallable(RetryCounter retryCounter) {
370       this.retryCounter = retryCounter;
371     }
372     @Override
373     public FSDataOutputStream call() throws IOException {
374       try {
375         FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
376         FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
377             HConstants.DATA_FILE_UMASK_KEY);
378         Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
379         fs.mkdirs(tmpDir);
380         HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
381         final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
382         out.writeBytes(InetAddress.getLocalHost().toString());
383         out.flush();
384         return out;
385       } catch(RemoteException e) {
386         if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
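          // Another hbck instance already created (and holds) the lock file; return null so the
          // caller treats this as a duplicate hbck run and aborts.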
387           return null;
388         } else {
389           throw e;
390         }
391       }
392     }
393
394     private FSDataOutputStream createFileWithRetries(final FileSystem fs,
395         final Path hbckLockFilePath, final FsPermission defaultPerms)
396         throws IOException {
397
398       IOException exception = null;
399       do {
400         try {
401           return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
402         } catch (IOException ioe) {
403           LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
404               + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
405               + retryCounter.getMaxAttempts());
406           LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
407               ioe);
408           try {
409             exception = ioe;
410             retryCounter.sleepUntilNextRetry();
411           } catch (InterruptedException ie) {
412             throw (InterruptedIOException) new InterruptedIOException(
413                 "Can't create lock file " + hbckLockFilePath.getName())
414             .initCause(ie);
415           }
416         }
417       } while (retryCounter.shouldRetry());
418
419       throw exception;
420     }
421   }
422
423   /**
424    * This method maintains a lock using a file. If the creation fails, we return null.
425    *
426    * @return FSDataOutputStream object corresponding to the newly opened lock file
427    * @throws IOException if IO failure occurs
428    */
429   private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
430     RetryCounter retryCounter = lockFileRetryCounterFactory.create();
431     FileLockCallable callable = new FileLockCallable(retryCounter);
432     ExecutorService executor = Executors.newFixedThreadPool(1);
433     FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
434     executor.execute(futureTask);
435     final int timeoutInSeconds = getConf().getInt(
436       "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
437     FSDataOutputStream stream = null;
438     try {
439       stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
440     } catch (ExecutionException ee) {
441       LOG.warn("Encountered exception when opening lock file", ee);
442     } catch (InterruptedException ie) {
443       LOG.warn("Interrupted when opening lock file", ie);
444       Thread.currentThread().interrupt();
445     } catch (TimeoutException exception) {
446       // took too long to obtain lock
447       LOG.warn("Took more than " + timeoutInSeconds + " seconds to obtain lock");
448       futureTask.cancel(true);
449     } finally {
450       executor.shutdownNow();
451     }
452     return stream;
453   }
454
455   private void unlockHbck() {
456     if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
457       RetryCounter retryCounter = lockFileRetryCounterFactory.create();
458       do {
459         try {
460           IOUtils.closeQuietly(hbckOutFd);
461           FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
462               HBCK_LOCK_PATH, true);
463           LOG.info("Finishing hbck");
464           return;
465         } catch (IOException ioe) {
466           LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
467               + (retryCounter.getAttemptTimes() + 1) + " of "
468               + retryCounter.getMaxAttempts());
469           LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
470           try {
471             retryCounter.sleepUntilNextRetry();
472           } catch (InterruptedException ie) {
473             Thread.currentThread().interrupt();
474             LOG.warn("Interrupted while deleting lock file " +
475                 HBCK_LOCK_PATH);
476             return;
477           }
478         }
479       } while (retryCounter.shouldRetry());
480     }
481   }
482
483   /**
484    * To repair region consistency, one must call connect() in order to repair
485    * online state.
486    */
487   public void connect() throws IOException {
488
489     if (isExclusive()) {
490       // Grab the lock
491       hbckOutFd = checkAndMarkRunningHbck();
492       if (hbckOutFd == null) {
493         setRetCode(-1);
494         LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
495             "[If you are sure no other instance is running, delete the lock file " +
496             HBCK_LOCK_PATH + " and rerun the tool]");
497         throw new IOException("Duplicate hbck - Abort");
498       }
499
500       // Make sure to cleanup the lock
501       hbckLockCleanup.set(true);
502     }
503
504
505     // Add a shutdown hook to this thread, in case the user tries to
506     // kill hbck with a ctrl-c; we want to clean up the lock so that
507     // it is available for further calls
508     Runtime.getRuntime().addShutdownHook(new Thread() {
509       @Override
510       public void run() {
511         IOUtils.closeQuietly(HBaseFsck.this);
512         cleanupHbckZnode();
513         unlockHbck();
514       }
515     });
516
517     LOG.info("Launching hbck");
518
519     connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
520     admin = connection.getAdmin();
521     meta = connection.getTable(TableName.META_TABLE_NAME);
522     status = admin.getClusterStatus();
523   }
524
525   /**
526    * Get deployed regions according to the region servers.
527    */
528   private void loadDeployedRegions() throws IOException, InterruptedException {
529     // From the master, get a list of all known live region servers
530     Collection<ServerName> regionServers = status.getServers();
531     errors.print("Number of live region servers: " + regionServers.size());
532     if (details) {
533       for (ServerName rsinfo: regionServers) {
534         errors.print("  " + rsinfo.getServerName());
535       }
536     }
537
538     // From the master, get a list of all dead region servers
539     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
540     errors.print("Number of dead region servers: " + deadRegionServers.size());
541     if (details) {
542       for (ServerName name: deadRegionServers) {
543         errors.print("  " + name);
544       }
545     }
546
547     // Print the current master name and state
548     errors.print("Master: " + status.getMaster());
549
550     // Print the list of all backup masters
551     Collection<ServerName> backupMasters = status.getBackupMasters();
552     errors.print("Number of backup masters: " + backupMasters.size());
553     if (details) {
554       for (ServerName name: backupMasters) {
555         errors.print("  " + name);
556       }
557     }
558
559     errors.print("Average load: " + status.getAverageLoad());
560     errors.print("Number of requests: " + status.getRequestsCount());
561     errors.print("Number of regions: " + status.getRegionsCount());
562
563     Set<RegionState> rits = status.getRegionsInTransition();
564     errors.print("Number of regions in transition: " + rits.size());
565     if (details) {
566       for (RegionState state: rits) {
567         errors.print("  " + state.toDescriptiveString());
568       }
569     }
570
571     // Determine what's deployed
572     processRegionServers(regionServers);
573   }
574
575   /**
576    * Clear the current state of hbck.
577    */
578   private void clearState() {
579     // Make sure regionInfo is empty before starting
580     fixes = 0;
581     regionInfoMap.clear();
582     emptyRegionInfoQualifiers.clear();
583     tableStates.clear();
584     errors.clear();
585     tablesInfo.clear();
586     orphanHdfsDirs.clear();
587     skippedRegions.clear();
588   }
589
590   /**
591    * This repair method analyzes hbase data in hdfs and repairs it to satisfy
592    * the table integrity rules.  HBase doesn't need to be online for this
593    * operation to work.
594    */
595   public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
596     // Initial pass to fix orphans.
597     if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
598         || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
599       LOG.info("Loading regioninfos from HDFS");
600       // if nothing is happening this should always complete in two iterations.
601       int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
602       int curIter = 0;
603       do {
604         clearState(); // clears hbck state and resets fixes to 0.
605         // repair what's on HDFS
606         restoreHdfsIntegrity();
607         curIter++; // limit the number of iterations.
608       } while (fixes > 0 && curIter <= maxIterations);
609
610       // Repairs should be done in the first iteration and verification in the second.
611       // If there are more than 2 passes, something funny has happened.
612       if (curIter > 2) {
613         if (curIter == maxIterations) {
614           LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
615               + "Table integrity may not be fully repaired!");
616         } else {
617           LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
618         }
619       }
620     }
621   }
622
623   /**
624    * This repair method requires the cluster to be online since it contacts
625    * region servers and the masters.  It makes each region's state in HDFS, in
626    * hbase:meta, and deployments consistent.
627    *
628    * @return If &gt; 0, the number of errors detected; if &lt; 0, there was an unrecoverable
629    *     error.  If 0, we have a clean hbase.
630    */
631   public int onlineConsistencyRepair() throws IOException, KeeperException,
632     InterruptedException {
633
634     // get regions according to what is online on each RegionServer
635     loadDeployedRegions();
636     // check whether hbase:meta is deployed and online
637     recordMetaRegion();
638     // Check if hbase:meta is found only once and in the right place
639     if (!checkMetaRegion()) {
640       String errorMsg = "hbase:meta table is not consistent. ";
641       if (shouldFixAssignments()) {
642         errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
643       } else {
644         errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
645       }
646       errors.reportError(errorMsg + " Exiting...");
647       return -2;
648     }
649     // Skip further consistency checks for tables when hbase:meta itself is not consistent.
650     LOG.info("Loading region info from the hbase:meta table");
651     boolean success = loadMetaEntries();
652     if (!success) return -1;
653
654     // Empty cells in hbase:meta?
655     reportEmptyMetaCells();
656
657     // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
658     if (shouldFixEmptyMetaCells()) {
659       fixEmptyMetaCells();
660     }
661
662     // get a list of all tables that have not changed recently.
663     if (!checkMetaOnly) {
664       reportTablesInFlux();
665     }
666
667     // Get disabled tables states
668     loadTableStates();
669
670     // load regiondirs and regioninfos from HDFS
671     if (shouldCheckHdfs()) {
672       LOG.info("Loading region directories from HDFS");
673       loadHdfsRegionDirs();
674       LOG.info("Loading region information from HDFS");
675       loadHdfsRegionInfos();
676     }
677
678     // fix the orphan tables
679     fixOrphanTables();
680
681     LOG.info("Checking and fixing region consistency");
682     // Check and fix consistency
683     checkAndFixConsistency();
684
685     // Check integrity (does not fix)
686     checkIntegrity();
687     return errors.getErrorList().size();
688   }
689
690   /**
691    * This method maintains an ephemeral znode. If the creation fails, we return false or throw
692    * an exception.
693    *
694    * @return true if creating znode succeeds; false otherwise
695    * @throws IOException if IO failure occurs
696    */
697   private boolean setMasterInMaintenanceMode() throws IOException {
698     RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
699     hbckEphemeralNodePath = ZKUtil.joinZNode(
700       ZooKeeperWatcher.masterMaintZNode,
701       "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
702     do {
703       try {
704         hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
705         if (hbckZodeCreated) {
706           break;
707         }
708       } catch (KeeperException e) {
709         if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
710            throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
711         }
712         // fall through and retry
713       }
714
715       LOG.warn("Failed to create znode " + hbckEphemeralNodePath + ", try=" +
716           (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
717
718       try {
719         retryCounter.sleepUntilNextRetry();
720       } catch (InterruptedException ie) {
721         throw (InterruptedIOException) new InterruptedIOException(
722               "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
723       }
724     } while (retryCounter.shouldRetry());
725     return hbckZodeCreated;
726   }
727
728   private void cleanupHbckZnode() {
729     try {
730       if (zkw != null && hbckZodeCreated) {
731         ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
732         hbckZodeCreated = false;
733       }
734     } catch (KeeperException e) {
735       // Ignore
736       if (!e.code().equals(KeeperException.Code.NONODE)) {
737         LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
738       }
739     }
740   }
741
742   /**
743    * Contacts the master and prints out cluster-wide information
744    * @return 0 on success, non-zero on failure
745    */
746   public int onlineHbck()
747       throws IOException, KeeperException, InterruptedException, ServiceException {
748     // print hbase server version
749     errors.print("Version: " + status.getHBaseVersion());
750
751     // Clean start
752     clearState();
753     // Do offline check and repair first
754     offlineHdfsIntegrityRepair();
755     offlineReferenceFileRepair();
756     // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
757     // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
758     // is better to set Master into maintenance mode during online hbck.
759     //
760     if (!setMasterInMaintenanceMode()) {
761       LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
762         + "errors.  Please run HBCK multiple times to reduce the chance of transient errors.");
763     }
764
765     onlineConsistencyRepair();
766
767     if (checkRegionBoundaries) {
768       checkRegionBoundaries();
769     }
770
771     checkAndFixTableLocks();
772
773     checkAndFixReplication();
774
775     // Remove the hbck znode
776     cleanupHbckZnode();
777
778     // Remove the hbck lock
779     unlockHbck();
780
781     // Print table summary
782     printTableSummary(tablesInfo);
783     return errors.summarize();
784   }
785
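  /**
   * Strips a serialized KeyValue key down to just its row bytes: the first SIZEOF_SHORT bytes
   * hold the row length, and the row itself follows immediately after.
   */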
786   public static byte[] keyOnly (byte[] b) {
787     if (b == null)
788       return b;
789     int rowlength = Bytes.toShort(b, 0);
790     byte[] result = new byte[rowlength];
791     System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
792     return result;
793   }
794
795   @Override
796   public void close() throws IOException {
797     try {
798       cleanupHbckZnode();
799       unlockHbck();
800     } catch (Exception io) {
801       LOG.warn(io);
802     } finally {
803       if (zkw != null) {
804         zkw.close();
805         zkw = null;
806       }
807       IOUtils.closeQuietly(admin);
808       IOUtils.closeQuietly(meta);
809       IOUtils.closeQuietly(connection);
810     }
811   }
812
813   private static class RegionBoundariesInformation {
814     public byte [] regionName;
815     public byte [] metaFirstKey;
816     public byte [] metaLastKey;
817     public byte [] storesFirstKey;
818     public byte [] storesLastKey;
819     @Override
820     public String toString () {
821       return "regionName=" + Bytes.toStringBinary(regionName) +
822              "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
823              "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
824              "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
825              "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
826     }
827   }
828
829   public void checkRegionBoundaries() {
830     try {
831       ByteArrayComparator comparator = new ByteArrayComparator();
832       List<HRegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
833       final RegionBoundariesInformation currentRegionBoundariesInformation =
834           new RegionBoundariesInformation();
835       Path hbaseRoot = FSUtils.getRootDir(getConf());
836       for (HRegionInfo regionInfo : regions) {
837         Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
838         currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
839         // For each region, get the start and stop key from the META and compare them to the
840         // same information from the Stores.
841         Path path = new Path(tableDir, regionInfo.getEncodedName());
842         FileSystem fs = path.getFileSystem(getConf());
843         FileStatus[] files = fs.listStatus(path);
844         // For all the column families in this region...
845         byte[] storeFirstKey = null;
846         byte[] storeLastKey = null;
847         for (FileStatus file : files) {
848           String fileName = file.getPath().toString();
849           fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
850           if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
851             FileStatus[] storeFiles = fs.listStatus(file.getPath());
852             // For all the stores in this column family.
853             for (FileStatus storeFile : storeFiles) {
854               HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
855                   getConf()), getConf());
856               if ((reader.getFirstKey() != null)
857                   && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
858                       ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey()) > 0))) {
859                 storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey()).getKey();
860               }
861               if ((reader.getLastKey() != null)
862                   && ((storeLastKey == null) || (comparator.compare(storeLastKey,
863                       ((KeyValue.KeyOnlyKeyValue)reader.getLastKey()).getKey())) < 0)) {
864                 storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey()).getKey();
865               }
866               reader.close();
867             }
868           }
869         }
870         currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
871         currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
872         currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
873         currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
874         if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
875           currentRegionBoundariesInformation.metaFirstKey = null;
876         if (currentRegionBoundariesInformation.metaLastKey.length == 0)
877           currentRegionBoundariesInformation.metaLastKey = null;
878
879         // For a region to be correct, we need the META start key to be smaller or equal to the
880         // smallest start key from all the stores, and the start key from the next META entry to
881         // be bigger than the last key from all the current stores. First region start key is null;
882         // Last region end key is null; some regions can be empty and not have any store.
883
884         boolean valid = true;
885         // Checking start key.
886         if ((currentRegionBoundariesInformation.storesFirstKey != null)
887             && (currentRegionBoundariesInformation.metaFirstKey != null)) {
888           valid = valid
889               && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
890                 currentRegionBoundariesInformation.metaFirstKey) >= 0;
891         }
892         // Checking stop key.
893         if ((currentRegionBoundariesInformation.storesLastKey != null)
894             && (currentRegionBoundariesInformation.metaLastKey != null)) {
895           valid = valid
896               && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
897                 currentRegionBoundariesInformation.metaLastKey) < 0;
898         }
899         if (!valid) {
900           errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
901             tablesInfo.get(regionInfo.getTable()));
902           LOG.warn("Region's boundaries not aligned between stores and META for:");
903           LOG.warn(currentRegionBoundariesInformation);
904         }
905       }
906     } catch (IOException e) {
907       LOG.error(e);
908     }
909   }
910
911   /**
912    * Iterates through the list of all orphan/invalid regiondirs.
913    */
914   private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
915     for (HbckInfo hi : orphanHdfsDirs) {
916       LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
917       adoptHdfsOrphan(hi);
918     }
919   }
920
921   /**
922    * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
923    * these orphans by creating a new region, and moving the column families,
924    * recovered edits, WALs, into the new region dir.  We determine the region
925    * startkey and endkeys by looking at all of the hfiles inside the column
926    * families to identify the min and max keys. The resulting region will
927    * likely violate table integrity but will be dealt with by merging
928    * overlapping regions.
929    */
930   @SuppressWarnings("deprecation")
931   private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
932     Path p = hi.getHdfsRegionDir();
933     FileSystem fs = p.getFileSystem(getConf());
934     FileStatus[] dirs = fs.listStatus(p);
935     if (dirs == null) {
936       LOG.warn("Attempt to adopt orphan hdfs region skipped because no files are present in " +
937           p + ". This dir could probably be deleted.");
938       return;
939     }
940
941     TableName tableName = hi.getTableName();
942     TableInfo tableInfo = tablesInfo.get(tableName);
943     Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
944     HTableDescriptor template = tableInfo.getHTD();
945
946     // find min and max key values
947     Pair<byte[],byte[]> orphanRegionRange = null;
948     for (FileStatus cf : dirs) {
949       String cfName= cf.getPath().getName();
950       // TODO Figure out what the special dirs are
951       if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
952
953       FileStatus[] hfiles = fs.listStatus(cf.getPath());
954       for (FileStatus hfile : hfiles) {
955         byte[] start, end;
956         HFile.Reader hf = null;
957         try {
958           CacheConfig cacheConf = new CacheConfig(getConf());
959           hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
960           hf.loadFileInfo();
961           Cell startKv = hf.getFirstKey();
962           start = CellUtil.cloneRow(startKv);
963           Cell endKv = hf.getLastKey();
964           end = CellUtil.cloneRow(endKv);
965         } catch (IOException ioe) {
966           LOG.warn("Problem reading orphan file " + hfile + ", skipping");
967           continue;
968         } catch (NullPointerException ioe) {
969           LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
970           continue;
971         } finally {
972           if (hf != null) {
973             hf.close();
974           }
975         }
976
977         // expand the range to include the range of all hfiles
978         if (orphanRegionRange == null) {
979           // first range
980           orphanRegionRange = new Pair<byte[], byte[]>(start, end);
981         } else {
982           // TODO add test
983
984           // expand range only if the hfile is wider.
985           if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
986             orphanRegionRange.setFirst(start);
987           }
988           if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
989             orphanRegionRange.setSecond(end);
990           }
991         }
992       }
993     }
994     if (orphanRegionRange == null) {
995       LOG.warn("No data in dir " + p + ", sidelining data");
996       fixes++;
997       sidelineRegionDir(fs, hi);
998       return;
999     }
1000     LOG.info("Min and max keys are: [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1001         Bytes.toString(orphanRegionRange.getSecond()) + ")");
1002
1003     // create new region on hdfs. move data into place.
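    // Note: the scanned max key is the last row actually seen, but a region's end key is
    // exclusive, so a trailing 0x00 byte is appended to keep that last row inside the new region.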
1004     HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
1005         Bytes.add(orphanRegionRange.getSecond(), new byte[1]));
1006     LOG.info("Creating new region : " + hri);
1007     HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
1008     Path target = region.getRegionFileSystem().getRegionDir();
1009
1010     // rename all the data to new region
1011     mergeRegionDirs(target, hi);
1012     fixes++;
1013   }
1014
1015   /**
1016    * This method determines if there are table integrity errors in HDFS.  If
1017    * there are errors and the appropriate "fix" options are enabled, the method
1018    * will first correct orphan regions making them into legit regiondirs, and
1019    * then reload to merge potentially overlapping regions.
1020    *
1021    * @return number of table integrity errors found
1022    */
1023   private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1024     // Determine what's on HDFS
1025     LOG.info("Loading HBase regioninfo from HDFS...");
1026     loadHdfsRegionDirs(); // populating regioninfo table.
1027
1028     int errs = errors.getErrorList().size();
1029     // First time just get suggestions.
1030     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1031     checkHdfsIntegrity(false, false);
1032
1033     if (errors.getErrorList().size() == errs) {
1034       LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1035       return 0;
1036     }
1037
1038     if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1039       adoptHdfsOrphans(orphanHdfsDirs);
1040       // TODO optimize by incrementally adding instead of reloading.
1041     }
1042
1043     // Make sure there are no holes now.
1044     if (shouldFixHdfsHoles()) {
1045       clearState(); // this also resets # fixes.
1046       loadHdfsRegionDirs();
1047       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1048       tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1049     }
1050
1051     // Now we fix overlaps
1052     if (shouldFixHdfsOverlaps()) {
1053       // second pass we fix overlaps.
1054       clearState(); // this also resets # fixes.
1055       loadHdfsRegionDirs();
1056       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1057       tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1058     }
1059
1060     return errors.getErrorList().size();
1061   }
1062
1063   /**
1064    * Scan all the store file names to find any lingering reference files,
1065    * which refer to non-existent files. If the "fix" option is enabled,
1066    * any lingering reference file found will be sidelined.
1067    * <p>
1068    * A lingering reference file prevents a region from opening. It has to
1069    * be fixed before a cluster can start properly.
1070    */
1071   private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1072     clearState();
1073     Configuration conf = getConf();
1074     Path hbaseRoot = FSUtils.getRootDir(conf);
1075     FileSystem fs = hbaseRoot.getFileSystem(conf);
1076     LOG.info("Computing mapping of all store files");
1077     Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1078       new FSUtils.ReferenceFileFilter(fs), executor, errors);
1079     errors.print("");
1080     LOG.info("Validating mapping using HDFS state");
1081     for (Path path: allFiles.values()) {
1082       Path referredToFile = StoreFileInfo.getReferredToFile(path);
1083       if (fs.exists(referredToFile)) continue;  // good, expected
1084
1085       // Found a lingering reference file
1086       errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1087         "Found lingering reference file " + path);
1088       if (!shouldFixReferenceFiles()) continue;
1089
1090       // Now, trying to fix it since requested
1091       boolean success = false;
1092       String pathStr = path.toString();
1093
1094       // A reference file path should be like
1095       // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1096       // Up 5 directories to get the root folder.
1097       // So the file will be sidelined to a similar folder structure.
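      // e.g. <rootdir>/data/ns/table/region/cf/referred_file.region_name would end up at
      //      <sidelinedir>/data/ns/table/region/cf/referred_file.region_name (illustrative paths).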
1098       int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1099       for (int i = 0; index > 0 && i < 5; i++) {
1100         index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1101       }
1102       if (index > 0) {
1103         Path rootDir = getSidelineDir();
1104         Path dst = new Path(rootDir, pathStr.substring(index + 1));
1105         fs.mkdirs(dst.getParent());
1106         LOG.info("Trying to sideline reference file "
1107           + path + " to " + dst);
1108         setShouldRerun();
1109
1110         success = fs.rename(path, dst);
1111       }
1112       if (!success) {
1113         LOG.error("Failed to sideline reference file " + path);
1114       }
1115     }
1116   }
1117
1118   /**
1119    * TODO -- need to add tests for this.
1120    */
1121   private void reportEmptyMetaCells() {
1122     errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1123       emptyRegionInfoQualifiers.size());
1124     if (details) {
1125       for (Result r: emptyRegionInfoQualifiers) {
1126         errors.print("  " + r);
1127       }
1128     }
1129   }
1130
1131   /**
1132    * TODO -- need to add tests for this.
1133    */
1134   private void reportTablesInFlux() {
1135     AtomicInteger numSkipped = new AtomicInteger(0);
1136     HTableDescriptor[] allTables = getTables(numSkipped);
1137     errors.print("Number of Tables: " + allTables.length);
1138     if (details) {
1139       if (numSkipped.get() > 0) {
1140         errors.detail("Number of Tables in flux: " + numSkipped.get());
1141       }
1142       for (HTableDescriptor td : allTables) {
1143         errors.detail("  Table: " + td.getTableName() + "\t" +
1144                            (td.isReadOnly() ? "ro" : "rw") + "\t" +
1145                             (td.isMetaRegion() ? "META" : "    ") + "\t" +
1146                            " families: " + td.getFamilies().size());
1147       }
1148     }
1149   }
1150
1151   public ErrorReporter getErrors() {
1152     return errors;
1153   }
1154
1155   /**
1156    * Read the .regioninfo file from the file system.  If there is no
1157    * .regioninfo, add it to the orphan hdfs region list.
1158    */
1159   private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1160     Path regionDir = hbi.getHdfsRegionDir();
1161     if (regionDir == null) {
1162       if (hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1163         // Log a warning only for the default/primary replica with no region dir
1164         LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1165       }
1166       return;
1167     }
1168
1169     if (hbi.hdfsEntry.hri != null) {
1170       // already loaded data
1171       return;
1172     }
1173
1174     FileSystem fs = FileSystem.get(getConf());
1175     HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1176     LOG.debug("HRegionInfo read: " + hri.toString());
1177     hbi.hdfsEntry.hri = hri;
1178   }
1179
1180   /**
1181    * Exception thrown when an integrity repair operation fails in an
1182    * unresolvable way.
1183    */
1184   public static class RegionRepairException extends IOException {
1185     private static final long serialVersionUID = 1L;
1186     final IOException ioe;
1187     public RegionRepairException(String s, IOException ioe) {
1188       super(s);
1189       this.ioe = ioe;
1190     }
1191   }
1192
1193   /**
1194    * Populate hbi's from regionInfos loaded from file system.
1195    */
1196   private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1197       throws IOException, InterruptedException {
1198     tablesInfo.clear(); // regenerating the data
1199     // generate region split structure
1200     Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1201
1202     // Parallelized read of .regioninfo files.
1203     List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
1204     List<Future<Void>> hbiFutures;
1205
1206     for (HbckInfo hbi : hbckInfos) {
1207       WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1208       hbis.add(work);
1209     }
1210
1211     // Submit and wait for completion
1212     hbiFutures = executor.invokeAll(hbis);
1213
1214     for(int i=0; i<hbiFutures.size(); i++) {
1215       WorkItemHdfsRegionInfo work = hbis.get(i);
1216       Future<Void> f = hbiFutures.get(i);
1217       try {
1218         f.get();
1219       } catch(ExecutionException e) {
1220         LOG.warn("Failed to read .regioninfo file for region " +
1221               work.hbi.getRegionNameAsString(), e.getCause());
1222       }
1223     }
1224
1225     Path hbaseRoot = FSUtils.getRootDir(getConf());
1226     FileSystem fs = hbaseRoot.getFileSystem(getConf());
1227     // Gather table info serially (this part is not parallelized).
1228     for (HbckInfo hbi: hbckInfos) {
1229
1230       if (hbi.getHdfsHRI() == null) {
1231         // was an orphan
1232         continue;
1233       }
1234
1235
1236       // get table name from hdfs, populate various HBaseFsck tables.
1237       TableName tableName = hbi.getTableName();
1238       if (tableName == null) {
1239         // There was an entry in hbase:meta not in the HDFS?
1240         LOG.warn("tableName was null for: " + hbi);
1241         continue;
1242       }
1243
1244       TableInfo modTInfo = tablesInfo.get(tableName);
1245       if (modTInfo == null) {
1246         // only executed once per table.
1247         modTInfo = new TableInfo(tableName);
1248         tablesInfo.put(tableName, modTInfo);
1249         try {
1250           HTableDescriptor htd =
1251               FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1252           modTInfo.htds.add(htd);
1253         } catch (IOException ioe) {
1254           if (!orphanTableDirs.containsKey(tableName)) {
1255             LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1256             //should only report once for each table
1257             errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1258                 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1259             Set<String> columns = new HashSet<String>();
1260             orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1261           }
1262         }
1263       }
1264       if (!hbi.isSkipChecks()) {
1265         modTInfo.addRegionInfo(hbi);
1266       }
1267     }
1268
1269     loadTableInfosForTablesWithNoRegion();
1270     errors.print("");
1271
1272     return tablesInfo;
1273   }
1274
1275   /**
1276    * Get the column family list according to the column family dirs under the region dir.
1277    * @param columns set that the discovered column family names are added to
1278    * @param hbi region whose HDFS dir is inspected
1279    * @return the populated set of column families
1280    * @throws IOException if listing the region dir fails
1281    */
1282   private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1283     Path regionDir = hbi.getHdfsRegionDir();
1284     FileSystem fs = regionDir.getFileSystem(getConf());
1285     FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1286     for (FileStatus subdir : subDirs) {
1287       String columnfamily = subdir.getPath().getName();
1288       columns.add(columnfamily);
1289     }
1290     return columns;
1291   }
1292
1293   /**
1294    * Fabricate a .tableinfo file with the following contents:<br>
1295    * 1. the correct tablename <br>
1296    * 2. the correct colfamily list<br>
1297    * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1298    * @throws IOException
1299    */
1300   private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1301       Set<String> columns) throws IOException {
1302     if (columns == null || columns.isEmpty()) return false;
1303     HTableDescriptor htd = new HTableDescriptor(tableName);
1304     for (String columnfamily : columns) {
1305       htd.addFamily(new HColumnDescriptor(columnfamily));
1306     }
1307     fstd.createTableDescriptor(htd, true);
1308     return true;
1309   }
1310
1311   /**
1312    * Fix the empty REGIONINFO_QUALIFIER rows in hbase:meta by deleting them.<br>
1313    * @throws IOException
1314    */
1315   public void fixEmptyMetaCells() throws IOException {
1316     if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1317       LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
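           // Each empty REGIONINFO_QUALIFIER row is deleted from hbase:meta and its
           // corresponding EMPTY_META_CELL error entry is removed from the error list.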
1318       for (Result region : emptyRegionInfoQualifiers) {
1319         deleteMetaRegion(region.getRow());
1320         errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1321       }
1322       emptyRegionInfoQualifiers.clear();
1323     }
1324   }
1325
1326   /**
1327    * Fix orphan tables by creating a .tableinfo file under each orphan table dir:<br>
1328    * 1. if a table descriptor is cached, recover the .tableinfo from it <br>
1329    * 2. else create a default .tableinfo file with the following items<br>
1330    * &nbsp;2.1 the correct tablename <br>
1331    * &nbsp;2.2 the correct colfamily list<br>
1332    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1333    * @throws IOException
1334    */
1335   public void fixOrphanTables() throws IOException {
1336     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1337
1338       List<TableName> tmpList = new ArrayList<TableName>();
1339       tmpList.addAll(orphanTableDirs.keySet());
1340       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1341       Iterator<Entry<TableName, Set<String>>> iter =
1342           orphanTableDirs.entrySet().iterator();
1343       int j = 0;
1344       int numFailedCase = 0;
1345       FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1346       while (iter.hasNext()) {
1347         Entry<TableName, Set<String>> entry =
1348             iter.next();
1349         TableName tableName = entry.getKey();
1350         LOG.info("Trying to fix orphan table error: " + tableName);
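             // Prefer a cached descriptor when the next cached entry matches this table;
             // tables without a cached descriptor fall through to fabricating a default
             // .tableinfo from the column family directories below.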
1351         if (j < htds.length) {
1352           if (tableName.equals(htds[j].getTableName())) {
1353             HTableDescriptor htd = htds[j];
1354             LOG.info("fixing orphan table: " + tableName + " from cache");
1355             fstd.createTableDescriptor(htd, true);
1356             j++;
1357             iter.remove();
1358           }
1359         } else {
1360           if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1361             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1362             LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: " + tableName);
1363             iter.remove();
1364           } else {
1365             LOG.error("Unable to create default .tableinfo for " + tableName + " because column family information is missing");
1366             numFailedCase++;
1367           }
1368         }
1369         fixes++;
1370       }
1371
1372       if (orphanTableDirs.isEmpty()) {
1373         // all orphan table dirs were recovered
1374         // re-run doFsck after recovering the .tableinfo file
1375         setShouldRerun();
1376         LOG.warn("Strongly recommend re-running hbck manually now that all orphan table dirs have been fixed");
1377       } else if (numFailedCase > 0) {
1378         LOG.error("Failed to fix " + numFailedCase
1379             + " OrphanTables with default .tableinfo files");
1380       }
1381
1382     }
1383     //cleanup the list
1384     orphanTableDirs.clear();
1385
1386   }
1387
1388   /**
1389    * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates its own WAL, so be
1390    * sure to close it as well as the region when you're finished.
1391    *
1392    * @return an open hbase:meta HRegion
1393    */
1394   private HRegion createNewMeta() throws IOException {
1395     Path rootdir = FSUtils.getRootDir(getConf());
1396     Configuration c = getConf();
1397     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1398     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1399     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
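         // Info-family block caching is turned off while the bootstrap meta region is
         // created and re-enabled below once the region exists.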
1400     // The WAL subsystem will use the default rootDir rather than the passed in rootDir
1401     // unless I pass along via the conf.
1402     Configuration confForWAL = new Configuration(c);
1403     confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
1404     WAL wal = (new WALFactory(confForWAL,
1405         Collections.<WALActionsListener>singletonList(new MetricsWAL()),
1406         "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8))).
1407         getWAL(metaHRI.getEncodedNameAsBytes(), metaHRI.getTable().getNamespace());
1408     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
1409     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1410     return meta;
1411   }
1412
1413   /**
1414    * Generate set of puts to add to new meta.  This expects the tables to be
1415    * clean with no overlaps or holes.  If there are any problems it returns null.
1416    *
1417    * @return An array list of puts to do in bulk, null if tables have problems
1418    */
1419   private ArrayList<Put> generatePuts(
1420       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1421     ArrayList<Put> puts = new ArrayList<Put>();
1422     boolean hasProblems = false;
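         // Each table contributes one table-state put plus one put per region; any split
         // point covered by a number of regions other than exactly one marks the rebuild
         // as having problems, in which case null is returned.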
1423     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1424       TableName name = e.getKey();
1425
1426       // skip "hbase:meta"
1427       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1428         continue;
1429       }
1430
1431       TableInfo ti = e.getValue();
1432       puts.add(MetaTableAccessor
1433           .makePutFromTableState(new TableState(ti.tableName, TableState.State.ENABLED)));
1434       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1435           .entrySet()) {
1436         Collection<HbckInfo> his = spl.getValue();
1437         int sz = his.size();
1438         if (sz != 1) {
1439           // problem
1440           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1441               + " had " +  sz + " regions instead of exactly 1." );
1442           hasProblems = true;
1443           continue;
1444         }
1445
1446         // add the row directly to meta.
1447         HbckInfo hi = his.iterator().next();
1448         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1449         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1450         puts.add(p);
1451       }
1452     }
1453     return hasProblems ? null : puts;
1454   }
1455
1456   /**
1457    * Suggest fixes for each table
1458    */
1459   private void suggestFixes(
1460       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1461     logParallelMerge();
1462     for (TableInfo tInfo : tablesInfo.values()) {
1463       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1464       tInfo.checkRegionChain(handler);
1465     }
1466   }
1467
1468   /**
1469    * Rebuilds meta from information in hdfs/fs.  Depends on configuration settings passed into
1470    * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE.
1471    *
1472    * @param fix flag that determines if method should attempt to fix holes
1473    * @return true if successful, false if attempt failed.
1474    */
1475   public boolean rebuildMeta(boolean fix) throws IOException,
1476       InterruptedException {
1477
1478     // TODO check to make sure hbase is offline. (or at least the table
1479     // currently being worked on is off line)
1480
1481     // Determine what's on HDFS
1482     LOG.info("Loading HBase regioninfo from HDFS...");
1483     loadHdfsRegionDirs(); // populating regioninfo table.
1484
1485     int errs = errors.getErrorList().size();
1486     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1487     checkHdfsIntegrity(false, false);
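         // The first integrity pass is suggest-only (no hole or overlap fixing); if it
         // surfaces new errors, the loop below repeatedly suggests fixes, reloads region
         // infos, and re-checks until a pass completes without applying any fixes.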
1488
1489     // make sure ok.
1490     if (errors.getErrorList().size() != errs) {
1491       // While in error state, iterate until no more fixes possible
1492       while(true) {
1493         fixes = 0;
1494         suggestFixes(tablesInfo);
1495         errors.clear();
1496         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1497         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1498
1499         int errCount = errors.getErrorList().size();
1500
1501         if (fixes == 0) {
1502           if (errCount > 0) {
1503             return false; // failed to fix problems.
1504           } else {
1505             break; // no fixes and no problems? drop out and fix stuff!
1506           }
1507         }
1508       }
1509     }
1510
1511     // we can rebuild, move old meta out of the way and start
1512     LOG.info("HDFS regioninfos seem good.  Sidelining old hbase:meta");
1513     Path backupDir = sidelineOldMeta();
1514
1515     LOG.info("Creating new hbase:meta");
1516     HRegion meta = createNewMeta();
1517
1518     // populate meta
1519     List<Put> puts = generatePuts(tablesInfo);
1520     if (puts == null) {
1521       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1522         "You may need to restore the previously sidelined hbase:meta");
1523       return false;
1524     }
1525     meta.batchMutate(puts.toArray(new Put[puts.size()]), HConstants.NO_NONCE, HConstants.NO_NONCE);
1526     meta.close();
1527     if (meta.getWAL() != null) {
1528       meta.getWAL().close();
1529     }
1530     LOG.info("Success! hbase:meta table rebuilt.");
1531     LOG.info("Old hbase:meta is moved into " + backupDir);
1532     return true;
1533   }
1534
1535   /**
1536    * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1537    */
1538   private void logParallelMerge() {
1539     if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1540       LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
1541           " false to run serially.");
1542     } else {
1543       LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
1544           " true to run in parallel.");
1545     }
1546   }
1547
1548   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1549       boolean fixOverlaps) throws IOException {
1550     LOG.info("Checking HBase region split map from HDFS data...");
1551     logParallelMerge();
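         // Each table gets either an HDFSIntegrityFixer (when hole/overlap fixing is
         // requested) or a suggest-only IntegrityFixSuggester.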
1552     for (TableInfo tInfo : tablesInfo.values()) {
1553       TableIntegrityErrorHandler handler;
1554       if (fixHoles || fixOverlaps) {
1555         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1556           fixHoles, fixOverlaps);
1557       } else {
1558         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1559       }
1560       if (!tInfo.checkRegionChain(handler)) {
1561         // should dump info as well.
1562         errors.report("Found inconsistency in table " + tInfo.getName());
1563       }
1564     }
1565     return tablesInfo;
1566   }
1567
1568   private Path getSidelineDir() throws IOException {
1569     if (sidelineDir == null) {
1570       Path hbaseDir = FSUtils.getRootDir(getConf());
1571       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1572       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1573           + startMillis);
1574     }
1575     return sidelineDir;
1576   }
1577
1578   /**
1579    * Sideline a region dir (instead of deleting it)
1580    */
1581   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1582     return sidelineRegionDir(fs, null, hi);
1583   }
1584
1585   /**
1586    * Sideline a region dir (instead of deleting it)
1587    *
1588    * @param parentDir if specified, the region will be sidelined to folder like
1589    *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1590    *     sidelined regions that should be handled similarly, for example regions that will be
1591    *     bulk loaded back later on. If NULL, it is ignored.
1592    */
1593   Path sidelineRegionDir(FileSystem fs,
1594       String parentDir, HbckInfo hi) throws IOException {
1595     TableName tableName = hi.getTableName();
1596     Path regionDir = hi.getHdfsRegionDir();
1597
1598     if (!fs.exists(regionDir)) {
1599       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1600       return null;
1601     }
1602
1603     Path rootDir = getSidelineDir();
1604     if (parentDir != null) {
1605       rootDir = new Path(rootDir, parentDir);
1606     }
1607     Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1608     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1609     fs.mkdirs(sidelineRegionDir);
1610     boolean success = false;
1611     FileStatus[] cfs =  fs.listStatus(regionDir);
1612     if (cfs == null) {
1613       LOG.info("Region dir is empty: " + regionDir);
1614     } else {
1615       for (FileStatus cf : cfs) {
1616         Path src = cf.getPath();
1617         Path dst =  new Path(sidelineRegionDir, src.getName());
1618         if (fs.isFile(src)) {
1619           // simple file
1620           success = fs.rename(src, dst);
1621           if (!success) {
1622             String msg = "Unable to rename file " + src +  " to " + dst;
1623             LOG.error(msg);
1624             throw new IOException(msg);
1625           }
1626           continue;
1627         }
1628
1629         // is a directory.
1630         fs.mkdirs(dst);
1631
1632         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1633         // FileSystem.rename is inconsistent with directories -- if the
1634         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1635         // it moves the src into the dst dir resulting in (foo/a/b).  If
1636         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1637         FileStatus[] hfiles = fs.listStatus(src);
1638         if (hfiles != null && hfiles.length > 0) {
1639           for (FileStatus hfile : hfiles) {
1640             success = fs.rename(hfile.getPath(), dst);
1641             if (!success) {
1642               String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1643               LOG.error(msg);
1644               throw new IOException(msg);
1645             }
1646           }
1647         }
1648         LOG.debug("Sideline directory contents:");
1649         debugLsr(sidelineRegionDir);
1650       }
1651     }
1652
1653     LOG.info("Removing old region dir: " + regionDir);
1654     success = fs.delete(regionDir, true);
1655     if (!success) {
1656       String msg = "Unable to delete dir " + regionDir;
1657       LOG.error(msg);
1658       throw new IOException(msg);
1659     }
1660     return sidelineRegionDir;
1661   }
1662
1663   /**
1664    * Sideline an entire table.
1665    */
1666   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1667       Path backupHbaseDir) throws IOException {
1668     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1669     if (fs.exists(tableDir)) {
1670       Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1671       fs.mkdirs(backupTableDir.getParent());
1672       boolean success = fs.rename(tableDir, backupTableDir);
1673       if (!success) {
1674         throw new IOException("Failed to move " + tableName + " from "
1675             +  tableDir + " to " + backupTableDir);
1676       }
1677     } else {
1678       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1679     }
1680   }
1681
1682   /**
1683    * @return Path to backup of original directory
1684    */
1685   Path sidelineOldMeta() throws IOException {
1686     // put current hbase:meta aside.
1687     Path hbaseDir = FSUtils.getRootDir(getConf());
1688     FileSystem fs = hbaseDir.getFileSystem(getConf());
1689     Path backupDir = getSidelineDir();
1690     fs.mkdirs(backupDir);
1691
1692     try {
1693       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1694     } catch (IOException e) {
1695         LOG.fatal("... failed to sideline meta. Currently in inconsistent state.  To restore "
1696             + "try to rename hbase:meta in " + backupDir.getName() + " to "
1697             + hbaseDir.getName() + ".", e);
1698       throw e; // throw original exception
1699     }
1700     return backupDir;
1701   }
1702
1703   /**
1704    * Load the table states currently recorded in hbase:meta into the local
1705    * {@code tableStates} map.
1706    * @throws IOException if reading hbase:meta fails
1707    */
1708   private void loadTableStates()
1709   throws IOException {
1710     tableStates = MetaTableAccessor.getTableStates(connection);
1711   }
1712
1713   /**
1714    * Check if the specified table is disabled or disabling.
1715    * @param tableName table to check status of
1716    */
1717   private boolean isTableDisabled(TableName tableName) {
1718     return tableStates.containsKey(tableName)
1719         && tableStates.get(tableName)
1720         .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1721   }
1722
1723   /**
1724    * Scan HDFS for all regions, recording their information into
1725    * regionInfoMap
1726    */
1727   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1728     Path rootDir = FSUtils.getRootDir(getConf());
1729     FileSystem fs = rootDir.getFileSystem(getConf());
1730
1731     // list all tables from HDFS
1732     List<FileStatus> tableDirs = Lists.newArrayList();
1733
1734     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1735
1736     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1737     for (Path path : paths) {
1738       TableName tableName = FSUtils.getTableName(path);
1739       if ((!checkMetaOnly &&
1740           isTableIncluded(tableName)) ||
1741           tableName.equals(TableName.META_TABLE_NAME)) {
1742         tableDirs.add(fs.getFileStatus(path));
1743       }
1744     }
1745
1746     // verify that version file exists
1747     if (!foundVersionFile) {
1748       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1749           "Version file does not exist in root dir " + rootDir);
1750       if (shouldFixVersionFile()) {
1751         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1752             + " file.");
1753         setShouldRerun();
1754         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1755             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1756             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1757             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1758       }
1759     }
1760
1761     // Avoid multithreading at table-level because already multithreaded internally at
1762     // region-level.  Additionally multithreading at table-level can lead to deadlock
1763     // if there are many tables in the cluster.  Since there are a limited # of threads
1764     // in the executor's thread pool and if we multithread at the table-level by putting
1765     // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1766     // executor tied up solely in waiting for the tables' region-level calls to complete.
1767     // If there are enough tables then there will be no actual threads in the pool left
1768     // for the region-level callables to be serviced.
1769     for (FileStatus tableDir : tableDirs) {
1770       LOG.debug("Loading region dirs from " + tableDir.getPath());
1771       WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1772       try {
1773         item.call();
1774       } catch (ExecutionException e) {
1775         LOG.warn("Could not completely load table dir " +
1776             tableDir.getPath(), e.getCause());
1777       }
1778     }
1779     errors.print("");
1780   }
1781
1782   /**
1783    * Record the location of the hbase:meta region as found in ZooKeeper.
1784    */
1785   private boolean recordMetaRegion() throws IOException {
1786     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1787         HConstants.EMPTY_START_ROW, false, false);
1788     if (rl == null) {
1789       errors.reportError(ERROR_CODE.NULL_META_REGION,
1790           "META region was not found in ZooKeeper");
1791       return false;
1792     }
1793     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1794       // Check if Meta region is valid and existing
1795       if (metaLocation == null ) {
1796         errors.reportError(ERROR_CODE.NULL_META_REGION,
1797             "META region location is null");
1798         return false;
1799       }
1800       if (metaLocation.getRegionInfo() == null) {
1801         errors.reportError(ERROR_CODE.NULL_META_REGION,
1802             "META location regionInfo is null");
1803         return false;
1804       }
1805       if (metaLocation.getHostname() == null) {
1806         errors.reportError(ERROR_CODE.NULL_META_REGION,
1807             "META location hostName is null");
1808         return false;
1809       }
1810       ServerName sn = metaLocation.getServerName();
1811       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
1812       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1813       if (hbckInfo == null) {
1814         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1815       } else {
1816         hbckInfo.metaEntry = m;
1817       }
1818     }
1819     return true;
1820   }
1821
1822   private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1823     return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1824       @Override
1825       public void abort(String why, Throwable e) {
1826         LOG.error(why, e);
1827         System.exit(1);
1828       }
1829
1830       @Override
1831       public boolean isAborted() {
1832         return false;
1833       }
1834
1835     });
1836   }
1837
1838   private ServerName getMetaRegionServerName(int replicaId)
1839   throws IOException, KeeperException {
1840     return new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
1841   }
1842
1843   /**
1844    * Contacts each regionserver and fetches metadata about regions.
1845    * @param regionServerList - the list of region servers to connect to
1846    * @throws IOException if a remote or network exception occurs
1847    */
1848   void processRegionServers(Collection<ServerName> regionServerList)
1849     throws IOException, InterruptedException {
1850
1851     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1852     List<Future<Void>> workFutures;
1853
1854     // loop to contact each region server in parallel
1855     for (ServerName rsinfo: regionServerList) {
1856       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1857     }
1858
1859     workFutures = executor.invokeAll(workItems);
1860
1861     for(int i=0; i<workFutures.size(); i++) {
1862       WorkItemRegion item = workItems.get(i);
1863       Future<Void> f = workFutures.get(i);
1864       try {
1865         f.get();
1866       } catch(ExecutionException e) {
1867         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1868             e.getCause());
1869       }
1870     }
1871   }
1872
1873   /**
1874    * Check consistency of all regions that have been found in previous phases.
1875    */
1876   private void checkAndFixConsistency()
1877   throws IOException, KeeperException, InterruptedException {
1878     // Divide the checks in two phases. One for default/primary replicas and another
1879     // for the non-primary ones. Keeps code cleaner this way.
1880
1881     List<CheckRegionConsistencyWorkItem> workItems =
1882         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1883     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1884       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1885         workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1886       }
1887     }
1888     checkRegionConsistencyConcurrently(workItems);
1889
1890     boolean prevHdfsCheck = shouldCheckHdfs();
1891     setCheckHdfs(false); //replicas don't have any hdfs data
1892     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1893     // deployed/undeployed replicas.
1894     List<CheckRegionConsistencyWorkItem> replicaWorkItems =
1895         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1896     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1897       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1898         replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1899       }
1900     }
1901     checkRegionConsistencyConcurrently(replicaWorkItems);
1902     setCheckHdfs(prevHdfsCheck);
1903
1904     // If some regions are skipped during the checkRegionConsistencyConcurrently() phase, we
1905     // might not get an accurate state of HBase if we continue. The config here allows users to
1906     // tune the tolerance for the number of skipped regions.
1907     // TODO: evaluate the consequence to continue the hbck operation without config.
1908     int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1909     int numOfSkippedRegions = skippedRegions.size();
1910     if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1911       throw new IOException(numOfSkippedRegions
1912         + " region(s) could not be checked or repaired.  See logs for detail.");
1913     }
1914
1915     if (shouldCheckHdfs()) {
1916       checkAndFixTableStates();
1917     }
1918   }
1919
1920   /**
1921    * Check consistency of all regions using multiple threads concurrently.
1922    */
1923   private void checkRegionConsistencyConcurrently(
1924     final List<CheckRegionConsistencyWorkItem> workItems)
1925     throws IOException, KeeperException, InterruptedException {
1926     if (workItems.isEmpty()) {
1927       return;  // nothing to check
1928     }
1929
1930     List<Future<Void>> workFutures = executor.invokeAll(workItems);
1931     for(Future<Void> f: workFutures) {
1932       try {
1933         f.get();
1934       } catch(ExecutionException e1) {
1935         LOG.warn("Could not check region consistency", e1.getCause());
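             // Unwrap the ExecutionException and rethrow the cause with its original
             // type where possible; anything else is wrapped in an IOException.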
1936         if (e1.getCause() instanceof IOException) {
1937           throw (IOException)e1.getCause();
1938         } else if (e1.getCause() instanceof KeeperException) {
1939           throw (KeeperException)e1.getCause();
1940         } else if (e1.getCause() instanceof InterruptedException) {
1941           throw (InterruptedException)e1.getCause();
1942         } else {
1943           throw new IOException(e1.getCause());
1944         }
1945       }
1946     }
1947   }
1948
1949   class CheckRegionConsistencyWorkItem implements Callable<Void> {
1950     private final String key;
1951     private final HbckInfo hbi;
1952
1953     CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
1954       this.key = key;
1955       this.hbi = hbi;
1956     }
1957
1958     @Override
1959     public synchronized Void call() throws Exception {
1960       try {
1961         checkRegionConsistency(key, hbi);
1962       } catch (Exception e) {
1963         // If the region is a non-META region, skip it and log a warning; if it is the META
1964         // region, we should not continue.
1965         LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
1966           + "'.", e);
1967         if (hbi.getHdfsHRI().isMetaRegion()) {
1968           throw e;
1969         }
1970         LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1971         addSkippedRegion(hbi);
1972       }
1973       return null;
1974     }
1975   }
1976
1977   private void addSkippedRegion(final HbckInfo hbi) {
1978     Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1979     if (skippedRegionNames == null) {
1980       skippedRegionNames = new HashSet<String>();
1981     }
1982     skippedRegionNames.add(hbi.getRegionNameAsString());
1983     skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1984   }
1985
1986   /**
1987    * Check and fix table states, assumes full info available:
1988    * - tableInfos
1989    * - empty tables loaded
1990    */
1991   private void checkAndFixTableStates() throws IOException {
1992     // first check dangling states
1993     for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1994       TableName tableName = entry.getKey();
1995       TableState tableState = entry.getValue();
1996       TableInfo tableInfo = tablesInfo.get(tableName);
1997       if (isTableIncluded(tableName)
1998           && !tableName.isSystemTable()
1999           && tableInfo == null) {
2000         if (fixMeta) {
2001           MetaTableAccessor.deleteTableState(connection, tableName);
2002           TableState state = MetaTableAccessor.getTableState(connection, tableName);
2003           if (state != null) {
2004             errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2005                 tableName + " unable to delete dangling table state " + tableState);
2006           }
2007         } else {
2008           errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2009               tableName + " has dangling table state " + tableState);
2010         }
2011       }
2012     }
2013     // check that all tables have states
2014     for (TableName tableName : tablesInfo.keySet()) {
2015       if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
2016         if (fixMeta) {
2017           MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
2018           TableState newState = MetaTableAccessor.getTableState(connection, tableName);
2019           if (newState == null) {
2020             errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2021                 "Unable to change state for table " + tableName + " in meta ");
2022           }
2023         } else {
2024           errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2025               tableName + " has no state in meta ");
2026         }
2027       }
2028     }
2029   }
2030
2031   private void preCheckPermission() throws IOException, AccessDeniedException {
2032     if (shouldIgnorePreCheckPermission()) {
2033       return;
2034     }
2035
2036     Path hbaseDir = FSUtils.getRootDir(getConf());
2037     FileSystem fs = hbaseDir.getFileSystem(getConf());
2038     UserProvider userProvider = UserProvider.instantiate(getConf());
2039     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
2040     FileStatus[] files = fs.listStatus(hbaseDir);
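         // Verify the current user has write access to every top-level entry under the
         // HBase root directory before attempting any repairs.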
2041     for (FileStatus file : files) {
2042       try {
2043         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
2044       } catch (AccessDeniedException ace) {
2045         LOG.warn("Got AccessDeniedException during preCheckPermission", ace);
2046         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
2047           + " does not have write perms to " + file.getPath()
2048           + ". Please rerun hbck as hdfs user " + file.getOwner());
2049         throw ace;
2050       }
2051     }
2052   }
2053
2054   /**
2055    * Deletes region from meta table
2056    */
2057   private void deleteMetaRegion(HbckInfo hi) throws IOException {
2058     deleteMetaRegion(hi.metaEntry.getRegionName());
2059   }
2060
2061   /**
2062    * Deletes region from meta table
2063    */
2064   private void deleteMetaRegion(byte[] metaKey) throws IOException {
2065     Delete d = new Delete(metaKey);
2066     meta.delete(d);
2067     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2068   }
2069
2070   /**
2071    * Reset the split parent region info in meta table
2072    */
2073   private void resetSplitParent(HbckInfo hi) throws IOException {
2074     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
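         // Atomically delete the SPLITA/SPLITB daughter references and rewrite the region
         // info with the split and offline flags cleared.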
2075     Delete d = new Delete(hi.metaEntry.getRegionName());
2076     d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2077     d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2078     mutations.add(d);
2079
2080     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
2081     hri.setOffline(false);
2082     hri.setSplit(false);
2083     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
2084     mutations.add(p);
2085
2086     meta.mutateRow(mutations);
2087     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2088   }
2089
2090   /**
2091    * This is a backwards-compatibility wrapper for permanently offlining a region
2092    * that should not be alive.  If the region server does not support the
2093    * "offline" method, it will use the closest unassign method instead.  This
2094    * will basically work until one attempts to disable or delete the affected
2095    * table.  The problem has to do with in-memory only master state, so
2096    * restarting the HMaster or failing over to another should fix this.
2097    */
2098   private void offline(byte[] regionName) throws IOException {
2099     String regionString = Bytes.toStringBinary(regionName);
2100     if (!rsSupportsOffline) {
2101       LOG.warn("Using unassign region " + regionString
2102           + " instead of using offline method, you should"
2103           + " restart HMaster after these repairs");
2104       admin.unassign(regionName, true);
2105       return;
2106     }
2107
2108     // first time we assume the rs's supports #offline.
2109     try {
2110       LOG.info("Offlining region " + regionString);
2111       admin.offline(regionName);
2112     } catch (IOException ioe) {
2113       String notFoundMsg = "java.lang.NoSuchMethodException: " +
2114         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2115       if (ioe.getMessage().contains(notFoundMsg)) {
2116         LOG.warn("Using unassign region " + regionString
2117             + " instead of using offline method, you should"
2118             + " restart HMaster after these repairs");
2119         rsSupportsOffline = false; // in the future just use unassign
2120         admin.unassign(regionName, true);
2121         return;
2122       }
2123       throw ioe;
2124     }
2125   }
2126
2127   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2128     undeployRegionsForHbi(hi);
2129     // undeploy replicas of the region (but only if the method is invoked for the primary)
2130     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2131       return;
2132     }
2133     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2134     for (int i = 1; i < numReplicas; i++) {
2135       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2136       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2137           hi.getPrimaryHRIForDeployedReplica(), i);
2138       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2139       if (h != null) {
2140         undeployRegionsForHbi(h);
2141         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2142         //in consistency checks
2143         h.setSkipChecks(true);
2144       }
2145     }
2146   }
2147
2148   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2149     for (OnlineEntry rse : hi.deployedEntries) {
2150       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2151       try {
2152         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2153         offline(rse.hri.getRegionName());
2154       } catch (IOException ioe) {
2155         LOG.warn("Got exception when attempting to offline region "
2156             + Bytes.toString(rse.hri.getRegionName()), ioe);
2157       }
2158     }
2159   }
2160
2161   /**
2162    * Attempts to undeploy a region from a region server based on information in
2163    * META.  Any operations that modify the file system should make sure that
2164    * its corresponding region is not deployed to prevent data races.
2165    *
2166    * A separate call is required to update the master in-memory region state
2167    * kept in the AssignmentManager.  Because disable uses this state instead of
2168    * that found in META, we can't seem to cleanly disable/delete tables that
2169    * have been hbck fixed.  When used on a version of HBase that does not have
2170    * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2171    * restart or failover may be required.
2172    */
2173   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2174     if (hi.metaEntry == null && hi.hdfsEntry == null) {
2175       undeployRegions(hi);
2176       return;
2177     }
2178
2179     // get assignment info and hregioninfo from meta.
2180     Get get = new Get(hi.getRegionName());
2181     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2182     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2183     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2184     // also get the locations of the replicas to close if the primary region is being closed
2185     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2186       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2187       for (int i = 0; i < numReplicas; i++) {
2188         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2189         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2190       }
2191     }
2192     Result r = meta.get(get);
2193     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2194     if (rl == null) {
2195       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2196           " since meta does not have a handle to reach it");
2197       return;
2198     }
2199     for (HRegionLocation h : rl.getRegionLocations()) {
2200       ServerName serverName = h.getServerName();
2201       if (serverName == null) {
2202         errors.reportError("Unable to close region "
2203             + hi.getRegionNameAsString() +  " because meta does not "
2204             + "have a handle to reach it.");
2205         continue;
2206       }
2207       HRegionInfo hri = h.getRegionInfo();
2208       if (hri == null) {
2209         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2210             + " because hbase:meta had invalid or missing "
2211             + HConstants.CATALOG_FAMILY_STR + ":"
2212             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2213             + " qualifier value.");
2214         continue;
2215       }
2216       // close the region -- close files and remove assignment
2217       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2218     }
2219   }
2220
2221   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2222     KeeperException, InterruptedException {
2223     // If we are trying to fix the errors
2224     if (shouldFixAssignments()) {
2225       errors.print(msg);
2226       undeployRegions(hbi);
2227       setShouldRerun();
2228       HRegionInfo hri = hbi.getHdfsHRI();
2229       if (hri == null) {
2230         hri = hbi.metaEntry;
2231       }
2232       HBaseFsckRepair.fixUnassigned(admin, hri);
2233       HBaseFsckRepair.waitUntilAssigned(admin, hri);
2234
2235       // also assign replicas if needed (do it only when this call operates on a primary replica)
2236       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
2237       int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2238       for (int i = 1; i < replicationCount; i++) {
2239         hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2240         HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2241         if (h != null) {
2242           undeployRegions(h);
2243           //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2244           //in consistency checks
2245           h.setSkipChecks(true);
2246         }
2247         HBaseFsckRepair.fixUnassigned(admin, hri);
2248         HBaseFsckRepair.waitUntilAssigned(admin, hri);
2249       }
2250
2251     }
2252   }
2253
2254   /**
2255    * Check a single region for consistency and correct deployment.
2256    */
2257   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2258   throws IOException, KeeperException, InterruptedException {
2259
2260     if (hbi.isSkipChecks()) return;
2261     String descriptiveName = hbi.toString();
2262     boolean inMeta = hbi.metaEntry != null;
2263     // In case not checking HDFS, assume the region is on HDFS
2264     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2265     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2266     boolean isDeployed = !hbi.deployedOn.isEmpty();
2267     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2268     boolean deploymentMatchesMeta =
2269       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2270       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2271     boolean splitParent =
2272         inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2273     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2274     boolean recentlyModified = inHdfs &&
2275       hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2276
2277     // ========== First the healthy cases =============
2278     if (hbi.containsOnlyHdfsEdits()) {
2279       return;
2280     }
2281     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2282       return;
2283     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2284       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2285         "table that is not deployed");
2286       return;
2287     } else if (recentlyModified) {
2288       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2289       return;
2290     }
2291     // ========== Cases where the region is not in hbase:meta =============
2292     else if (!inMeta && !inHdfs && !isDeployed) {
2293       // We shouldn't have record of this region at all then!
2294       assert false : "Entry for region with no data";
2295     } else if (!inMeta && !inHdfs && isDeployed) {
2296       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2297           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2298           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2299       if (shouldFixAssignments()) {
2300         undeployRegions(hbi);
2301       }
2302
2303     } else if (!inMeta && inHdfs && !isDeployed) {
2304       if (hbi.isMerged()) {
2305         // This region has already been merged, the remaining hdfs file will be
2306         // cleaned by CatalogJanitor later
2307         hbi.setSkipChecks(true);
2308         LOG.info("Region " + descriptiveName
2309             + " was merged recently; its file(s) will be cleaned up by CatalogJanitor later");
2310         return;
2311       }
2312       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2313           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2314           "or deployed on any region server");
2315       // restore region consistency of an adopted orphan
2316       if (shouldFixMeta()) {
2317         if (!hbi.isHdfsRegioninfoPresent()) {
2318           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2319               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2320               " used.");
2321           return;
2322         }
2323
2324         HRegionInfo hri = hbi.getHdfsHRI();
2325         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2326
2327         for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2328           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2329               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2330                 hri.getEndKey()) >= 0)
2331               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2332             if(region.isSplit() || region.isOffline()) continue;
2333             Path regionDir = hbi.getHdfsRegionDir();
2334             FileSystem fs = regionDir.getFileSystem(getConf());
2335             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2336             for (Path familyDir : familyDirs) {
2337               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2338               for (Path referenceFilePath : referenceFilePaths) {
2339                 Path parentRegionDir =
2340                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2341                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2342                   LOG.warn(hri + " start and stop keys are in the range of " + region
2343                       + ". It might not have been cleaned up from HDFS when the split of region "
2344                       + region + " failed. Hence deleting it from HDFS.");
2345                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2346                     regionDir.getParent(), hri);
2347                   return;
2348                 }
2349               }
2350             }
2351           }
2352         }
2353         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2354         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2355         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2356             admin.getClusterStatus().getServers(), numReplicas);
2357
2358         tryAssignmentRepair(hbi, "Trying to reassign region...");
2359       }
2360
2361     } else if (!inMeta && inHdfs && isDeployed) {
2362       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2363           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2364       debugLsr(hbi.getHdfsRegionDir());
2365       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2366         // for replicas, this means that we should undeploy the region (we would have
2367         // gone over the primaries and fixed meta holes in first phase under
2368         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2369         // this stage unless unwanted replica)
2370         if (shouldFixAssignments()) {
2371           undeployRegionsForHbi(hbi);
2372         }
2373       }
2374       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2375         if (!hbi.isHdfsRegioninfoPresent()) {
2376           LOG.error("This should have been repaired in table integrity repair phase");
2377           return;
2378         }
2379
2380         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2381         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2382         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2383             admin.getClusterStatus().getServers(), numReplicas);
2384         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2385       }
2386
2387     // ========== Cases where the region is in hbase:meta =============
2388     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2389       // check whether this is an actual error, or just transient state where parent
2390       // is not cleaned
2391       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2392         // check that split daughters are there
2393         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2394         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2395         if (infoA != null && infoB != null) {
2396           // we already processed or will process daughters. Move on, nothing to see here.
2397           hbi.setSkipChecks(true);
2398           return;
2399         }
2400       }
2401       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2402           + descriptiveName + " is a split parent in META, in HDFS, "
2403           + "and not deployed on any region server. This could be transient.");
2404       if (shouldFixSplitParents()) {
2405         setShouldRerun();
2406         resetSplitParent(hbi);
2407       }
2408     } else if (inMeta && !inHdfs && !isDeployed) {
2409       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2410           + descriptiveName + " found in META, but not in HDFS "
2411           + "or deployed on any region server.");
2412       if (shouldFixMeta()) {
2413         deleteMetaRegion(hbi);
2414       }
2415     } else if (inMeta && !inHdfs && isDeployed) {
2416       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2417           + " found in META, but not in HDFS, " +
2418           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2419       // We treat HDFS as ground truth.  Any information in meta is transient
2420       // and equivalent data can be regenerated.  So, lets unassign and remove
2421       // these problems from META.
2422       if (shouldFixAssignments()) {
2423         errors.print("Trying to fix unassigned region...");
2424         undeployRegions(hbi);
2425       }
2426       if (shouldFixMeta()) {
2427         // wait for it to complete
2428         deleteMetaRegion(hbi);
2429       }
2430     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2431       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2432           + " not deployed on any region server.");
2433       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2434     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2435       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2436           "Region " + descriptiveName + " should not be deployed according " +
2437           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2438       if (shouldFixAssignments()) {
2439         errors.print("Trying to close the region " + descriptiveName);
2440         setShouldRerun();
2441         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2442       }
2443     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2444       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2445           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2446           + " but is multiply assigned to region servers " +
2447           Joiner.on(", ").join(hbi.deployedOn));
2448       // If we are trying to fix the errors
2449       if (shouldFixAssignments()) {
2450         errors.print("Trying to fix assignment error...");
2451         setShouldRerun();
2452         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2453       }
2454     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2455       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2456           + descriptiveName + " listed in hbase:meta on region server " +
2457           hbi.metaEntry.regionServer + " but found on region server " +
2458           hbi.deployedOn.get(0));
2459       // If we are trying to fix the errors
2460       if (shouldFixAssignments()) {
2461         errors.print("Trying to fix assignment error...");
2462         setShouldRerun();
2463         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2464         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2465       }
2466     } else {
2467       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2468           " is in an unforeseen state:" +
2469           " inMeta=" + inMeta +
2470           " inHdfs=" + inHdfs +
2471           " isDeployed=" + isDeployed +
2472           " isMultiplyDeployed=" + isMultiplyDeployed +
2473           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2474           " shouldBeDeployed=" + shouldBeDeployed);
2475     }
2476   }
2477
2478   /**
2479    * Checks tables integrity. Goes over all regions and scans the tables.
2480    * Collects all the pieces for each table and checks if there are missing,
2481    * repeated or overlapping ones.
2482    * @throws IOException
2483    */
2484   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2485     tablesInfo = new TreeMap<TableName,TableInfo> ();
2486     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2487     for (HbckInfo hbi : regionInfoMap.values()) {
2488       // Check only valid, working regions
2489       if (hbi.metaEntry == null) {
2490         // this assumes that consistency check has run loadMetaEntry
2491         Path p = hbi.getHdfsRegionDir();
2492         if (p == null) {
2493           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2494         }
2495
2496         // TODO test.
2497         continue;
2498       }
2499       if (hbi.metaEntry.regionServer == null) {
2500         errors.detail("Skipping region because no region server: " + hbi);
2501         continue;
2502       }
2503       if (hbi.metaEntry.isOffline()) {
2504         errors.detail("Skipping region because it is offline: " + hbi);
2505         continue;
2506       }
2507       if (hbi.containsOnlyHdfsEdits()) {
2508         errors.detail("Skipping region because it only contains edits: " + hbi);
2509         continue;
2510       }
2511
2512       // Missing regionDir or over-deployment is checked elsewhere. Include
2513       // these cases in modTInfo, so we can evaluate those regions as part of
2514       // the region chain in META
2515       //if (hbi.foundRegionDir == null) continue;
2516       //if (hbi.deployedOn.size() != 1) continue;
2517       if (hbi.deployedOn.size() == 0) continue;
2518
2519       // We should be safe here
2520       TableName tableName = hbi.metaEntry.getTable();
2521       TableInfo modTInfo = tablesInfo.get(tableName);
2522       if (modTInfo == null) {
2523         modTInfo = new TableInfo(tableName);
2524       }
2525       for (ServerName server : hbi.deployedOn) {
2526         modTInfo.addServer(server);
2527       }
2528
2529       if (!hbi.isSkipChecks()) {
2530         modTInfo.addRegionInfo(hbi);
2531       }
2532
2533       tablesInfo.put(tableName, modTInfo);
2534     }
2535
2536     loadTableInfosForTablesWithNoRegion();
2537
2538     logParallelMerge();
2539     for (TableInfo tInfo : tablesInfo.values()) {
2540       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2541       if (!tInfo.checkRegionChain(handler)) {
2542         errors.report("Found inconsistency in table " + tInfo.getName());
2543       }
2544     }
2545     return tablesInfo;
2546   }
2547
2548   /** Loads table infos for tables that may not have been included because no regions were
2549    * reported for the table, but the table dir is present in HDFS.
2550    */
2551   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2552     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2553     for (HTableDescriptor htd : allTables.values()) {
2554       if (checkMetaOnly && !htd.isMetaTable()) {
2555         continue;
2556       }
2557
2558       TableName tableName = htd.getTableName();
2559       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2560         TableInfo tableInfo = new TableInfo(tableName);
2561         tableInfo.htds.add(htd);
2562         tablesInfo.put(htd.getTableName(), tableInfo);
2563       }
2564     }
2565   }
2566
2567   /**
2568    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2569    * @return number of file move fixes done to merge regions.
2570    */
2571   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2572     int fileMoves = 0;
2573     String thread = Thread.currentThread().getName();
2574     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2575     debugLsr(contained.getHdfsRegionDir());
2576
2577     // rename the contained into the container.
2578     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2579     FileStatus[] dirs = null;
2580     try {
2581       dirs = fs.listStatus(contained.getHdfsRegionDir());
2582     } catch (FileNotFoundException fnfe) {
2583       // region we are attempting to merge in is not present!  Since this is a merge, there is
2584       // no harm skipping this region if it does not exist.
2585       if (!fs.exists(contained.getHdfsRegionDir())) {
2586         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2587             + " is missing. Assuming already sidelined or moved.");
2588       } else {
2589         sidelineRegionDir(fs, contained);
2590       }
2591       return fileMoves;
2592     }
2593
2594     if (dirs == null) {
2595       if (!fs.exists(contained.getHdfsRegionDir())) {
2596         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2597             + " already sidelined.");
2598       } else {
2599         sidelineRegionDir(fs, contained);
2600       }
2601       return fileMoves;
2602     }
2603
2604     for (FileStatus cf : dirs) {
2605       Path src = cf.getPath();
2606       Path dst =  new Path(targetRegionDir, src.getName());
2607
2608       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2609         // do not copy the old .regioninfo file.
2610         continue;
2611       }
2612
2613       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2614         // do not copy the .oldlogs files
2615         continue;
2616       }
2617
2618       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2619       // FileSystem.rename is inconsistent with directories -- if the
2620       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2621       // it moves the src into the dst dir resulting in (foo/a/b).  If
2622       // the dst does not exist and the src is a dir, src becomes dst (foo/b); see the sketch after this method.
2623       for (FileStatus hfile : fs.listStatus(src)) {
2624         boolean success = fs.rename(hfile.getPath(), dst);
2625         if (success) {
2626           fileMoves++;
2627         }
2628       }
2629       LOG.debug("[" + thread + "] Sideline directory contents:");
2630       debugLsr(targetRegionDir);
2631     }
2632
2633     // if all success.
2634     sidelineRegionDir(fs, contained);
2635     LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2636         getSidelineDir());
2637     debugLsr(contained.getHdfsRegionDir());
2638
2639     return fileMoves;
2640   }
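
  /*
   * Hedged illustration of the FileSystem.rename caveat noted in mergeRegionDirs above: renaming
   * a directory onto an existing directory nests it (foo/a/b), while renaming onto a missing
   * path makes the source become the destination.  The helper below is a minimal sketch and not
   * hbck API -- it moves a directory's children one file at a time into an existing destination
   * directory, the same per-file strategy mergeRegionDirs uses to get a flat merge.
   */
  private static int moveChildrenIntoExistingDir(FileSystem fs, Path srcDir, Path existingDstDir)
      throws IOException {
    int moved = 0;
    for (FileStatus child : fs.listStatus(srcDir)) {
      // rename(file, existingDir) places the file inside existingDir under its own name.
      if (fs.rename(child.getPath(), existingDstDir)) {
        moved++;
      }
    }
    return moved;
  }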
2641
2642
2643   static class WorkItemOverlapMerge implements Callable<Void> {
2644     private TableIntegrityErrorHandler handler;
2645     Collection<HbckInfo> overlapgroup;
2646
2647     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2648       this.handler = handler;
2649       this.overlapgroup = overlapgroup;
2650     }
2651
2652     @Override
2653     public Void call() throws Exception {
2654       handler.handleOverlapGroup(overlapgroup);
2655       return null;
2656     }
2657   };
2658
2659
2660   /**
2661    * Maintain information about a particular table.
2662    */
2663   public class TableInfo {
2664     TableName tableName;
2665     TreeSet <ServerName> deployedOn;
2666
2667     // backwards regions
2668     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2669
2670     // sidelined big overlapped regions
2671     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2672
2673     // region split calculator
2674     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2675
2676     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2677     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2678
2679     // key = start split, values = set of splits in problem group
2680     final Multimap<byte[], HbckInfo> overlapGroups =
2681       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2682
2683     // list of regions derived from meta entries.
2684     private ImmutableList<HRegionInfo> regionsFromMeta = null;
2685
2686     TableInfo(TableName name) {
2687       this.tableName = name;
2688       deployedOn = new TreeSet <ServerName>();
2689     }
2690
2691     /**
2692      * @return the descriptor common to all regions, or null if there are none or multiple
2693      */
2694     private HTableDescriptor getHTD() {
2695       if (htds.size() == 1) {
2696         return (HTableDescriptor)htds.toArray()[0];
2697       } else {
2698         LOG.error("None/Multiple table descriptors found for table '"
2699           + tableName + "' regions: " + htds);
2700       }
2701       return null;
2702     }
2703
2704     public void addRegionInfo(HbckInfo hir) {
2705       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2706         // end key is absolute end key, just add it.
2707         // ignore replicas other than primary for these checks
2708         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2709         return;
2710       }
2711
2712       // if not the absolute end key, check for cycle
2713       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2714         errors.reportError(
2715             ERROR_CODE.REGION_CYCLE,
2716             String.format("The endkey for this region comes before the "
2717                 + "startkey, startkey=%s, endkey=%s",
2718                 Bytes.toStringBinary(hir.getStartKey()),
2719                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2720         backwards.add(hir);
2721         return;
2722       }
2723
2724       // main case, add to split calculator
2725       // ignore replicas other than primary for these checks
2726       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2727     }
2728
2729     public void addServer(ServerName server) {
2730       this.deployedOn.add(server);
2731     }
2732
2733     public TableName getName() {
2734       return tableName;
2735     }
2736
2737     public int getNumRegions() {
2738       return sc.getStarts().size() + backwards.size();
2739     }
2740
2741     public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2742       // lazy loaded, synchronized to ensure a single load
2743       if (regionsFromMeta == null) {
2744         List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2745         for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2746           if (tableName.equals(h.getTableName())) {
2747             if (h.metaEntry != null) {
2748               regions.add((HRegionInfo) h.metaEntry);
2749             }
2750           }
2751         }
2752         regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2753       }
2754
2755       return regionsFromMeta;
2756     }
2757
2758     private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2759       ErrorReporter errors;
2760
2761       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2762         this.errors = errors;
2763         setTableInfo(ti);
2764       }
2765
2766       @Override
2767       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2768         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2769             "First region should start with an empty key.  You need to "
2770             + " create a new region and regioninfo in HDFS to plug the hole.",
2771             getTableInfo(), hi);
2772       }
2773
2774       @Override
2775       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2776         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2777             "Last region should end with an empty key. You need to "
2778                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2779       }
2780
2781       @Override
2782       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2783         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2784             "Region has the same start and end key.", getTableInfo(), hi);
2785       }
2786
2787       @Override
2788       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2789         byte[] key = r1.getStartKey();
2790         // dup start key
2791         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2792             "Multiple regions have the same startkey: "
2793             + Bytes.toStringBinary(key), getTableInfo(), r1);
2794         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2795             "Multiple regions have the same startkey: "
2796             + Bytes.toStringBinary(key), getTableInfo(), r2);
2797       }
2798
2799       @Override
2800       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2801         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2802             "There is an overlap in the region chain.",
2803             getTableInfo(), hi1, hi2);
2804       }
2805
2806       @Override
2807       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2808         errors.reportError(
2809             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2810             "There is a hole in the region chain between "
2811                 + Bytes.toStringBinary(holeStart) + " and "
2812                 + Bytes.toStringBinary(holeStop)
2813                 + ".  You need to create a new .regioninfo and region "
2814                 + "dir in hdfs to plug the hole.");
2815       }
2816     };
2817
2818     /**
2819      * This handler fixes integrity errors from hdfs information.  There are
2820      * basically three classes of integrity problems 1) holes, 2) overlaps, and
2821      * 3) invalid regions.
2822      *
2823      * This class overrides methods that fix holes and the overlap group case.
2824      * Individual cases of particular overlaps are handled by the general
2825      * overlap group merge repair case.
2826      *
2827      * If hbase is online, this forces regions offline before doing merge
2828      * operations.
2829      */
2830     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2831       Configuration conf;
2832
2833       boolean fixOverlaps = true;
2834
2835       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2836           boolean fixHoles, boolean fixOverlaps) {
2837         super(ti, errors);
2838         this.conf = conf;
2839         this.fixOverlaps = fixOverlaps;
2840         // TODO properly use fixHoles
2841       }
2842
2843       /**
2844        * This is a special case hole -- when the first region of a table is
2845        * missing from META, HBase doesn't acknowledge the existence of the
2846        * table.
2847        */
2848       @Override
2849       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2850         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2851             "First region should start with an empty key.  Creating a new " +
2852             "region and regioninfo in HDFS to plug the hole.",
2853             getTableInfo(), next);
2854         HTableDescriptor htd = getTableInfo().getHTD();
2855         // from special EMPTY_START_ROW to next region's startKey
2856         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2857             HConstants.EMPTY_START_ROW, next.getStartKey());
2858
2859         // TODO test
2860         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2861         LOG.info("Table region start key was not empty.  Created new empty region: "
2862             + newRegion + " " +region);
2863         fixes++;
2864       }
2865
2866       @Override
2867       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2868         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2869             "Last region should end with an empty key.  Creating a new "
2870                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2871         HTableDescriptor htd = getTableInfo().getHTD();
2872         // from curEndKey to EMPTY_START_ROW
2873         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2874             HConstants.EMPTY_START_ROW);
2875
2876         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2877         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2878             + " " + region);
2879         fixes++;
2880       }
2881
2882       /**
2883        * There is a hole in the hdfs regions that violates the table integrity
2884        * rules.  Create a new empty region that patches the hole.
2885        */
2886       @Override
2887       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2888         errors.reportError(
2889             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2890             "There is a hole in the region chain between "
2891                 + Bytes.toStringBinary(holeStartKey) + " and "
2892                 + Bytes.toStringBinary(holeStopKey)
2893                 + ".  Creating a new regioninfo and region "
2894                 + "dir in hdfs to plug the hole.");
2895         HTableDescriptor htd = getTableInfo().getHTD();
2896         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2897         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2898         LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2899         fixes++;
2900       }
2901
2902       /**
2903        * This takes set of overlapping regions and merges them into a single
2904        * region.  This covers cases like degenerate regions, shared start key,
2905        * general overlaps, duplicate ranges, and partial overlapping regions.
2906        *
2907        * Cases:
2908        * - Clean regions that overlap
2909        * - Regions that contain only .oldlogs (their start/stop range cannot be determined)
2910        *
2911        * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
2912        */
2913       @Override
2914       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2915           throws IOException {
2916         Preconditions.checkNotNull(overlap);
2917         Preconditions.checkArgument(overlap.size() >0);
2918
2919         if (!this.fixOverlaps) {
2920           LOG.warn("Not attempting to repair overlaps.");
2921           return;
2922         }
2923
2924         if (overlap.size() > maxMerge) {
2925           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2926             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2927           if (sidelineBigOverlaps) {
2928             // we only sideline big overlap groups that exceed the max number of regions to merge
2929             sidelineBigOverlaps(overlap);
2930           }
2931           return;
2932         }
2933
2934         mergeOverlaps(overlap);
2935       }
2936
2937       void mergeOverlaps(Collection<HbckInfo> overlap)
2938           throws IOException {
2939         String thread = Thread.currentThread().getName();
2940         LOG.info("== [" + thread + "] Merging regions into one region: "
2941           + Joiner.on(",").join(overlap));
2942         // get the min / max range and close all concerned regions
2943         Pair<byte[], byte[]> range = null;
2944         for (HbckInfo hi : overlap) {
2945           if (range == null) {
2946             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2947           } else {
2948             if (RegionSplitCalculator.BYTES_COMPARATOR
2949                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2950               range.setFirst(hi.getStartKey());
2951             }
2952             if (RegionSplitCalculator.BYTES_COMPARATOR
2953                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2954               range.setSecond(hi.getEndKey());
2955             }
2956           }
2957           // need to close files so delete can happen.
2958           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2959           LOG.debug("[" + thread + "] Contained region dir before close");
2960           debugLsr(hi.getHdfsRegionDir());
2961           try {
2962             LOG.info("[" + thread + "] Closing region: " + hi);
2963             closeRegion(hi);
2964           } catch (IOException ioe) {
2965             LOG.warn("[" + thread + "] Was unable to close region " + hi
2966               + ".  Just continuing... ", ioe);
2967           } catch (InterruptedException e) {
2968             LOG.warn("[" + thread + "] Was unable to close region " + hi
2969               + ".  Just continuing... ", e);
2970           }
2971
2972           try {
2973             LOG.info("[" + thread + "] Offlining region: " + hi);
2974             offline(hi.getRegionName());
2975           } catch (IOException ioe) {
2976             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2977               + ".  Just continuing... ", ioe);
2978           }
2979         }
2980
2981         // create new empty container region.
2982         HTableDescriptor htd = getTableInfo().getHTD();
2983         // from start key to end Key
2984         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2985             range.getSecond());
2986         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2987         LOG.info("[" + thread + "] Created new empty container region: " +
2988             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2989         debugLsr(region.getRegionFileSystem().getRegionDir());
2990
2991         // all target regions are closed, should be able to safely cleanup.
2992         boolean didFix= false;
2993         Path target = region.getRegionFileSystem().getRegionDir();
2994         for (HbckInfo contained : overlap) {
2995           LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
2996           int merges = mergeRegionDirs(target, contained);
2997           if (merges > 0) {
2998             didFix = true;
2999           }
3000         }
3001         if (didFix) {
3002           fixes++;
3003         }
3004       }
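
      /*
       * Worked example (hypothetical keys) of the range computation in mergeOverlaps above: for
       * overlapping regions [b,d), [a,c) and [c,f), the running minimum of the start keys is a
       * and the running maximum of the end keys is f, so the new empty container region created
       * to absorb the group covers [a,f).
       */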
3005
3006       /**
3007        * Sideline some regions in a big overlap group so that it ends up with fewer
3008        * regions, making them easier to merge later on.  (A worked example of the
3009        * sideline count follows this method.)
3010        * @param bigOverlap the overlap group with more regions than maxMerge
3011        * @throws IOException
3012        */
3013       void sidelineBigOverlaps(
3014           Collection<HbckInfo> bigOverlap) throws IOException {
3015         int overlapsToSideline = bigOverlap.size() - maxMerge;
3016         if (overlapsToSideline > maxOverlapsToSideline) {
3017           overlapsToSideline = maxOverlapsToSideline;
3018         }
3019         List<HbckInfo> regionsToSideline =
3020           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
3021         FileSystem fs = FileSystem.get(conf);
3022         for (HbckInfo regionToSideline: regionsToSideline) {
3023           try {
3024             LOG.info("Closing region: " + regionToSideline);
3025             closeRegion(regionToSideline);
3026           } catch (IOException ioe) {
3027             LOG.warn("Was unable to close region " + regionToSideline
3028               + ".  Just continuing... ", ioe);
3029           } catch (InterruptedException e) {
3030             LOG.warn("Was unable to close region " + regionToSideline
3031               + ".  Just continuing... ", e);
3032           }
3033
3034           try {
3035             LOG.info("Offlining region: " + regionToSideline);
3036             offline(regionToSideline.getRegionName());
3037           } catch (IOException ioe) {
3038             LOG.warn("Unable to offline region from master: " + regionToSideline
3039               + ".  Just continuing... ", ioe);
3040           }
3041
3042           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
3043           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
3044           if (sidelineRegionDir != null) {
3045             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3046             LOG.info("After sidelined big overlapped region: "
3047               + regionToSideline.getRegionNameAsString()
3048               + " to " + sidelineRegionDir.toString());
3049             fixes++;
3050           }
3051         }
3052       }
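
      /*
       * Worked example for sidelineBigOverlaps above (the numbers are hypothetical, not defaults):
       * with maxMerge = 5 and maxOverlapsToSideline = 2, an overlap group of 12 regions computes
       * overlapsToSideline = 12 - 5 = 7, which is then capped at maxOverlapsToSideline, so only
       * the 2 ranges picked by RegionSplitCalculator.findBigRanges are sidelined.
       */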
3053     }
3054
3055     /**
3056      * Check the region chain (from META) of this table.  We are looking for
3057      * holes, overlaps, and cycles.
3058      * @return false if there are errors
3059      * @throws IOException
3060      */
3061     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
3062       // When a table is disabled there is no need to check its region chain. If some of its
3063       // regions are accidentally deployed, the code below might report issues such as a missing
3064       // start or end region or a hole in the chain, and may try to fix them, which is unwanted.
3065       if (isTableDisabled(this.tableName)) {
3066         return true;
3067       }
3068       int originalErrorsCount = errors.getErrorList().size();
3069       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
3070       SortedSet<byte[]> splits = sc.getSplits();
3071
3072       byte[] prevKey = null;
3073       byte[] problemKey = null;
3074
3075       if (splits.size() == 0) {
3076         // no region for this table
3077         handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
3078       }
3079
3080       for (byte[] key : splits) {
3081         Collection<HbckInfo> ranges = regions.get(key);
3082         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
3083           for (HbckInfo rng : ranges) {
3084             handler.handleRegionStartKeyNotEmpty(rng);
3085           }
3086         }
3087
3088         // check for degenerate ranges
3089         for (HbckInfo rng : ranges) {
3090           // special endkey case converts '' to null
3091           byte[] endKey = rng.getEndKey();
3092           endKey = (endKey.length == 0) ? null : endKey;
3093           if (Bytes.equals(rng.getStartKey(),endKey)) {
3094             handler.handleDegenerateRegion(rng);
3095           }
3096         }
3097
3098         if (ranges.size() == 1) {
3099           // this split key is ok -- no overlap, not a hole.
3100           if (problemKey != null) {
3101             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3102           }
3103           problemKey = null; // fell through, no more problem.
3104         } else if (ranges.size() > 1) {
3105           // set the new problem key group name; if we already have a problem key, just
3106           // keep using it.
3107           if (problemKey == null) {
3108             // only for overlap regions.
3109             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
3110             problemKey = key;
3111           }
3112           overlapGroups.putAll(problemKey, ranges);
3113
3114           // record errors
3115           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
3116           // this is dumb and O(n^2), but it shouldn't happen often
3117           for (HbckInfo r1 : ranges) {
3118             if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3119             subRange.remove(r1);
3120             for (HbckInfo r2 : subRange) {
3121               if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3122               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3123                 handler.handleDuplicateStartKeys(r1,r2);
3124               } else {
3125                 // overlap
3126                 handler.handleOverlapInRegionChain(r1, r2);
3127               }
3128             }
3129           }
3130
3131         } else if (ranges.size() == 0) {
3132           if (problemKey != null) {
3133             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3134           }
3135           problemKey = null;
3136
3137           byte[] holeStopKey = sc.getSplits().higher(key);
3138           // if higher key is null we reached the top.
3139           if (holeStopKey != null) {
3140             // hole
3141             handler.handleHoleInRegionChain(key, holeStopKey);
3142           }
3143         }
3144         prevKey = key;
3145       }
3146
3147       // When the last region of a table is proper and has an empty end key, 'prevKey'
3148       // will be null.
3149       if (prevKey != null) {
3150         handler.handleRegionEndKeyNotEmpty(prevKey);
3151       }
3152
3153       // TODO fold this into the TableIntegrityHandler
3154       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3155         boolean ok = handleOverlapsParallel(handler, prevKey);
3156         if (!ok) {
3157           return false;
3158         }
3159       } else {
3160         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3161           handler.handleOverlapGroup(overlap);
3162         }
3163       }
3164
3165       if (details) {
3166         // do full region split map dump
3167         errors.print("---- Table '"  +  this.tableName
3168             + "': region split map");
3169         dump(splits, regions);
3170         errors.print("---- Table '"  +  this.tableName
3171             + "': overlap groups");
3172         dumpOverlapProblems(overlapGroups);
3173         errors.print("There are " + overlapGroups.keySet().size()
3174             + " overlap groups with " + overlapGroups.size()
3175             + " overlapping regions");
3176       }
3177       if (!sidelinedRegions.isEmpty()) {
3178         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3179         errors.print("---- Table '"  +  this.tableName
3180             + "': sidelined big overlapped regions");
3181         dumpSidelinedRegions(sidelinedRegions);
3182       }
3183       return errors.getErrorList().size() == originalErrorsCount;
3184     }
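
    /*
     * Minimal sketch, not part of hbck: checkRegionChain above classifies each split key by how
     * many ranges cover it -- exactly one covering range is healthy, more than one marks an
     * overlap group, and zero marks a hole running up to the next split key.  The hypothetical
     * helper below mirrors just that counting step for an already-computed coverage map.
     */
    int countChainProblemsExample(TreeSet<byte[]> splits, Multimap<byte[], HbckInfo> coverage) {
      int problems = 0;
      for (byte[] key : splits) {
        int covering = coverage.get(key).size();
        if (covering > 1) {
          problems++;   // overlap: several regions share or span this split key
        } else if (covering == 0 && splits.higher(key) != null) {
          problems++;   // hole: nothing covers the range up to the next split key
        }
      }
      return problems;
    }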
3185
3186     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3187         throws IOException {
3188       // we parallelize the overlap handlers for the case where we have lots of groups to fix.
3189       // We can safely assume each group is independent.
3190       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
3191       List<Future<Void>> rets;
3192       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3193         //
3194         merges.add(new WorkItemOverlapMerge(overlap, handler));
3195       }
3196       try {
3197         rets = executor.invokeAll(merges);
3198       } catch (InterruptedException e) {
3199         LOG.error("Overlap merges were interrupted", e);
3200         return false;
3201       }
3202       for(int i=0; i<merges.size(); i++) {
3203         WorkItemOverlapMerge work = merges.get(i);
3204         Future<Void> f = rets.get(i);
3205         try {
3206           f.get();
3207         } catch(ExecutionException e) {
3208           LOG.warn("Failed to merge overlap group " + work, e.getCause());
3209         } catch (InterruptedException e) {
3210           LOG.error("Waiting for overlap merges was interrupted", e);
3211           return false;
3212         }
3213       }
3214       return true;
3215     }
3216
3217     /**
3218      * Dumps the split keys and their covering regions in a human-readable form for debugging.
3219      *
3220      * @param splits
3221      * @param regions
3222      */
3223     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3224       // we display this way because the last end key should be displayed as well.
3225       StringBuilder sb = new StringBuilder();
3226       for (byte[] k : splits) {
3227         sb.setLength(0); // clear out existing buffer, if any.
3228         sb.append(Bytes.toStringBinary(k) + ":\t");
3229         for (HbckInfo r : regions.get(k)) {
3230           sb.append("[ "+ r.toString() + ", "
3231               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3232         }
3233         errors.print(sb.toString());
3234       }
3235     }
3236   }
3237
3238   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3239     // we display this way because the last end key should be displayed as
3240     // well.
3241     for (byte[] k : regions.keySet()) {
3242       errors.print(Bytes.toStringBinary(k) + ":");
3243       for (HbckInfo r : regions.get(k)) {
3244         errors.print("[ " + r.toString() + ", "
3245             + Bytes.toStringBinary(r.getEndKey()) + "]");
3246       }
3247       errors.print("----");
3248     }
3249   }
3250
3251   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3252     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3253       TableName tableName = entry.getValue().getTableName();
3254       Path path = entry.getKey();
3255       errors.print("This sidelined region dir should be bulk loaded: "
3256         + path.toString());
3257       errors.print("Bulk load command looks like: "
3258         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3259         + path.toUri().getPath() + " "+ tableName);
3260     }
3261   }
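
  /*
   * Illustration only -- the path and table name below are hypothetical; the real values come
   * from the sidelined entries printed by dumpSidelinedRegions above:
   *
   *   hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles \
   *       /path/to/sidelined/region/dir some_table
   */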
3262
3263   public Multimap<byte[], HbckInfo> getOverlapGroups(
3264       TableName table) {
3265     TableInfo ti = tablesInfo.get(table);
3266     return ti.overlapGroups;
3267   }
3268
3269   /**
3270    * Return a list of user-space table names whose metadata has not been
3271    * modified within the number of milliseconds specified by timelag.
3272    * If none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3273    * SPLITA_QUALIFIER or SPLITB_QUALIFIER columns have changed in the last
3274    * timelag milliseconds, the table is a candidate to be returned.
3275    * @return tables that have not been modified recently
3276    * @throws IOException if an error is encountered
3277    */
3278   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3279     List<TableName> tableNames = new ArrayList<TableName>();
3280     long now = EnvironmentEdgeManager.currentTime();
3281
3282     for (HbckInfo hbi : regionInfoMap.values()) {
3283       MetaEntry info = hbi.metaEntry;
3284
3285       // if the start key is zero, then we have found the first region of a table.
3286       // pick only those tables that were not modified in the last few milliseconds.
3287       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3288         if (info.modTime + timelag < now) {
3289           tableNames.add(info.getTable());
3290         } else {
3291           numSkipped.incrementAndGet(); // one more in-flux table
3292         }
3293       }
3294     }
3295     return getHTableDescriptors(tableNames);
3296   }
3297
3298   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3299     HTableDescriptor[] htd = new HTableDescriptor[0];
3300     LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3301     try (Connection conn = ConnectionFactory.createConnection(getConf());
3302         Admin admin = conn.getAdmin()) {
3303       htd = admin.getTableDescriptorsByTableName(tableNames);
3304     } catch (IOException e) {
3305       LOG.debug("Exception getting table descriptors", e);
3306     }
3307     return htd;
3308   }
3309
3310   /**
3311    * Gets the entry in regionInfo corresponding to the given encoded
3312    * region name. If the region has not been seen yet, a new entry is added
3313    * and returned.
3314    */
3315   private synchronized HbckInfo getOrCreateInfo(String name) {
3316     HbckInfo hbi = regionInfoMap.get(name);
3317     if (hbi == null) {
3318       hbi = new HbckInfo(null);
3319       regionInfoMap.put(name, hbi);
3320     }
3321     return hbi;
3322   }
3323
3324   private void checkAndFixTableLocks() throws IOException {
3325     TableLockChecker checker = new TableLockChecker(zkw, errors);
3326     checker.checkTableLocks();
3327
3328     if (this.fixTableLocks) {
3329       checker.fixExpiredTableLocks();
3330     }
3331   }
3332
3333   private void checkAndFixReplication() throws IOException {
3334     ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
3335     checker.checkUnDeletedQueues();
3336
3337     if (checker.hasUnDeletedQueues() && this.fixReplication) {
3338       checker.fixUnDeletedQueues();
3339       setShouldRerun();
3340     }
3341   }
3342
3343   /**
3344    * Check values in regionInfo for hbase:meta.
3345    * Check whether zero regions or more than one region claim to be hosting hbase:meta.
3346    * If there are inconsistencies (i.e. zero regions, or more than one region
3347    * pretending to hold hbase:meta), try to fix that and report an error.
3348    * @throws IOException from HBaseFsckRepair functions
3349    * @throws KeeperException
3350    * @throws InterruptedException
3351    */
3352   boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3353     Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
3354     for (HbckInfo value : regionInfoMap.values()) {
3355       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3356         metaRegions.put(value.getReplicaId(), value);
3357       }
3358     }
3359     int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3360         .getRegionReplication();
3361     boolean noProblem = true;
3362     // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
3363     // Check the deployed servers. It should be exactly one server for each replica.
3364     for (int i = 0; i < metaReplication; i++) {
3365       HbckInfo metaHbckInfo = metaRegions.remove(i);
3366       List<ServerName> servers = new ArrayList<ServerName>();
3367       if (metaHbckInfo != null) {
3368         servers = metaHbckInfo.deployedOn;
3369       }
3370       if (servers.size() != 1) {
3371         noProblem = false;
3372         if (servers.size() == 0) {
3373           assignMetaReplica(i);
3374         } else if (servers.size() > 1) {
3375           errors
3376           .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
3377                        metaHbckInfo.getReplicaId() + " is found on more than one region.");
3378           if (shouldFixAssignments()) {
3379             errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3380                          metaHbckInfo.getReplicaId() +"..");
3381             setShouldRerun();
3382             // try to fix it (treat it as a dupe assignment)
3383             HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3384           }
3385         }
3386       }
3387     }
3388     // unassign whatever is remaining in metaRegions. They are excess replicas.
3389     for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3390       noProblem = false;
3391       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3392           "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3393           ", deployed " + metaRegions.size());
3394       if (shouldFixAssignments()) {
3395         errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3396             " of hbase:meta..");
3397         setShouldRerun();
3398         unassignMetaReplica(entry.getValue());
3399       }
3400     }
3401     // if noProblem is false, rerun hbck with hopefully fixed META
3402     // if noProblem is true, no errors, so continue normally
3403     return noProblem;
3404   }
3405
3406   private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3407   KeeperException {
3408     undeployRegions(hi);
3409     ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
3410   }
3411
3412   private void assignMetaReplica(int replicaId)
3413       throws IOException, KeeperException, InterruptedException {
3414     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3415         replicaId +" is not found on any region.");
3416     if (shouldFixAssignments()) {
3417       errors.print("Trying to fix a problem with hbase:meta..");
3418       setShouldRerun();
3419       // try to fix it (treat it as unassigned region)
3420       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3421           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3422       HBaseFsckRepair.fixUnassigned(admin, h);
3423       HBaseFsckRepair.waitUntilAssigned(admin, h);
3424     }
3425   }
3426
3427   /**
3428    * Scan hbase:meta, adding all regions found to the regionInfo map.
3429    * @throws IOException if an error is encountered
3430    */
3431   boolean loadMetaEntries() throws IOException {
3432     MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3433       int countRecord = 1;
3434
3435       // comparator to sort KeyValues with latest modtime
3436       final Comparator<Cell> comp = new Comparator<Cell>() {
3437         @Override
3438         public int compare(Cell k1, Cell k2) {
3439           return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3440         }
3441       };
3442
3443       @Override
3444       public boolean visit(Result result) throws IOException {
3445         try {
3446
3447           // record the latest modification of this META record
3448           long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3449           RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3450           if (rl == null) {
3451             emptyRegionInfoQualifiers.add(result);
3452             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3453               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3454             return true;
3455           }
3456           ServerName sn = null;
3457           if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
3458               rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3459             emptyRegionInfoQualifiers.add(result);
3460             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3461               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3462             return true;
3463           }
3464           HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3465           if (!(isTableIncluded(hri.getTable())
3466               || hri.isMetaRegion())) {
3467             return true;
3468           }
3469           PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
3470           for (HRegionLocation h : rl.getRegionLocations()) {
3471             if (h == null || h.getRegionInfo() == null) {
3472               continue;
3473             }
3474             sn = h.getServerName();
3475             hri = h.getRegionInfo();
3476
3477             MetaEntry m = null;
3478             if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
3479               m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3480             } else {
3481               m = new MetaEntry(hri, sn, ts, null, null);
3482             }
3483             HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3484             if (previous == null) {
3485               regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3486             } else if (previous.metaEntry == null) {
3487               previous.metaEntry = m;
3488             } else {
3489               throw new IOException("Two entries in hbase:meta are same " + previous);
3490             }
3491           }
3492           PairOfSameType<HRegionInfo> mergeRegions = MetaTableAccessor.getMergeRegions(result);
3493           for (HRegionInfo mergeRegion : new HRegionInfo[] {
3494               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3495             if (mergeRegion != null) {
3496               // This region has already been merged
3497               HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3498               hbInfo.setMerged(true);
3499             }
3500           }
3501
3502           // show proof of progress to the user, once for every 100 records.
3503           if (countRecord % 100 == 0) {
3504             errors.progress();
3505           }
3506           countRecord++;
3507           return true;
3508         } catch (RuntimeException e) {
3509           LOG.error("Result=" + result);
3510           throw e;
3511         }
3512       }
3513     };
3514     if (!checkMetaOnly) {
3515       // Scan hbase:meta to pick up user regions
3516       MetaTableAccessor.fullScanRegions(connection, visitor);
3517     }
3518
3519     errors.print("");
3520     return true;
3521   }
3522
3523   /**
3524    * Stores the regioninfo entries scanned from META
3525    */
3526   static class MetaEntry extends HRegionInfo {
3527     ServerName regionServer;   // server hosting this region
3528     long modTime;          // timestamp of most recent metadata modification
3529     HRegionInfo splitA, splitB; //split daughters
3530
3531     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3532       this(rinfo, regionServer, modTime, null, null);
3533     }
3534
3535     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3536         HRegionInfo splitA, HRegionInfo splitB) {
3537       super(rinfo);
3538       this.regionServer = regionServer;
3539       this.modTime = modTime;
3540       this.splitA = splitA;
3541       this.splitB = splitB;
3542     }
3543
3544     @Override
3545     public boolean equals(Object o) {
3546       boolean superEq = super.equals(o);
3547       if (!superEq) {
3548         return superEq;
3549       }
3550
3551       MetaEntry me = (MetaEntry) o;
3552       if (!regionServer.equals(me.regionServer)) {
3553         return false;
3554       }
3555       return (modTime == me.modTime);
3556     }
3557
3558     @Override
3559     public int hashCode() {
3560       int hash = Arrays.hashCode(getRegionName());
3561       hash ^= getRegionId();
3562       hash ^= Arrays.hashCode(getStartKey());
3563       hash ^= Arrays.hashCode(getEndKey());
3564       hash ^= Boolean.valueOf(isOffline()).hashCode();
3565       hash ^= getTable().hashCode();
3566       if (regionServer != null) {
3567         hash ^= regionServer.hashCode();
3568       }
3569       hash ^= modTime;
3570       return hash;
3571     }
3572   }
3573
3574   /**
3575    * Stores the regioninfo entries from HDFS
3576    */
3577   static class HdfsEntry {
3578     HRegionInfo hri;
3579     Path hdfsRegionDir = null;
3580     long hdfsRegionDirModTime  = 0;
3581     boolean hdfsRegioninfoFilePresent = false;
3582     boolean hdfsOnlyEdits = false;
3583   }
3584
3585   /**
3586    * Stores the regioninfo retrieved from Online region servers.
3587    */
3588   static class OnlineEntry {
3589     HRegionInfo hri;
3590     ServerName hsa;
3591
3592     @Override
3593     public String toString() {
3594       return hsa.toString() + ";" + hri.getRegionNameAsString();
3595     }
3596   }
3597
3598   /**
3599    * Maintain information about a particular region.  It gathers information
3600    * from three places -- HDFS, META, and region servers.
3601    */
3602   public static class HbckInfo implements KeyRange {
3603     private MetaEntry metaEntry = null; // info in META
3604     private HdfsEntry hdfsEntry = null; // info in HDFS
3605     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3606     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3607     private boolean skipChecks = false; // whether to skip further checks to this region info.
3608     private boolean isMerged = false;// whether this region has already been merged into another one
3609     private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
3610     private HRegionInfo primaryHRIForDeployedReplica = null;
3611
3612     HbckInfo(MetaEntry metaEntry) {
3613       this.metaEntry = metaEntry;
3614     }
3615
3616     public synchronized int getReplicaId() {
3617       return metaEntry != null? metaEntry.getReplicaId(): deployedReplicaId;
3618     }
3619
3620     public synchronized void addServer(HRegionInfo hri, ServerName server) {
3621       OnlineEntry rse = new OnlineEntry() ;
3622       rse.hri = hri;
3623       rse.hsa = server;
3624       this.deployedEntries.add(rse);
3625       this.deployedOn.add(server);
3626       // save the replicaId that we see deployed in the cluster
3627       this.deployedReplicaId = hri.getReplicaId();
3628       this.primaryHRIForDeployedReplica =
3629           RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3630     }
3631
3632     @Override
3633     public synchronized String toString() {
3634       StringBuilder sb = new StringBuilder();
3635       sb.append("{ meta => ");
3636       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3637       sb.append( ", hdfs => " + getHdfsRegionDir());
3638       sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3639       sb.append( ", replicaId => " + getReplicaId());
3640       sb.append(" }");
3641       return sb.toString();
3642     }
3643
3644     @Override
3645     public byte[] getStartKey() {
3646       if (this.metaEntry != null) {
3647         return this.metaEntry.getStartKey();
3648       } else if (this.hdfsEntry != null) {
3649         return this.hdfsEntry.hri.getStartKey();
3650       } else {
3651         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3652         return null;
3653       }
3654     }
3655
3656     @Override
3657     public byte[] getEndKey() {
3658       if (this.metaEntry != null) {
3659         return this.metaEntry.getEndKey();
3660       } else if (this.hdfsEntry != null) {
3661         return this.hdfsEntry.hri.getEndKey();
3662       } else {
3663         LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3664         return null;
3665       }
3666     }
3667
3668     public TableName getTableName() {
3669       if (this.metaEntry != null) {
3670         return this.metaEntry.getTable();
3671       } else if (this.hdfsEntry != null) {
3672         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3673         // so we get the name from the Path
3674         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3675         return FSUtils.getTableName(tableDir);
3676       } else {
3677         // return the info from the first online/deployed hri
3678         for (OnlineEntry e : deployedEntries) {
3679           return e.hri.getTable();
3680         }
3681         return null;
3682       }
3683     }
3684
3685     public String getRegionNameAsString() {
3686       if (metaEntry != null) {
3687         return metaEntry.getRegionNameAsString();
3688       } else if (hdfsEntry != null) {
3689         if (hdfsEntry.hri != null) {
3690           return hdfsEntry.hri.getRegionNameAsString();
3691         }
3692       } else {
3693         // return the info from the first online/deployed hri
3694         for (OnlineEntry e : deployedEntries) {
3695           return e.hri.getRegionNameAsString();
3696         }
3697       }
3698       return null;
3699     }
3700
3701     public byte[] getRegionName() {
3702       if (metaEntry != null) {
3703         return metaEntry.getRegionName();
3704       } else if (hdfsEntry != null) {
3705         return hdfsEntry.hri.getRegionName();
3706       } else {
3707         // return the info from the first online/deployed hri
3708         for (OnlineEntry e : deployedEntries) {
3709           return e.hri.getRegionName();
3710         }
3711         return null;
3712       }
3713     }
3714
3715     public HRegionInfo getPrimaryHRIForDeployedReplica() {
3716       return primaryHRIForDeployedReplica;
3717     }
3718
3719     Path getHdfsRegionDir() {
3720       if (hdfsEntry == null) {
3721         return null;
3722       }
3723       return hdfsEntry.hdfsRegionDir;
3724     }
3725
3726     boolean containsOnlyHdfsEdits() {
3727       if (hdfsEntry == null) {
3728         return false;
3729       }
3730       return hdfsEntry.hdfsOnlyEdits;
3731     }
3732
3733     boolean isHdfsRegioninfoPresent() {
3734       if (hdfsEntry == null) {
3735         return false;
3736       }
3737       return hdfsEntry.hdfsRegioninfoFilePresent;
3738     }
3739
3740     long getModTime() {
3741       if (hdfsEntry == null) {
3742         return 0;
3743       }
3744       return hdfsEntry.hdfsRegionDirModTime;
3745     }
3746
3747     HRegionInfo getHdfsHRI() {
3748       if (hdfsEntry == null) {
3749         return null;
3750       }
3751       return hdfsEntry.hri;
3752     }
3753
3754     public void setSkipChecks(boolean skipChecks) {
3755       this.skipChecks = skipChecks;
3756     }
3757
3758     public boolean isSkipChecks() {
3759       return skipChecks;
3760     }
3761
3762     public void setMerged(boolean isMerged) {
3763       this.isMerged = isMerged;
3764     }
3765
3766     public boolean isMerged() {
3767       return this.isMerged;
3768     }
3769   }
3770
3771   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3772     @Override
3773     public int compare(HbckInfo l, HbckInfo r) {
3774       if (l == r) {
3775         // same instance
3776         return 0;
3777       }
3778
3779       int tableCompare = l.getTableName().compareTo(r.getTableName());
3780       if (tableCompare != 0) {
3781         return tableCompare;
3782       }
3783
3784       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3785           l.getStartKey(), r.getStartKey());
3786       if (startComparison != 0) {
3787         return startComparison;
3788       }
3789
3790       // Special case for absolute endkey
3791       byte[] endKey = r.getEndKey();
3792       endKey = (endKey.length == 0) ? null : endKey;
3793       byte[] endKey2 = l.getEndKey();
3794       endKey2 = (endKey2.length == 0) ? null : endKey2;
3795       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3796           endKey2,  endKey);
3797
3798       if (endComparison != 0) {
3799         return endComparison;
3800       }
3801
3802       // use regionId as tiebreaker.
3803       // Null is considered after all possible values so make it bigger.
3804       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3805         return 0;
3806       }
3807       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3808         return 1;
3809       }
3810       // l.hdfsEntry must not be null
3811       if (r.hdfsEntry == null) {
3812         return -1;
3813       }
3814       // both l.hdfsEntry and r.hdfsEntry must not be null.
3815       return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId());
3816     }
3817   };
3818
3819   /**
3820    * Prints summary of all tables found on the system.
3821    */
3822   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3823     StringBuilder sb = new StringBuilder();
3824     int numOfSkippedRegions;
3825     errors.print("Summary:");
3826     for (TableInfo tInfo : tablesInfo.values()) {
3827       numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
3828           skippedRegions.get(tInfo.getName()).size() : 0;
3829
3830       if (errors.tableHasErrors(tInfo)) {
3831         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3832       } else if (numOfSkippedRegions > 0) {
3833         errors.print("Table " + tInfo.getName() + " is okay (with "
3834           + numOfSkippedRegions + " skipped regions).");
3835       }
3836       else {
3837         errors.print("Table " + tInfo.getName() + " is okay.");
3838       }
3839       errors.print("    Number of regions: " + tInfo.getNumRegions());
3840       if (numOfSkippedRegions > 0) {
3841         Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
3842         System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
3843         System.out.println("      List of skipped regions:");
3844         for(String sr : skippedRegionStrings) {
3845           System.out.println("        " + sr);
3846         }
3847       }
3848       sb.setLength(0); // clear out existing buffer, if any.
3849       sb.append("    Deployed on: ");
3850       for (ServerName server : tInfo.deployedOn) {
3851         sb.append(" " + server.toString());
3852       }
3853       errors.print(sb.toString());
3854     }
3855   }
3856
3857   static ErrorReporter getErrorReporter(
3858       final Configuration conf) throws ClassNotFoundException {
3859     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3860     return ReflectionUtils.newInstance(reporter, conf);
3861   }
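
  /*
   * Hedged usage sketch -- this helper is illustrative, not part of hbck.  A custom reporter can
   * be plugged in through the "hbasefsck.errorreporter" setting read by getErrorReporter above;
   * PrintingErrorReporter is named here only because it is the default shipped in this file, and
   * any class implementing ErrorReporter could be set instead.
   */
  static ErrorReporter exampleCustomReporter(Configuration conf) throws ClassNotFoundException {
    conf.setClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
    return getErrorReporter(conf);
  }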
3862
3863   public interface ErrorReporter {
3864     enum ERROR_CODE {
3865       UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3866       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
3867       NOT_DEPLOYED,
3868       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3869       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3870       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3871       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3872       WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE,
3873       NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE
3874     }
3875     void clear();
3876     void report(String message);
3877     void reportError(String message);
3878     void reportError(ERROR_CODE errorCode, String message);
3879     void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3880     void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3881     void reportError(
3882       ERROR_CODE errorCode,
3883       String message,
3884       TableInfo table,
3885       HbckInfo info1,
3886       HbckInfo info2
3887     );
3888     int summarize();
3889     void detail(String details);
3890     ArrayList<ERROR_CODE> getErrorList();
3891     void progress();
3892     void print(String message);
3893     void resetErrors();
3894     boolean tableHasErrors(TableInfo table);
3895   }
3896
3897   static class PrintingErrorReporter implements ErrorReporter {
3898     public int errorCount = 0;
3899     private int showProgress;
3900     // How frequently calls to progress() will create output
3901     private static final int progressThreshold = 100;
3902
3903     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3904
3905     // for use by unit tests to verify which errors were discovered
3906     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3907
3908     @Override
3909     public void clear() {
3910       errorTables.clear();
3911       errorList.clear();
3912       errorCount = 0;
3913     }
3914
3915     @Override
3916     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3917       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3918         System.err.println(message);
3919         return;
3920       }
3921
3922       errorList.add(errorCode);
3923       if (!summary) {
3924         System.out.println("ERROR: " + message);
3925       }
3926       errorCount++;
3927       showProgress = 0;
3928     }
3929
3930     @Override
3931     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3932       errorTables.add(table);
3933       reportError(errorCode, message);
3934     }
3935
3936     @Override
3937     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3938                                          HbckInfo info) {
3939       errorTables.add(table);
3940       String reference = "(region " + info.getRegionNameAsString() + ")";
3941       reportError(errorCode, reference + " " + message);
3942     }
3943
3944     @Override
3945     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3946                                          HbckInfo info1, HbckInfo info2) {
3947       errorTables.add(table);
3948       String reference = "(regions " + info1.getRegionNameAsString()
3949           + " and " + info2.getRegionNameAsString() + ")";
3950       reportError(errorCode, reference + " " + message);
3951     }
3952
3953     @Override
3954     public synchronized void reportError(String message) {
3955       reportError(ERROR_CODE.UNKNOWN, message);
3956     }
3957
3958     /**
3959      * Report error information, but do not increment the error count.  Intended for cases
3960      * where the actual error would have been reported previously.
3961      * @param message error detail to print
3962      */
3963     @Override
3964     public synchronized void report(String message) {
3965       if (!summary) {
3966         System.out.println("ERROR: " + message);
3967       }
3968       showProgress = 0;
3969     }
3970
3971     @Override
3972     public synchronized int summarize() {
3973       System.out.println(Integer.toString(errorCount) +
3974                          " inconsistencies detected.");
3975       if (errorCount == 0) {
3976         System.out.println("Status: OK");
3977         return 0;
3978       } else {
3979         System.out.println("Status: INCONSISTENT");
3980         return -1;
3981       }
3982     }
3983
3984     @Override
3985     public ArrayList<ERROR_CODE> getErrorList() {
3986       return errorList;
3987     }
3988
3989     @Override
3990     public synchronized void print(String message) {
3991       if (!summary) {
3992         System.out.println(message);
3993       }
3994     }
3995
3996     @Override
3997     public boolean tableHasErrors(TableInfo table) {
3998       return errorTables.contains(table);
3999     }
4000
4001     @Override
4002     public void resetErrors() {
4003       errorCount = 0;
4004     }
4005
4006     @Override
4007     public synchronized void detail(String message) {
4008       if (details) {
4009         System.out.println(message);
4010       }
4011       showProgress = 0;
4012     }
4013
4014     @Override
4015     public synchronized void progress() {
4016       if (showProgress++ == progressThreshold) {
4017         if (!summary) {
4018           System.out.print(".");
4019         }
4020         showProgress = 0;
4021       }
4022     }
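         // Note on the throttling above: roughly one '.' is printed per progressThreshold
         // (100) calls to progress(), and most reportError()/report()/detail() calls reset
         // the counter, so dots only appear during long quiet stretches of work.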
4023   }
4024
4025   /**
4026    * Contact a region server and retrieve the regions it is currently serving.
4027    */
4028   static class WorkItemRegion implements Callable<Void> {
4029     private final HBaseFsck hbck;
4030     private final ServerName rsinfo;
4031     private final ErrorReporter errors;
4032     private final ClusterConnection connection;
4033
4034     WorkItemRegion(HBaseFsck hbck, ServerName info,
4035                    ErrorReporter errors, ClusterConnection connection) {
4036       this.hbck = hbck;
4037       this.rsinfo = info;
4038       this.errors = errors;
4039       this.connection = connection;
4040     }
4041
4042     @Override
4043     public synchronized Void call() throws IOException {
4044       errors.progress();
4045       try {
4046         BlockingInterface server = connection.getAdmin(rsinfo);
4047
4048         // list all online regions from this region server
4049         List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
4050         regions = filterRegions(regions);
4051
4052         if (details) {
4053           errors.detail("RegionServer: " + rsinfo.getServerName() +
4054                            " number of regions: " + regions.size());
4055           for (HRegionInfo rinfo: regions) {
4056             errors.detail("  " + rinfo.getRegionNameAsString() +
4057                              " id: " + rinfo.getRegionId() +
4058                              " encoded_name: " + rinfo.getEncodedName() +
4059                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
4060                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
4061           }
4062         }
4063
4064         // record each deployed region so it can later be checked against hbase:meta
4065         for (HRegionInfo r:regions) {
4066           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4067           hbi.addServer(r, rsinfo);
4068         }
4069       } catch (IOException e) {          // unable to connect to the region server.
4070         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
4071           " Unable to fetch region information. " + e);
4072         throw e;
4073       }
4074       return null;
4075     }
4076
4077     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
4078       List<HRegionInfo> ret = Lists.newArrayList();
4079       for (HRegionInfo hri : regions) {
4080         if (hri.isMetaTable() || (!hbck.checkMetaOnly
4081             && hbck.isTableIncluded(hri.getTable()))) {
4082           ret.add(hri);
4083         }
4084       }
4085       return ret;
4086     }
4087   }
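       // A rough sketch of how these work items are fanned out; the actual driver code
       // lives elsewhere in this class, and "liveServers", "hbck", "errors", "connection"
       // and "executor" are assumed to be in scope here:
       //
       //   List<WorkItemRegion> items = new ArrayList<WorkItemRegion>();
       //   for (ServerName rs : liveServers) {
       //     items.add(new WorkItemRegion(hbck, rs, errors, connection));
       //   }
       //   for (Future<Void> f : executor.invokeAll(items)) {
       //     f.get();  // an RS_CONNECT_FAILURE surfaces here as an ExecutionException
       //   }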
4088
4089   /**
4090    * Scan a table directory on HDFS and record information about each region
4091    * directory found there.
4092    */
4093   class WorkItemHdfsDir implements Callable<Void> {
4094     private FileStatus tableDir;
4095     private ErrorReporter errors;
4096     private FileSystem fs;
4097
4098     WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4099                     FileStatus status) {
4100       this.fs = fs;
4101       this.tableDir = status;
4102       this.errors = errors;
4103     }
4104
4105     @Override
4106     public synchronized Void call() throws InterruptedException, ExecutionException {
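           // Vector is used rather than ArrayList because it is synchronized; the
           // Runnables submitted to the executor below may add to it concurrently.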
4107       final Vector<Exception> exceptions = new Vector<Exception>();
4108
4109       try {
4110         final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4111         final List<Future<?>> futures = new ArrayList<Future<?>>(regionDirs.length);
4112
4113         for (final FileStatus regionDir : regionDirs) {
4114           errors.progress();
4115           final String encodedName = regionDir.getPath().getName();
4116           // ignore directories that aren't hexadecimal
4117           if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
4118             continue;
4119           }
4120
4121           if (!exceptions.isEmpty()) {
4122             break;
4123           }
4124
4125           futures.add(executor.submit(new Runnable() {
4126             @Override
4127             public void run() {
4128               try {
4129                 LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
4130
4131                 Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
4132                 boolean regioninfoFileExists = fs.exists(regioninfoFile);
4133
4134                 if (!regioninfoFileExists) {
4135                   // As tables grow it becomes increasingly likely that a region directory
4136                   // will already have been removed by a split or merge by the time we reach it.
4137                   if (!fs.exists(regionDir.getPath())) {
4138                     LOG.warn("By the time we tried to process this region dir it was already gone: "
4139                         + regionDir.getPath());
4140                     return;
4141                   }
4142                 }
4143
4144                 HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4145                 HdfsEntry he = new HdfsEntry();
4146                 synchronized (hbi) {
4147                   if (hbi.getHdfsRegionDir() != null) {
4148                     errors.print("Directory " + encodedName + " appears to be a duplicate of "
4149                                  + hbi.getHdfsRegionDir());
4150                   }
4151
4152                   he.hdfsRegionDir = regionDir.getPath();
4153                   he.hdfsRegionDirModTime = regionDir.getModificationTime();
4154                   he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4155                   // we add to orphan list when we attempt to read .regioninfo
4156
4157                   // Set a flag if this region contains only edits
4158                   // This is special case if a region is left after split
4159                   he.hdfsOnlyEdits = true;
4160                   FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4161                   Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4162                   for (FileStatus subDir : subDirs) {
4163                     errors.progress();
4164                     String sdName = subDir.getPath().getName();
4165                     if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4166                       he.hdfsOnlyEdits = false;
4167                       break;
4168                     }
4169                   }
4170                   hbi.hdfsEntry = he;
4171                 }
4172               } catch (Exception e) {
4173                 LOG.error("Could not load region dir", e);
4174                 exceptions.add(e);
4175               }
4176             }
4177           }));
4178         }
4179
4180         // Ensure all pending tasks are complete (or that we run into an exception)
4181         for (Future<?> f : futures) {
4182           if (!exceptions.isEmpty()) {
4183             break;
4184           }
4185           try {
4186             f.get();
4187           } catch (ExecutionException e) {
4188             LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
4189             // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4190           }
4191         }
4192       } catch (IOException e) {
4193         LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4194         exceptions.add(e);
4195       } finally {
4196         if (!exceptions.isEmpty()) {
4197           errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4198               + tableDir.getPath().getName()
4199               + " Unable to fetch all HDFS region information. ");
4200           // Just throw the first exception as an indication something bad happened
4201           // Don't need to propagate all the exceptions, we already logged them all anyway
4202           throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement());
4203         }
4204       }
4205       return null;
4206     }
4207   }
4208
4209   /**
4210    * Contact hdfs and load the .regioninfo file for a single region into its
4211    * HbckInfo entry, recording the region dir as an orphan if the file cannot be read.
4212    */
4213   static class WorkItemHdfsRegionInfo implements Callable<Void> {
4214     private HbckInfo hbi;
4215     private HBaseFsck hbck;
4216     private ErrorReporter errors;
4217
4218     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4219       this.hbi = hbi;
4220       this.hbck = hbck;
4221       this.errors = errors;
4222     }
4223
4224     @Override
4225     public synchronized Void call() throws IOException {
4226       // only load entries that haven't been loaded yet.
4227       if (hbi.getHdfsHRI() == null) {
4228         try {
4229           errors.progress();
4230           hbck.loadHdfsRegioninfo(hbi);
4231         } catch (IOException ioe) {
4232           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4233               + hbi.getTableName() + " in hdfs dir "
4234               + hbi.getHdfsRegionDir()
4235               + "!  It may be an invalid format or version file.  Treating as "
4236               + "an orphaned regiondir.";
4237           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4238           try {
4239             hbck.debugLsr(hbi.getHdfsRegionDir());
4240           } catch (IOException ioe2) {
4241             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4242             throw ioe2;
4243           }
4244           hbck.orphanHdfsDirs.add(hbi);
4245           throw ioe;
4246         }
4247       }
4248       return null;
4249     }
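         // Like WorkItemRegion above, instances of this class are presumably submitted to
         // the shared executor, one per region directory found on HDFS. A failed
         // .regioninfo load both records the region in hbck.orphanHdfsDirs and rethrows,
         // so the caller can tell the load did not complete.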
4250   }
4251
4252   /**
4253    * Display the full report from fsck. This displays all live and dead region
4254    * servers, and all known regions.
4255    */
4256   public static void setDisplayFullReport() {
4257     details = true;
4258   }
4259
4260   /**
4261    * Set exclusive mode.
4262    */
4263   public static void setForceExclusive() {
4264     forceExclusive = true;
4265   }
4266
4267   /**
4268    * Only one instance of hbck can modify HBase at a time; any fix option implies exclusive mode, and -exclusive forces it.
4269    */
4270   public boolean isExclusive() {
4271     return fixAny || forceExclusive;
4272   }
4273
4274   /**
4275    * Set summary mode.
4276    * Print only summary of the tables and status (OK or INCONSISTENT)
4277    */
4278   static void setSummary() {
4279     summary = true;
4280   }
4281
4282   /**
4283    * Set hbase:meta check mode.
4284    * Print only info about hbase:meta table deployment/state
4285    */
4286   void setCheckMetaOnly() {
4287     checkMetaOnly = true;
4288   }
4289
4290   /**
4291    * Set region boundaries check mode.
4292    */
4293   void setRegionBoundariesCheck() {
4294     checkRegionBoundaries = true;
4295   }
4296
4297   /**
4298    * Set table locks fix mode.
4299    * Delete table locks held for a long time
4300    */
4301   public void setFixTableLocks(boolean shouldFix) {
4302     fixTableLocks = shouldFix;
4303     fixAny |= shouldFix;
4304   }
4305
4306   /**
4307    * Set replication fix mode.
4308    */
4309   public void setFixReplication(boolean shouldFix) {
4310     fixReplication = shouldFix;
4311     fixAny |= shouldFix;
4312   }
4313
4314   /**
4315    * Mark that fsck should be rerun. This is set once we have tried to fix
4316    * something, so that the tool is run one more time to verify that the
4317    * repair actually resolved the inconsistency.
4318    */
4320   void setShouldRerun() {
4321     rerun = true;
4322   }
4323
4324   boolean shouldRerun() {
4325     return rerun;
4326   }
4327
4328   /**
4329    * Enable fixing of region assignment inconsistencies found by fsck, such as
4330    * regions that are not deployed, multiply deployed, or deployed on the wrong server.
4331    */
4332   public void setFixAssignments(boolean shouldFix) {
4333     fixAssignments = shouldFix;
4334     fixAny |= shouldFix;
4335   }
4336
4337   boolean shouldFixAssignments() {
4338     return fixAssignments;
4339   }
4340
4341   public void setFixMeta(boolean shouldFix) {
4342     fixMeta = shouldFix;
4343     fixAny |= shouldFix;
4344   }
4345
4346   boolean shouldFixMeta() {
4347     return fixMeta;
4348   }
4349
4350   public void setFixEmptyMetaCells(boolean shouldFix) {
4351     fixEmptyMetaCells = shouldFix;
4352     fixAny |= shouldFix;
4353   }
4354
4355   boolean shouldFixEmptyMetaCells() {
4356     return fixEmptyMetaCells;
4357   }
4358
4359   public void setCheckHdfs(boolean checking) {
4360     checkHdfs = checking;
4361   }
4362
4363   boolean shouldCheckHdfs() {
4364     return checkHdfs;
4365   }
4366
4367   public void setFixHdfsHoles(boolean shouldFix) {
4368     fixHdfsHoles = shouldFix;
4369     fixAny |= shouldFix;
4370   }
4371
4372   boolean shouldFixHdfsHoles() {
4373     return fixHdfsHoles;
4374   }
4375
4376   public void setFixTableOrphans(boolean shouldFix) {
4377     fixTableOrphans = shouldFix;
4378     fixAny |= shouldFix;
4379   }
4380
4381   boolean shouldFixTableOrphans() {
4382     return fixTableOrphans;
4383   }
4384
4385   public void setFixHdfsOverlaps(boolean shouldFix) {
4386     fixHdfsOverlaps = shouldFix;
4387     fixAny |= shouldFix;
4388   }
4389
4390   boolean shouldFixHdfsOverlaps() {
4391     return fixHdfsOverlaps;
4392   }
4393
4394   public void setFixHdfsOrphans(boolean shouldFix) {
4395     fixHdfsOrphans = shouldFix;
4396     fixAny |= shouldFix;
4397   }
4398
4399   boolean shouldFixHdfsOrphans() {
4400     return fixHdfsOrphans;
4401   }
4402
4403   public void setFixVersionFile(boolean shouldFix) {
4404     fixVersionFile = shouldFix;
4405     fixAny |= shouldFix;
4406   }
4407
4408   public boolean shouldFixVersionFile() {
4409     return fixVersionFile;
4410   }
4411
4412   public void setSidelineBigOverlaps(boolean sbo) {
4413     this.sidelineBigOverlaps = sbo;
4414   }
4415
4416   public boolean shouldSidelineBigOverlaps() {
4417     return sidelineBigOverlaps;
4418   }
4419
4420   public void setFixSplitParents(boolean shouldFix) {
4421     fixSplitParents = shouldFix;
4422     fixAny |= shouldFix;
4423   }
4424
4425   boolean shouldFixSplitParents() {
4426     return fixSplitParents;
4427   }
4428
4429   public void setFixReferenceFiles(boolean shouldFix) {
4430     fixReferenceFiles = shouldFix;
4431     fixAny |= shouldFix;
4432   }
4433
4434   boolean shouldFixReferenceFiles() {
4435     return fixReferenceFiles;
4436   }
4437
4438   public boolean shouldIgnorePreCheckPermission() {
4439     return !fixAny || ignorePreCheckPermission;
4440   }
4441
4442   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4443     this.ignorePreCheckPermission = ignorePreCheckPermission;
4444   }
4445
4446   /**
4447    * @param mm maximum number of regions to merge into a single region.
4448    */
4449   public void setMaxMerge(int mm) {
4450     this.maxMerge = mm;
4451   }
4452
4453   public int getMaxMerge() {
4454     return maxMerge;
4455   }
4456
4457   public void setMaxOverlapsToSideline(int mo) {
4458     this.maxOverlapsToSideline = mo;
4459   }
4460
4461   public int getMaxOverlapsToSideline() {
4462     return maxOverlapsToSideline;
4463   }
4464
4465   /**
4466    * Only check/fix tables specified by the list; an empty list means all
4467    * tables are included.
4468    */
4469   boolean isTableIncluded(TableName table) {
4470     return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
4471   }
4472
4473   public void includeTable(TableName table) {
4474     tablesIncluded.add(table);
4475   }
4476
4477   Set<TableName> getIncludedTables() {
4478     return new HashSet<TableName>(tablesIncluded);
4479   }
4480
4481   /**
4482    * Only inspect tables whose state in hbase:meta has not changed during the
4483    * last few seconds, as specified by hbase.admin.fsck.timelag.
4484    * @param seconds the time lag in seconds
4485    */
4486   public void setTimeLag(long seconds) {
4487     timelag = seconds * 1000; // convert to milliseconds
4488   }
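       // For example, setTimeLag(60), the programmatic equivalent of "-timelag 60" on the
       // command line (see exec() below), sets timelag to 60000 ms so that only tables
       // whose hbase:meta state has been stable for at least a minute are examined.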
4489
4490   /**
4491    * @param sidelineDir HDFS path to sideline data
4492    */
4494   public void setSidelineDir(String sidelineDir) {
4495     this.sidelineDir = new Path(sidelineDir);
4496   }
4497
4498   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4499     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4500   }
4501
4502   public HFileCorruptionChecker getHFilecorruptionChecker() {
4503     return hfcc;
4504   }
4505
4506   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4507     this.hfcc = hfcc;
4508   }
4509
4510   public void setRetCode(int code) {
4511     this.retcode = code;
4512   }
4513
4514   public int getRetCode() {
4515     return retcode;
4516   }
4517
4518   protected HBaseFsck printUsageAndExit() {
4519     StringWriter sw = new StringWriter(2048);
4520     PrintWriter out = new PrintWriter(sw);
4521     out.println("Usage: fsck [opts] {only tables}");
4522     out.println(" where [opts] are:");
4523     out.println("   -help Display help options (this)");
4524     out.println("   -details Display full report of all regions.");
4525     out.println("   -timelag <timeInSeconds>  Process only regions that " +
4526                        "have not experienced any metadata updates in the last " +
4527                        "<timeInSeconds> seconds.");
4528     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4529         " before checking if the fix worked if run with -fix");
4530     out.println("   -summary Print only summary of the tables and status.");
4531     out.println("   -metaonly Only check the state of the hbase:meta table.");
4532     out.println("   -sidelineDir <hdfs://> HDFS path to back up existing meta.");
4533     out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
4534     out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
4535
4536     out.println("");
4537     out.println("  Metadata Repair options: (expert features, use with caution!)");
4538     out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
4539     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4540     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4541     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4542         + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4543     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4544     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4545     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4546     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4547     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4548     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4549     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow big overlaps to be sidelined");
4550     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4551     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4552     out.println("   -ignorePreCheckPermission  Ignore filesystem permission pre-check");
4553     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4554     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4555         + " (empty REGIONINFO_QUALIFIER rows)");
4556
4557     out.println("");
4558     out.println("  Datafile Repair options: (expert features, use with caution!)");
4559     out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
4560     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
4561
4562     out.println("");
4563     out.println("  Metadata Repair shortcuts");
4564     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4565         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
4566     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4567
4568     out.println("");
4569     out.println("  Table lock options");
4570     out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4571
4572     out.println("");
4573     out.println(" Replication options");
4574     out.println("   -fixReplication   Deletes replication queues for removed peers");
4575
4576     out.flush();
4577     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4578
4579     setRetCode(-2);
4580     return this;
4581   }
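       // A few typical invocations, assuming hbck is launched through the usual "hbase"
       // wrapper script:
       //
       //   hbase hbck                            # report-only run over all tables
       //   hbase hbck -details TestTable         # full region listing for one table
       //   hbase hbck -fixAssignments -fixMeta   # repair assignment and meta problems
       //   hbase hbck -repair TestTable          # aggressive repair shortcut, use with caution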
4582
4583   /**
4584    * Main program
4585    *
4586    * @param args command-line arguments for the fsck tool
4587    * @throws Exception if the tool fails
4588    */
4589   public static void main(String[] args) throws Exception {
4590     // create a fsck object
4591     Configuration conf = HBaseConfiguration.create();
4592     Path hbasedir = FSUtils.getRootDir(conf);
4593     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4594     FSUtils.setFsDefault(conf, new Path(defaultFs));
4595     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4596     System.exit(ret);
4597   }
4598
4599   /**
4600    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4601    */
4602   static class HBaseFsckTool extends Configured implements Tool {
4603     HBaseFsckTool(Configuration conf) { super(conf); }
4604     @Override
4605     public int run(String[] args) throws Exception {
4606       HBaseFsck hbck = new HBaseFsck(getConf());
4607       hbck.exec(hbck.executor, args);
4608       hbck.close();
4609       return hbck.getRetCode();
4610     }
4611   }
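       // Because HBaseFsckTool extends Configured and is launched through ToolRunner (see
       // main() above), generic "-Dkey=value" options are applied to the configuration
       // before hbck's own flags are parsed. For instance (a sketch):
       //
       //   hbase hbck -Dhbase.zookeeper.quorum=zk1.example.com -details
       //
       // overrides that property for this run only and then falls through to exec() below.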
4612
4613
4614   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4615     ServiceException, InterruptedException {
4616     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4617
4618     boolean checkCorruptHFiles = false;
4619     boolean sidelineCorruptHFiles = false;
4620
4621     // Process command-line args.
4622     for (int i = 0; i < args.length; i++) {
4623       String cmd = args[i];
4624       if (cmd.equals("-help") || cmd.equals("-h")) {
4625         return printUsageAndExit();
4626       } else if (cmd.equals("-details")) {
4627         setDisplayFullReport();
4628       } else if (cmd.equals("-exclusive")) {
4629         setForceExclusive();
4630       } else if (cmd.equals("-timelag")) {
4631         if (i == args.length - 1) {
4632           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4633           return printUsageAndExit();
4634         }
4635         try {
4636           long timelag = Long.parseLong(args[i+1]);
4637           setTimeLag(timelag);
4638         } catch (NumberFormatException e) {
4639           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4640           return printUsageAndExit();
4641         }
4642         i++;
4643       } else if (cmd.equals("-sleepBeforeRerun")) {
4644         if (i == args.length - 1) {
4645           errors.reportError(ERROR_CODE.WRONG_USAGE,
4646             "HBaseFsck: -sleepBeforeRerun needs a value.");
4647           return printUsageAndExit();
4648         }
4649         try {
4650           sleepBeforeRerun = Long.parseLong(args[i+1]);
4651         } catch (NumberFormatException e) {
4652           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4653           return printUsageAndExit();
4654         }
4655         i++;
4656       } else if (cmd.equals("-sidelineDir")) {
4657         if (i == args.length - 1) {
4658           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4659           return printUsageAndExit();
4660         }
4661         i++;
4662         setSidelineDir(args[i]);
4663       } else if (cmd.equals("-fix")) {
4664         errors.reportError(ERROR_CODE.WRONG_USAGE,
4665           "This option is deprecated; please use -fixAssignments instead.");
4666         setFixAssignments(true);
4667       } else if (cmd.equals("-fixAssignments")) {
4668         setFixAssignments(true);
4669       } else if (cmd.equals("-fixMeta")) {
4670         setFixMeta(true);
4671       } else if (cmd.equals("-noHdfsChecking")) {
4672         setCheckHdfs(false);
4673       } else if (cmd.equals("-fixHdfsHoles")) {
4674         setFixHdfsHoles(true);
4675       } else if (cmd.equals("-fixHdfsOrphans")) {
4676         setFixHdfsOrphans(true);
4677       } else if (cmd.equals("-fixTableOrphans")) {
4678         setFixTableOrphans(true);
4679       } else if (cmd.equals("-fixHdfsOverlaps")) {
4680         setFixHdfsOverlaps(true);
4681       } else if (cmd.equals("-fixVersionFile")) {
4682         setFixVersionFile(true);
4683       } else if (cmd.equals("-sidelineBigOverlaps")) {
4684         setSidelineBigOverlaps(true);
4685       } else if (cmd.equals("-fixSplitParents")) {
4686         setFixSplitParents(true);
4687       } else if (cmd.equals("-ignorePreCheckPermission")) {
4688         setIgnorePreCheckPermission(true);
4689       } else if (cmd.equals("-checkCorruptHFiles")) {
4690         checkCorruptHFiles = true;
4691       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4692         sidelineCorruptHFiles = true;
4693       } else if (cmd.equals("-fixReferenceFiles")) {
4694         setFixReferenceFiles(true);
4695       } else if (cmd.equals("-fixEmptyMetaCells")) {
4696         setFixEmptyMetaCells(true);
4697       } else if (cmd.equals("-repair")) {
4698         // this attempts to merge overlapping hdfs regions, needs testing
4699         // under load
4700         setFixHdfsHoles(true);
4701         setFixHdfsOrphans(true);
4702         setFixMeta(true);
4703         setFixAssignments(true);
4704         setFixHdfsOverlaps(true);
4705         setFixVersionFile(true);
4706         setSidelineBigOverlaps(true);
4707         setFixSplitParents(false);
4708         setCheckHdfs(true);
4709         setFixReferenceFiles(true);
4710         setFixTableLocks(true);
4711       } else if (cmd.equals("-repairHoles")) {
4712         // this will make all missing hdfs regions available but may lose data
4713         setFixHdfsHoles(true);
4714         setFixHdfsOrphans(false);
4715         setFixMeta(true);
4716         setFixAssignments(true);
4717         setFixHdfsOverlaps(false);
4718         setSidelineBigOverlaps(false);
4719         setFixSplitParents(false);
4720         setCheckHdfs(true);
4721       } else if (cmd.equals("-maxOverlapsToSideline")) {
4722         if (i == args.length - 1) {
4723           errors.reportError(ERROR_CODE.WRONG_USAGE,
4724             "-maxOverlapsToSideline needs a numeric value argument.");
4725           return printUsageAndExit();
4726         }
4727         try {
4728           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4729           setMaxOverlapsToSideline(maxOverlapsToSideline);
4730         } catch (NumberFormatException e) {
4731           errors.reportError(ERROR_CODE.WRONG_USAGE,
4732             "-maxOverlapsToSideline needs a numeric value argument.");
4733           return printUsageAndExit();
4734         }
4735         i++;
4736       } else if (cmd.equals("-maxMerge")) {
4737         if (i == args.length - 1) {
4738           errors.reportError(ERROR_CODE.WRONG_USAGE,
4739             "-maxMerge needs a numeric value argument.");
4740           return printUsageAndExit();
4741         }
4742         try {
4743           int maxMerge = Integer.parseInt(args[i+1]);
4744           setMaxMerge(maxMerge);
4745         } catch (NumberFormatException e) {
4746           errors.reportError(ERROR_CODE.WRONG_USAGE,
4747             "-maxMerge needs a numeric value argument.");
4748           return printUsageAndExit();
4749         }
4750         i++;
4751       } else if (cmd.equals("-summary")) {
4752         setSummary();
4753       } else if (cmd.equals("-metaonly")) {
4754         setCheckMetaOnly();
4755       } else if (cmd.equals("-boundaries")) {
4756         setRegionBoundariesCheck();
4757       } else if (cmd.equals("-fixTableLocks")) {
4758         setFixTableLocks(true);
4759       } else if (cmd.equals("-fixReplication")) {
4760         setFixReplication(true);
4761       } else if (cmd.startsWith("-")) {
4762         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4763         return printUsageAndExit();
4764       } else {
4765         includeTable(TableName.valueOf(cmd));
4766         errors.print("Allow checking/fixes for table: " + cmd);
4767       }
4768     }
4769
4770     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4771
4772     // pre-check current user has FS write permission or not
4773     try {
4774       preCheckPermission();
4775     } catch (AccessDeniedException ace) {
4776       Runtime.getRuntime().exit(-1);
4777     } catch (IOException ioe) {
4778       Runtime.getRuntime().exit(-1);
4779     }
4780
4781     // do the real work of hbck
4782     connect();
4783
4784     try {
4785       // if corrupt file mode is on, first fix them since they may be opened later
4786       if (checkCorruptHFiles || sidelineCorruptHFiles) {
4787         LOG.info("Checking all hfiles for corruption");
4788         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4789         setHFileCorruptionChecker(hfcc); // so we can get result
4790         Collection<TableName> tables = getIncludedTables();
4791         Collection<Path> tableDirs = new ArrayList<Path>();
4792         Path rootdir = FSUtils.getRootDir(getConf());
4793         if (tables.size() > 0) {
4794           for (TableName t : tables) {
4795             tableDirs.add(FSUtils.getTableDir(rootdir, t));
4796           }
4797         } else {
4798           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4799         }
4800         hfcc.checkTables(tableDirs);
4801         hfcc.report(errors);
4802       }
4803
4804       // check and fix table integrity, region consistency.
4805       int code = onlineHbck();
4806       setRetCode(code);
4807       // If we have changed the HBase state it is better to run hbck again
4808       // to see if we haven't broken something else in the process.
4809       // We run it only once more because otherwise we can easily fall into
4810       // an infinite loop.
4811       if (shouldRerun()) {
4812         try {
4813           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4814           Thread.sleep(sleepBeforeRerun);
4815         } catch (InterruptedException ie) {
4816           LOG.warn("Interrupted while sleeping");
4817           return this;
4818         }
4819         // Just report
4820         setFixAssignments(false);
4821         setFixMeta(false);
4822         setFixHdfsHoles(false);
4823         setFixHdfsOverlaps(false);
4824         setFixVersionFile(false);
4825         setFixTableOrphans(false);
4826         errors.resetErrors();
4827         code = onlineHbck();
4828         setRetCode(code);
4829       }
4830     } finally {
4831       IOUtils.closeQuietly(this);
4832     }
4833     return this;
4834   }
4835
4836   /**
4837    * ls -r for debugging purposes
4838    */
4839   void debugLsr(Path p) throws IOException {
4840     debugLsr(getConf(), p, errors);
4841   }
4842
4843   /**
4844    * ls -r for debugging purposes
4845    */
4846   public static void debugLsr(Configuration conf,
4847       Path p) throws IOException {
4848     debugLsr(conf, p, new PrintingErrorReporter());
4849   }
4850
4851   /**
4852    * Recursively list the given path (like ls -R) for debugging; a no-op unless debug logging is enabled.
4853    */
4854   public static void debugLsr(Configuration conf,
4855       Path p, ErrorReporter errors) throws IOException {
4856     if (!LOG.isDebugEnabled() || p == null) {
4857       return;
4858     }
4859     FileSystem fs = p.getFileSystem(conf);
4860
4861     if (!fs.exists(p)) {
4862       // path does not exist; nothing to list
4863       return;
4864     }
4865     errors.print(p.toString());
4866
4867     if (fs.isFile(p)) {
4868       return;
4869     }
4870
4871     if (fs.getFileStatus(p).isDirectory()) {
4872       FileStatus[] fss = fs.listStatus(p);
4873       for (FileStatus status : fss) {
4874         debugLsr(conf, status.getPath(), errors);
4875       }
4876     }
4877   }
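       // Example (a sketch; the path shown assumes the default HBase layout): with DEBUG
       // logging enabled for this class, the following prints every file and directory
       // under a table dir to a PrintingErrorReporter:
       //
       //   HBaseFsck.debugLsr(HBaseConfiguration.create(),
       //       new Path("/hbase/data/default/TestTable"));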
4878 }