1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import com.google.common.annotations.VisibleForTesting;
21  import com.google.common.base.Joiner;
22  import com.google.common.base.Preconditions;
23  import com.google.common.collect.ImmutableList;
24  import com.google.common.collect.Lists;
25  import com.google.common.collect.Multimap;
26  import com.google.common.collect.Ordering;
27  import com.google.common.collect.TreeMultimap;
28  import com.google.protobuf.ServiceException;
29
30  import java.io.Closeable;
31  import java.io.FileNotFoundException;
32  import java.io.IOException;
33  import java.io.InterruptedIOException;
34  import java.io.PrintWriter;
35  import java.io.StringWriter;
36  import java.net.InetAddress;
37  import java.net.URI;
38  import java.util.ArrayList;
39  import java.util.Arrays;
40  import java.util.Collection;
41  import java.util.Collections;
42  import java.util.Comparator;
43  import java.util.HashMap;
44  import java.util.HashSet;
45  import java.util.Iterator;
46  import java.util.List;
47  import java.util.Locale;
48  import java.util.Map;
49  import java.util.Map.Entry;
50  import java.util.Set;
51  import java.util.SortedMap;
52  import java.util.SortedSet;
53  import java.util.TreeMap;
54  import java.util.TreeSet;
55  import java.util.concurrent.Callable;
56  import java.util.concurrent.ConcurrentSkipListMap;
57  import java.util.concurrent.ExecutionException;
58  import java.util.concurrent.ExecutorService;
59  import java.util.concurrent.Executors;
60  import java.util.concurrent.Future;
61  import java.util.concurrent.FutureTask;
62  import java.util.concurrent.ScheduledThreadPoolExecutor;
63  import java.util.concurrent.TimeUnit;
64  import java.util.concurrent.TimeoutException;
65  import java.util.concurrent.atomic.AtomicBoolean;
66  import java.util.concurrent.atomic.AtomicInteger;
67
68  import org.apache.commons.io.IOUtils;
69  import org.apache.commons.lang.RandomStringUtils;
70  import org.apache.commons.lang.StringUtils;
71  import org.apache.commons.logging.Log;
72  import org.apache.commons.logging.LogFactory;
73  import org.apache.hadoop.conf.Configuration;
74  import org.apache.hadoop.conf.Configured;
75  import org.apache.hadoop.fs.FSDataOutputStream;
76  import org.apache.hadoop.fs.FileStatus;
77  import org.apache.hadoop.fs.FileSystem;
78  import org.apache.hadoop.fs.Path;
79  import org.apache.hadoop.fs.permission.FsAction;
80  import org.apache.hadoop.fs.permission.FsPermission;
81  import org.apache.hadoop.hbase.Abortable;
82  import org.apache.hadoop.hbase.Cell;
83  import org.apache.hadoop.hbase.CellUtil;
84  import org.apache.hadoop.hbase.ClusterStatus;
85  import org.apache.hadoop.hbase.HBaseConfiguration;
86  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
87  import org.apache.hadoop.hbase.HColumnDescriptor;
88  import org.apache.hadoop.hbase.HConstants;
89  import org.apache.hadoop.hbase.HRegionInfo;
90  import org.apache.hadoop.hbase.HRegionLocation;
91  import org.apache.hadoop.hbase.HTableDescriptor;
92  import org.apache.hadoop.hbase.KeyValue;
93  import org.apache.hadoop.hbase.MasterNotRunningException;
94  import org.apache.hadoop.hbase.MetaTableAccessor;
95  import org.apache.hadoop.hbase.RegionLocations;
96  import org.apache.hadoop.hbase.ServerName;
97  import org.apache.hadoop.hbase.TableName;
98  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
99  import org.apache.hadoop.hbase.classification.InterfaceAudience;
100 import org.apache.hadoop.hbase.classification.InterfaceStability;
101 import org.apache.hadoop.hbase.client.Admin;
102 import org.apache.hadoop.hbase.client.ClusterConnection;
103 import org.apache.hadoop.hbase.client.Connection;
104 import org.apache.hadoop.hbase.client.ConnectionFactory;
105 import org.apache.hadoop.hbase.client.Delete;
106 import org.apache.hadoop.hbase.client.Get;
107 import org.apache.hadoop.hbase.client.MasterSwitchType;
108 import org.apache.hadoop.hbase.client.Put;
109 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
110 import org.apache.hadoop.hbase.client.Result;
111 import org.apache.hadoop.hbase.client.RowMutations;
112 import org.apache.hadoop.hbase.client.Table;
113 import org.apache.hadoop.hbase.client.TableState;
114 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
115 import org.apache.hadoop.hbase.io.hfile.HFile;
116 import org.apache.hadoop.hbase.master.MasterFileSystem;
117 import org.apache.hadoop.hbase.master.RegionState;
118 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
119 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
120 import org.apache.hadoop.hbase.regionserver.HRegion;
121 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
122 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
123 import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
124 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
125 import org.apache.hadoop.hbase.security.AccessDeniedException;
126 import org.apache.hadoop.hbase.security.UserProvider;
127 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
128 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
129 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
130 import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
131 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
132 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
133 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
134 import org.apache.hadoop.hbase.wal.WAL;
135 import org.apache.hadoop.hbase.wal.WALFactory;
136 import org.apache.hadoop.hbase.wal.WALSplitter;
137 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
138 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
139 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
140 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
141 import org.apache.hadoop.ipc.RemoteException;
142 import org.apache.hadoop.security.UserGroupInformation;
143 import org.apache.hadoop.util.ReflectionUtils;
144 import org.apache.hadoop.util.Tool;
145 import org.apache.hadoop.util.ToolRunner;
146 import org.apache.zookeeper.KeeperException;
147
148 /**
149  * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
150  * table integrity problems in a corrupted HBase.
151  * <p>
152  * Region consistency checks verify that hbase:meta, region deployment on region
153  * servers and the state of data in HDFS (.regioninfo files) all are in
154  * accordance.
155  * <p>
156  * Table integrity checks verify that all possible row keys resolve to exactly
157  * one region of a table.  This means there are no individual degenerate
158  * or backwards regions; no holes between regions; and that there are no
159  * overlapping regions.
160  * <p>
161  * The general repair strategy works in two phases:
162  * <ol>
163  * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
164  * <li> Repair Region Consistency with hbase:meta and assignments
165  * </ol>
166  * <p>
167  * For table integrity repairs, the tables' region directories are scanned
168  * for .regioninfo files.  Each table's integrity is then verified.  If there
169  * are any orphan regions (regions with no .regioninfo files) or holes, new
170  * regions are fabricated.  Backwards regions are sidelined as well as empty
171  * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
172  * a new region is created and all data is merged into the new region.
173  * <p>
174  * Table integrity repairs deal solely with HDFS and could potentially be done
175  * offline -- the hbase region servers or master do not need to be running.
176  * This phase can eventually be used to completely reconstruct the hbase:meta table in
177  * an offline fashion.
178  * <p>
179  * Region consistency requires three conditions -- 1) a valid .regioninfo file
180  * present in an HDFS region dir,  2) a valid row with .regioninfo data in META,
181  * and 3) a region deployed only at the regionserver that it is assigned to,
182  * with proper state in the master.
183  * <p>
184  * Region consistency repairs require hbase to be online so that hbck can
185  * contact the HBase master and region servers.  The hbck#connect() method must
186  * first be called successfully.  Much of the region consistency information
187  * is transient and less risky to repair.
188  * <p>
189  * If hbck is run from the command line, there are a handful of arguments that
190  * can be used to limit the kinds of repairs hbck will do.  See the code in
191  * {@link #printUsageAndExit()} for more details.
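 * <p>
 * For example (an illustrative subset; see {@link #printUsageAndExit()} for the
 * authoritative list of flags):
 * <pre>
 * $ hbase hbck                    # inconsistency report only, no repairs
 * $ hbase hbck -details           # report with per-region/per-server detail
 * $ hbase hbck -fixAssignments    # repair region assignment inconsistencies
 * </pre>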
192  */
193 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
194 @InterfaceStability.Evolving
195 public class HBaseFsck extends Configured implements Closeable {
196   public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
197   public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
198   private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
199   private static boolean rsSupportsOffline = true;
200   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
201   private static final int DEFAULT_MAX_MERGE = 5;
202   private static final String TO_BE_LOADED = "to_be_loaded";
203   private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
204   private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
205   private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
206   private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
207   // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
208   // In Hadoop 2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
209   // AlreadyBeingCreatedException, which implies a timeout on these operations of up to
210   // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
211   private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
212
213   /**********************
214    * Internal resources
215    **********************/
216   private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
217   private ClusterStatus status;
218   private ClusterConnection connection;
219   private Admin admin;
220   private Table meta;
221   // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
222   protected ExecutorService executor;
223   private long startMillis = EnvironmentEdgeManager.currentTime();
224   private HFileCorruptionChecker hfcc;
225   private int retcode = 0;
226   private Path HBCK_LOCK_PATH;
227   private FSDataOutputStream hbckOutFd;
228   // This lock is to prevent cleanup of balancer resources twice between
229   // ShutdownHook and the main code. We cleanup only if the connect() is
230   // successful
231   private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
232
233   /***********
234    * Options
235    ***********/
236   private static boolean details = false; // do we display the full report
237   private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this lag
238   private static boolean forceExclusive = false; // only this hbck can modify HBase
239   private static boolean disableBalancer = false; // disable load balancer to keep regions stable
240   private static boolean disableSplitAndMerge = false; // disable split and merge
241   private boolean fixAssignments = false; // fix assignment errors?
242   private boolean fixMeta = false; // fix meta errors?
243   private boolean checkHdfs = true; // load and check fs consistency?
244   private boolean fixHdfsHoles = false; // fix fs holes?
245   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
246   private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
247   private boolean fixTableOrphans = false; // fix table orphans (missing .tableinfo)
248   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
249   private boolean fixSplitParents = false; // fix lingering split parents
250   private boolean fixReferenceFiles = false; // fix lingering reference store file
251   private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
252   private boolean fixTableLocks = false; // fix table locks which are expired
253   private boolean fixReplication = false; // fix undeleted replication queues for removed peer
254   private boolean fixAny = false; // Set to true if any fix option is enabled.
255
256   // limit checking/fixes to listed tables, if empty attempt to check/fix all
257   // hbase:meta is always checked
258   private Set<TableName> tablesIncluded = new HashSet<TableName>();
259   private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
260   // maximum number of overlapping regions to sideline
261   private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
262   private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
263   private Path sidelineDir = null;
264
265   private boolean rerun = false; // if we tried to fix something, rerun hbck
266   private static boolean summary = false; // if we want to print less output
267   private boolean checkMetaOnly = false;
268   private boolean checkRegionBoundaries = false;
269   private boolean ignorePreCheckPermission = false; // skip the filesystem permission pre-check
270
271   /*********
272    * State
273    *********/
274   final private ErrorReporter errors;
275   int fixes = 0;
276
277   /**
278    * This map contains the state of all hbck items.  It maps from encoded region
279    * name to HbckInfo structure.  The information contained in HbckInfo is used
280    * to detect and correct consistency (hdfs/meta/deployment) problems.
281    */
282   private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
283   // Empty regioninfo qualifiers in hbase:meta
284   private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
285
286   /**
287    * This map from TableName -> TableInfo contains the structures necessary to
288    * detect table consistency problems (holes, dupes, overlaps).  It is sorted
289    * to prevent dupes.
290    *
291    * If tablesIncluded is empty, this map contains all tables.
292    * Otherwise, it contains only meta tables and tables in tablesIncluded,
293    * unless checkMetaOnly is specified, in which case it contains only
294    * the meta table.
295    */
296   private SortedMap<TableName, TableInfo> tablesInfo =
297       new ConcurrentSkipListMap<TableName, TableInfo>();
298
299   /**
300    * When initially looking at HDFS, we attempt to find any orphaned data.
301    */
302   private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
303
304   private Map<TableName, Set<String>> orphanTableDirs =
305       new HashMap<TableName, Set<String>>();
306   private Map<TableName, TableState> tableStates =
307       new HashMap<TableName, TableState>();
308   private final RetryCounterFactory lockFileRetryCounterFactory;
309
310   private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName, Set<String>>();
311
312   ZooKeeperWatcher zkw = null;
313   /**
314    * Constructor
315    *
316    * @param conf Configuration object
317    * @throws MasterNotRunningException if the master is not running
318    * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
319    */
320   public HBaseFsck(Configuration conf) throws MasterNotRunningException,
321       ZooKeeperConnectionException, IOException, ClassNotFoundException {
322     this(conf, createThreadPool(conf));
323   }
324
325   private static ExecutorService createThreadPool(Configuration conf) {
326     int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
327     return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
328   }
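
  // The pool size can be tuned before constructing HBaseFsck, e.g. (illustrative):
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setInt("hbasefsck.numthreads", 16);
  //   HBaseFsck fsck = new HBaseFsck(conf);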
329
330   /**
331    * Constructor
332    *
333    * @param conf
334    *          Configuration object
335    * @throws MasterNotRunningException
336    *           if the master is not running
337    * @throws ZooKeeperConnectionException
338    *           if unable to connect to ZooKeeper
339    */
340   public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
341       ZooKeeperConnectionException, IOException, ClassNotFoundException {
342     super(conf);
343     errors = getErrorReporter(getConf());
344     this.executor = exec;
345     lockFileRetryCounterFactory = new RetryCounterFactory(
346       getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
347       getConf().getInt(
348         "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
349       getConf().getInt(
350         "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
351     zkw = createZooKeeperWatcher();
352   }
353
354   private class FileLockCallable implements Callable<FSDataOutputStream> {
355     RetryCounter retryCounter;
356
357     public FileLockCallable(RetryCounter retryCounter) {
358       this.retryCounter = retryCounter;
359     }
360     @Override
361     public FSDataOutputStream call() throws IOException {
362       try {
363         FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
364         FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
365             HConstants.DATA_FILE_UMASK_KEY);
366         Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
367         fs.mkdirs(tmpDir);
368         HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
369         final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
370         out.writeBytes(InetAddress.getLocalHost().toString());
371         out.flush();
372         return out;
373       } catch(RemoteException e) {
374         if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
375           return null;
376         } else {
377           throw e;
378         }
379       }
380     }
381
382     private FSDataOutputStream createFileWithRetries(final FileSystem fs,
383         final Path hbckLockFilePath, final FsPermission defaultPerms)
384         throws IOException {
385
386       IOException exception = null;
387       do {
388         try {
389           return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
390         } catch (IOException ioe) {
391           LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
392               + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
393               + retryCounter.getMaxAttempts());
394           LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
395               ioe);
396           try {
397             exception = ioe;
398             retryCounter.sleepUntilNextRetry();
399           } catch (InterruptedException ie) {
400             throw (InterruptedIOException) new InterruptedIOException(
401                 "Can't create lock file " + hbckLockFilePath.getName())
402             .initCause(ie);
403           }
404         }
405       } while (retryCounter.shouldRetry());
406
407       throw exception;
408     }
409   }
410
411   /**
412    * This method maintains a lock using a file. If the creation fails we return null.
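   * The lock file is written as "hbase-hbck.lock" under the cluster's temp
   * directory (HConstants.HBASE_TEMP_DIRECTORY beneath the HBase root dir).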
413    *
414    * @return FSDataOutputStream object corresponding to the newly opened lock file
415    * @throws IOException if IO failure occurs
416    */
417   private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
418     RetryCounter retryCounter = lockFileRetryCounterFactory.create();
419     FileLockCallable callable = new FileLockCallable(retryCounter);
420     ExecutorService executor = Executors.newFixedThreadPool(1);
421     FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
422     executor.execute(futureTask);
423     final int timeoutInSeconds = getConf().getInt(
424       "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
425     FSDataOutputStream stream = null;
426     try {
427       stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
428     } catch (ExecutionException ee) {
429       LOG.warn("Encountered exception when opening lock file", ee);
430     } catch (InterruptedException ie) {
431       LOG.warn("Interrupted when opening lock file", ie);
432       Thread.currentThread().interrupt();
433     } catch (TimeoutException exception) {
434       // took too long to obtain lock
435       LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
436       futureTask.cancel(true);
437     } finally {
438       executor.shutdownNow();
439     }
440     return stream;
441   }
442
443   private void unlockHbck() {
444     if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
445       RetryCounter retryCounter = lockFileRetryCounterFactory.create();
446       do {
447         try {
448           IOUtils.closeQuietly(hbckOutFd);
449           FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
450               HBCK_LOCK_PATH, true);
451           LOG.info("Finishing hbck");
452           return;
453         } catch (IOException ioe) {
454           LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
455               + (retryCounter.getAttemptTimes() + 1) + " of "
456               + retryCounter.getMaxAttempts());
457           LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
458           try {
459             retryCounter.sleepUntilNextRetry();
460           } catch (InterruptedException ie) {
461             Thread.currentThread().interrupt();
462             LOG.warn("Interrupted while deleting lock file" +
463                 HBCK_LOCK_PATH);
464             return;
465           }
466         }
467       } while (retryCounter.shouldRetry());
468     }
469   }
470
471   /**
472    * To repair region consistency, one must call connect() in order to repair
473    * online state.
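   * <p>
   * A typical programmatic sequence (illustrative sketch; error handling omitted):
   * <pre>
   * HBaseFsck fsck = new HBaseFsck(conf);
   * fsck.connect();      // grabs the hbck lock (in exclusive mode) and opens cluster connections
   * try {
   *   fsck.onlineHbck(); // run the checks and any enabled repairs
   * } finally {
   *   fsck.close();      // releases the lock and closes connections
   * }
   * </pre>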
474    */
475   public void connect() throws IOException {
476
477     if (isExclusive()) {
478       // Grab the lock
479       hbckOutFd = checkAndMarkRunningHbck();
480       if (hbckOutFd == null) {
481         setRetCode(-1);
482         LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
483             "[If you are sure no other instance is running, delete the lock file " +
484             HBCK_LOCK_PATH + " and rerun the tool]");
485         throw new IOException("Duplicate hbck - Abort");
486       }
487
488       // Make sure to cleanup the lock
489       hbckLockCleanup.set(true);
490     }
491
492
493     // Add a shutdown hook to this thread, in case user tries to
494     // kill the hbck with a ctrl-c, we want to cleanup the lock so that
495     // it is available for further calls
496     Runtime.getRuntime().addShutdownHook(new Thread() {
497       @Override
498       public void run() {
499         IOUtils.closeQuietly(HBaseFsck.this);
500         unlockHbck();
501       }
502     });
503
504     LOG.info("Launching hbck");
505
506     connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
507     admin = connection.getAdmin();
508     meta = connection.getTable(TableName.META_TABLE_NAME);
509     status = admin.getClusterStatus();
510   }
511
512   /**
513    * Get deployed regions according to the region servers.
514    */
515   private void loadDeployedRegions() throws IOException, InterruptedException {
516     // From the master, get a list of all known live region servers
517     Collection<ServerName> regionServers = status.getServers();
518     errors.print("Number of live region servers: " + regionServers.size());
519     if (details) {
520       for (ServerName rsinfo: regionServers) {
521         errors.print("  " + rsinfo.getServerName());
522       }
523     }
524
525     // From the master, get a list of all dead region servers
526     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
527     errors.print("Number of dead region servers: " + deadRegionServers.size());
528     if (details) {
529       for (ServerName name: deadRegionServers) {
530         errors.print("  " + name);
531       }
532     }
533
534     // Print the current master name and state
535     errors.print("Master: " + status.getMaster());
536
537     // Print the list of all backup masters
538     Collection<ServerName> backupMasters = status.getBackupMasters();
539     errors.print("Number of backup masters: " + backupMasters.size());
540     if (details) {
541       for (ServerName name: backupMasters) {
542         errors.print("  " + name);
543       }
544     }
545
546     errors.print("Average load: " + status.getAverageLoad());
547     errors.print("Number of requests: " + status.getRequestsCount());
548     errors.print("Number of regions: " + status.getRegionsCount());
549
550     Set<RegionState> rits = status.getRegionsInTransition();
551     errors.print("Number of regions in transition: " + rits.size());
552     if (details) {
553       for (RegionState state: rits) {
554         errors.print("  " + state.toDescriptiveString());
555       }
556     }
557
558     // Determine what's deployed
559     processRegionServers(regionServers);
560   }
561
562   /**
563    * Clear the current state of hbck.
564    */
565   private void clearState() {
566     // Make sure regionInfo is empty before starting
567     fixes = 0;
568     regionInfoMap.clear();
569     emptyRegionInfoQualifiers.clear();
570     tableStates.clear();
571     errors.clear();
572     tablesInfo.clear();
573     orphanHdfsDirs.clear();
574     skippedRegions.clear();
575   }
576
577   /**
578    * This repair method analyzes hbase data in hdfs and repairs it to satisfy
579    * the table integrity rules.  HBase doesn't need to be online for this
580    * operation to work.
581    */
582   public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
583     // Initial pass to fix orphans.
584     if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
585         || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
586       LOG.info("Loading regioninfos HDFS");
587       // if nothing is happening this should always complete in two iterations.
588       int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
589       int curIter = 0;
590       do {
591         clearState(); // clears hbck state and resets fixes to 0.
592         // repair what's on HDFS
593         restoreHdfsIntegrity();
594         curIter++; // limit the number of iterations.
595       } while (fixes > 0 && curIter <= maxIterations);
596
597       // Repairs should be done in the first iteration and verification in the second.
598       // If there are more than 2 passes, something funny has happened.
599       if (curIter > 2) {
600         if (curIter == maxIterations) {
601           LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
602               + "Tables integrity may not be fully repaired!");
603         } else {
604           LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
605         }
606       }
607     }
608   }
609
610   /**
611    * This repair method requires the cluster to be online since it contacts
612    * region servers and the masters.  It makes each region's state in HDFS, in
613    * hbase:meta, and deployments consistent.
614    *
615    * @return If &gt; 0 , number of errors detected, if &lt; 0 there was an unrecoverable
616    *     error.  If 0, we have a clean hbase.
617    */
618   public int onlineConsistencyRepair() throws IOException, KeeperException,
619     InterruptedException {
620     clearState();
621
622     // get regions according to what is online on each RegionServer
623     loadDeployedRegions();
624     // check whether hbase:meta is deployed and online
625     recordMetaRegion();
626     // Check if hbase:meta is found only once and in the right place
627     if (!checkMetaRegion()) {
628       String errorMsg = "hbase:meta table is not consistent. ";
629       if (shouldFixAssignments()) {
630         errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
631       } else {
632         errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
633       }
634       errors.reportError(errorMsg + " Exiting...");
635       return -2;
636     }
637     // Do not continue with further consistency checks for tables when hbase:meta itself is not consistent.
638     LOG.info("Loading regionsinfo from the hbase:meta table");
639     boolean success = loadMetaEntries();
640     if (!success) return -1;
641
642     // Empty cells in hbase:meta?
643     reportEmptyMetaCells();
644
645     // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
646     if (shouldFixEmptyMetaCells()) {
647       fixEmptyMetaCells();
648     }
649
650     // get a list of all tables that have not changed recently.
651     if (!checkMetaOnly) {
652       reportTablesInFlux();
653     }
654
655     // Get disabled tables states
656     loadTableStates();
657
658     // load regiondirs and regioninfos from HDFS
659     if (shouldCheckHdfs()) {
660       LOG.info("Loading region directories from HDFS");
661       loadHdfsRegionDirs();
662       LOG.info("Loading region information from HDFS");
663       loadHdfsRegionInfos();
664     }
665
666     // fix the orphan tables
667     fixOrphanTables();
668
669     LOG.info("Checking and fixing region consistency");
670     // Check and fix consistency
671     checkAndFixConsistency();
672
673     // Check integrity (does not fix)
674     checkIntegrity();
675     return errors.getErrorList().size();
676   }
677
678   /**
679    * Contacts the master and prints out cluster-wide information
680    * @return 0 on success, non-zero on failure
681    */
682   public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
683     // print hbase server version
684     errors.print("Version: " + status.getHBaseVersion());
685     offlineHdfsIntegrityRepair();
686
687     boolean oldBalancer = false;
688     if (shouldDisableBalancer()) {
689       oldBalancer = admin.setBalancerRunning(false, true);
690     }
691     boolean[] oldSplitAndMerge = null;
692     if (shouldDisableSplitAndMerge()) {
693       admin.releaseSplitOrMergeLockAndRollback();
694       oldSplitAndMerge = admin.setSplitOrMergeEnabled(false, false, false,
695         MasterSwitchType.SPLIT, MasterSwitchType.MERGE);
696     }
697
698     try {
699       onlineConsistencyRepair();
700     }
701     finally {
702       // Only restore the balancer if it was true when we started repairing and
703       // we actually disabled it. Otherwise, we might clobber another run of
704       // hbck that has just restored it.
705       if (shouldDisableBalancer() && oldBalancer) {
706         admin.setBalancerRunning(oldBalancer, false);
707       }
708
709       if (shouldDisableSplitAndMerge()) {
710         if (oldSplitAndMerge != null) {
711           admin.releaseSplitOrMergeLockAndRollback();
712         }
713       }
714     }
715
716     if (checkRegionBoundaries) {
717       checkRegionBoundaries();
718     }
719
720     offlineReferenceFileRepair();
721
722     checkAndFixTableLocks();
723
724     checkAndFixReplication();
725
726     // Remove the hbck lock
727     unlockHbck();
728
729     // Print table summary
730     printTableSummary(tablesInfo);
731     return errors.summarize();
732   }
733
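  /**
   * Extracts just the row from a serialized key: the first two bytes encode the
   * row length, followed by the row bytes themselves.
   */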
734   public static byte[] keyOnly(byte[] b) {
735     if (b == null)
736       return b;
737     int rowlength = Bytes.toShort(b, 0);
738     byte[] result = new byte[rowlength];
739     System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
740     return result;
741   }
742
743   @Override
744   public void close() throws IOException {
745     try {
746       unlockHbck();
747     } catch (Exception io) {
748       LOG.warn(io);
749     } finally {
750       if (zkw != null) {
751         zkw.close();
752         zkw = null;
753       }
754       IOUtils.closeQuietly(admin);
755       IOUtils.closeQuietly(meta);
756       IOUtils.closeQuietly(connection);
757     }
758   }
759
760   private static class RegionBoundariesInformation {
761     public byte [] regionName;
762     public byte [] metaFirstKey;
763     public byte [] metaLastKey;
764     public byte [] storesFirstKey;
765     public byte [] storesLastKey;
766     @Override
767     public String toString () {
768       return "regionName=" + Bytes.toStringBinary(regionName) +
769              "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
770              "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
771              "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
772              "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
773     }
774   }
775
776   public void checkRegionBoundaries() {
777     try {
778       ByteArrayComparator comparator = new ByteArrayComparator();
779       List<HRegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
780       final RegionBoundariesInformation currentRegionBoundariesInformation =
781           new RegionBoundariesInformation();
782       Path hbaseRoot = FSUtils.getRootDir(getConf());
783       for (HRegionInfo regionInfo : regions) {
784         Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
785         currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
786         // For each region, get the start and stop key from the META and compare them to the
787         // same information from the Stores.
788         Path path = new Path(tableDir, regionInfo.getEncodedName());
789         FileSystem fs = path.getFileSystem(getConf());
790         FileStatus[] files = fs.listStatus(path);
791         // For all the column families in this region...
792         byte[] storeFirstKey = null;
793         byte[] storeLastKey = null;
794         for (FileStatus file : files) {
795           String fileName = file.getPath().toString();
796           fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
797           if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
798             FileStatus[] storeFiles = fs.listStatus(file.getPath());
799             // For all the stores in this column family.
800             for (FileStatus storeFile : storeFiles) {
801               HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
802                   getConf()), getConf());
803               if ((reader.getFirstKey() != null)
804                   && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
805                       ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey()) > 0))) {
806                 storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey()).getKey();
807               }
808               if ((reader.getLastKey() != null)
809                   && ((storeLastKey == null) || (comparator.compare(storeLastKey,
810                       ((KeyValue.KeyOnlyKeyValue)reader.getLastKey()).getKey())) < 0)) {
811                 storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey()).getKey();
812               }
813               reader.close();
814             }
815           }
816         }
817         currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
818         currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
819         currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
820         currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
821         if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
822           currentRegionBoundariesInformation.metaFirstKey = null;
823         if (currentRegionBoundariesInformation.metaLastKey.length == 0)
824           currentRegionBoundariesInformation.metaLastKey = null;
825
826         // For a region to be correct, we need the META start key to be smaller or equal to the
827         // smallest start key from all the stores, and the start key from the next META entry to
828         // be bigger than the last key from all the current stores. First region start key is null;
829         // Last region end key is null; some regions can be empty and not have any store.
830
831         boolean valid = true;
832         // Checking start key.
833         if ((currentRegionBoundariesInformation.storesFirstKey != null)
834             && (currentRegionBoundariesInformation.metaFirstKey != null)) {
835           valid = valid
836               && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
837                 currentRegionBoundariesInformation.metaFirstKey) >= 0;
838         }
839         // Checking stop key.
840         if ((currentRegionBoundariesInformation.storesLastKey != null)
841             && (currentRegionBoundariesInformation.metaLastKey != null)) {
842           valid = valid
843               && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
844                 currentRegionBoundariesInformation.metaLastKey) < 0;
845         }
846         if (!valid) {
847           errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
848             tablesInfo.get(regionInfo.getTable()));
849           LOG.warn("Region's boundaries not alligned between stores and META for:");
850           LOG.warn(currentRegionBoundariesInformation);
851         }
852       }
853     } catch (IOException e) {
854       LOG.error(e);
855     }
856   }
857
858   /**
859    * Iterates through the list of all orphan/invalid regiondirs.
860    */
861   private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
862     for (HbckInfo hi : orphanHdfsDirs) {
863       LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
864       adoptHdfsOrphan(hi);
865     }
866   }
867
868   /**
869    * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
870    * these orphans by creating a new region, and moving the column families,
871    * recovered edits, WALs, into the new region dir.  We determine the region
872    * startkey and endkeys by looking at all of the hfiles inside the column
873    * families to identify the min and max keys. The resulting region will
874    * likely violate table integrity but will be dealt with by merging
875    * overlapping regions.
876    */
877   @SuppressWarnings("deprecation")
878   private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
879     Path p = hi.getHdfsRegionDir();
880     FileSystem fs = p.getFileSystem(getConf());
881     FileStatus[] dirs = fs.listStatus(p);
882     if (dirs == null) {
883       LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " +
884           p + ". This dir could probably be deleted.");
885       return ;
886     }
887
888     TableName tableName = hi.getTableName();
889     TableInfo tableInfo = tablesInfo.get(tableName);
890     Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
891     HTableDescriptor template = tableInfo.getHTD();
892
893     // find min and max key values
894     Pair<byte[],byte[]> orphanRegionRange = null;
895     for (FileStatus cf : dirs) {
896       String cfName = cf.getPath().getName();
897       // TODO Figure out what the special dirs are
898       if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
899
900       FileStatus[] hfiles = fs.listStatus(cf.getPath());
901       for (FileStatus hfile : hfiles) {
902         byte[] start, end;
903         HFile.Reader hf = null;
904         try {
905           CacheConfig cacheConf = new CacheConfig(getConf());
906           hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
907           hf.loadFileInfo();
908           Cell startKv = hf.getFirstKey();
909           start = CellUtil.cloneRow(startKv);
910           Cell endKv = hf.getLastKey();
911           end = CellUtil.cloneRow(endKv);
912         } catch (IOException ioe) {
913           LOG.warn("Problem reading orphan file " + hfile + ", skipping");
914           continue;
915         } catch (NullPointerException ioe) {
916           LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
917           continue;
918         } finally {
919           if (hf != null) {
920             hf.close();
921           }
922         }
923
924         // expand the range to include the range of all hfiles
925         if (orphanRegionRange == null) {
926           // first range
927           orphanRegionRange = new Pair<byte[], byte[]>(start, end);
928         } else {
929           // TODO add test
930
931           // expand range only if the hfile is wider.
932           if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
933             orphanRegionRange.setFirst(start);
934           }
935           if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
936             orphanRegionRange.setSecond(end);
937           }
938         }
939       }
940     }
941     if (orphanRegionRange == null) {
942       LOG.warn("No data in dir " + p + ", sidelining data");
943       fixes++;
944       sidelineRegionDir(fs, hi);
945       return;
946     }
947     LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
948         Bytes.toString(orphanRegionRange.getSecond()) + ")");
949
950     // create new region on hdfs. move data into place.
951     HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
952         Bytes.add(orphanRegionRange.getSecond(), new byte[1]));
953     LOG.info("Creating new region : " + hri);
954     HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
955     Path target = region.getRegionFileSystem().getRegionDir();
956
957     // rename all the data to new region
958     mergeRegionDirs(target, hi);
959     fixes++;
960   }
961
962   /**
963    * This method determines if there are table integrity errors in HDFS.  If
964    * there are errors and the appropriate "fix" options are enabled, the method
965    * will first correct orphan regions making them into legit regiondirs, and
966    * then reload to merge potentially overlapping regions.
967    *
968    * @return number of table integrity errors found
969    */
970   private int restoreHdfsIntegrity() throws IOException, InterruptedException {
971     // Determine what's on HDFS
972     LOG.info("Loading HBase regioninfo from HDFS...");
973     loadHdfsRegionDirs(); // populating regioninfo table.
974
975     int errs = errors.getErrorList().size();
976     // First time just get suggestions.
977     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
978     checkHdfsIntegrity(false, false);
979
980     if (errors.getErrorList().size() == errs) {
981       LOG.info("No integrity errors.  We are done with this phase. Glorious.");
982       return 0;
983     }
984
985     if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
986       adoptHdfsOrphans(orphanHdfsDirs);
987       // TODO optimize by incrementally adding instead of reloading.
988     }
989
990     // Make sure there are no holes now.
991     if (shouldFixHdfsHoles()) {
992       clearState(); // this also resets # fixes.
993       loadHdfsRegionDirs();
994       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
995       tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
996     }
997
998     // Now we fix overlaps
999     if (shouldFixHdfsOverlaps()) {
1000       // second pass we fix overlaps.
1001       clearState(); // this also resets # fixes.
1002       loadHdfsRegionDirs();
1003       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1004       tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1005     }
1006
1007     return errors.getErrorList().size();
1008   }
1009
1010   /**
1011    * Scan all the store file names to find any lingering reference files,
1012    * which refer to non-existent files. If the "fix" option is enabled,
1013    * any lingering reference file will be sidelined if found.
1014    * <p>
1015    * A lingering reference file prevents a region from opening. It has to
1016    * be fixed before a cluster can start properly.
1017    */
1018   private void offlineReferenceFileRepair() throws IOException {
1019     Configuration conf = getConf();
1020     Path hbaseRoot = FSUtils.getRootDir(conf);
1021     FileSystem fs = hbaseRoot.getFileSystem(conf);
1022     LOG.info("Computing mapping of all store files");
1023     Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot, errors);
1024     errors.print("");
1025     LOG.info("Validating mapping using HDFS state");
1026     for (Path path: allFiles.values()) {
1027       boolean isReference = false;
1028       try {
1029         isReference = StoreFileInfo.isReference(path);
1030       } catch (Throwable t) {
1031         // Ignore. Some files may not be store files at all.
1032         // For example, files under .oldlogs folder in hbase:meta
1033         // Warning message is already logged by
1034         // StoreFile#isReference.
1035       }
1036       if (!isReference) continue;
1037
1038       Path referredToFile = StoreFileInfo.getReferredToFile(path);
1039       if (fs.exists(referredToFile)) continue;  // good, expected
1040
1041       // Found a lingering reference file
1042       errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1043         "Found lingering reference file " + path);
1044       if (!shouldFixReferenceFiles()) continue;
1045
1046       // Now, trying to fix it since requested
1047       boolean success = false;
1048       String pathStr = path.toString();
1049
1050       // A reference file path should be like
1051       // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1052       // Up 5 directories to get the root folder.
1053       // So the file will be sidelined to a similar folder structure.
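      // For example (hypothetical names, matching the pattern above), a reference
      //   /hbase/data/default/t1/0123abcd/cf1/aaaa1111.4567efgh
      // would be sidelined to
      //   <sidelineDir>/data/default/t1/0123abcd/cf1/aaaa1111.4567efgh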
1054       int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1055       for (int i = 0; index > 0 && i < 5; i++) {
1056         index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1057       }
1058       if (index > 0) {
1059         Path rootDir = getSidelineDir();
1060         Path dst = new Path(rootDir, pathStr.substring(index + 1));
1061         fs.mkdirs(dst.getParent());
1062         LOG.info("Trying to sildeline reference file "
1063           + path + " to " + dst);
1064         setShouldRerun();
1065
1066         success = fs.rename(path, dst);
1067       }
1068       if (!success) {
1069         LOG.error("Failed to sideline reference file " + path);
1070       }
1071     }
1072   }
1073
1074   /**
1075    * TODO -- need to add tests for this.
1076    */
1077   private void reportEmptyMetaCells() {
1078     errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1079       emptyRegionInfoQualifiers.size());
1080     if (details) {
1081       for (Result r: emptyRegionInfoQualifiers) {
1082         errors.print("  " + r);
1083       }
1084     }
1085   }
1086
1087   /**
1088    * TODO -- need to add tests for this.
1089    */
1090   private void reportTablesInFlux() {
1091     AtomicInteger numSkipped = new AtomicInteger(0);
1092     HTableDescriptor[] allTables = getTables(numSkipped);
1093     errors.print("Number of Tables: " + allTables.length);
1094     if (details) {
1095       if (numSkipped.get() > 0) {
1096         errors.detail("Number of Tables in flux: " + numSkipped.get());
1097       }
1098       for (HTableDescriptor td : allTables) {
1099         errors.detail("  Table: " + td.getTableName() + "\t" +
1100                            (td.isReadOnly() ? "ro" : "rw") + "\t" +
1101                             (td.isMetaRegion() ? "META" : "    ") + "\t" +
1102                            " families: " + td.getFamilies().size());
1103       }
1104     }
1105   }
1106
1107   public ErrorReporter getErrors() {
1108     return errors;
1109   }
1110
1111   /**
1112    * Read the .regioninfo file from the file system.  If there is no
1113    * .regioninfo, add it to the orphan hdfs region list.
1114    */
1115   private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1116     Path regionDir = hbi.getHdfsRegionDir();
1117     if (regionDir == null) {
1118       LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1119       return;
1120     }
1121
1122     if (hbi.hdfsEntry.hri != null) {
1123       // already loaded data
1124       return;
1125     }
1126
1127     FileSystem fs = FileSystem.get(getConf());
1128     HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1129     LOG.debug("HRegionInfo read: " + hri.toString());
1130     hbi.hdfsEntry.hri = hri;
1131   }
1132
1133   /**
1134    * Exception thrown when an integrity repair operation fails in an
1135    * unresolvable way.
1136    */
1137   public static class RegionRepairException extends IOException {
1138     private static final long serialVersionUID = 1L;
1139     final IOException ioe;
1140     public RegionRepairException(String s, IOException ioe) {
1141       super(s);
1142       this.ioe = ioe;
1143     }
1144   }
1145
1146   /**
1147    * Populate hbi's from regionInfos loaded from file system.
1148    */
1149   private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1150       throws IOException, InterruptedException {
1151     tablesInfo.clear(); // regenerating the data
1152     // generate region split structure
1153     Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1154
1155     // Parallelized read of .regioninfo files.
1156     List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
1157     List<Future<Void>> hbiFutures;
1158
1159     for (HbckInfo hbi : hbckInfos) {
1160       WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1161       hbis.add(work);
1162     }
1163
1164     // Submit and wait for completion
1165     hbiFutures = executor.invokeAll(hbis);
1166
1167     for(int i=0; i<hbiFutures.size(); i++) {
1168       WorkItemHdfsRegionInfo work = hbis.get(i);
1169       Future<Void> f = hbiFutures.get(i);
1170       try {
1171         f.get();
1172       } catch(ExecutionException e) {
1173         LOG.warn("Failed to read .regioninfo file for region " +
1174               work.hbi.getRegionNameAsString(), e.getCause());
1175       }
1176     }
1177
1178     Path hbaseRoot = FSUtils.getRootDir(getConf());
1179     FileSystem fs = hbaseRoot.getFileSystem(getConf());
1180     // serialized table info gathering.
1181     for (HbckInfo hbi: hbckInfos) {
1182
1183       if (hbi.getHdfsHRI() == null) {
1184         // was an orphan
1185         continue;
1186       }
1187
1188
1189       // get table name from hdfs, populate various HBaseFsck tables.
1190       TableName tableName = hbi.getTableName();
1191       if (tableName == null) {
1192         // There was an entry in hbase:meta but not in HDFS?
1193         LOG.warn("tableName was null for: " + hbi);
1194         continue;
1195       }
1196
1197       TableInfo modTInfo = tablesInfo.get(tableName);
1198       if (modTInfo == null) {
1199         // only executed once per table.
1200         modTInfo = new TableInfo(tableName);
1201         tablesInfo.put(tableName, modTInfo);
1202         try {
1203           HTableDescriptor htd =
1204               FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1205           modTInfo.htds.add(htd);
1206         } catch (IOException ioe) {
1207           if (!orphanTableDirs.containsKey(tableName)) {
1208             LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1209             //should only report once for each table
1210             errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1211                 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1212             Set<String> columns = new HashSet<String>();
1213             orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1214           }
1215         }
1216       }
1217       if (!hbi.isSkipChecks()) {
1218         modTInfo.addRegionInfo(hbi);
1219       }
1220     }
1221
1222     loadTableInfosForTablesWithNoRegion();
1223     errors.print("");
1224
1225     return tablesInfo;
1226   }
1227
1228   /**
1229    * To get the column family list according to the column family dirs
1230    * @param columns set to which the discovered column family names are added
1231    * @param hbi region whose column family directories are inspected
1232    * @return a set of column families
1233    * @throws IOException
1234    */
1235   private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1236     Path regionDir = hbi.getHdfsRegionDir();
1237     FileSystem fs = regionDir.getFileSystem(getConf());
1238     FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1239     for (FileStatus subdir : subDirs) {
1240       String columnfamily = subdir.getPath().getName();
1241       columns.add(columnfamily);
1242     }
1243     return columns;
1244   }
1245
1246   /**
1247    * To fabricate a .tableinfo file with the following contents:<br>
1248    * 1. the correct tablename <br>
1249    * 2. the correct colfamily list<br>
1250    * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1251    * @throws IOException
1252    */
1253   private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1254       Set<String> columns) throws IOException {
1255     if (columns == null || columns.isEmpty()) return false;
1256     HTableDescriptor htd = new HTableDescriptor(tableName);
1257     for (String columnfamily : columns) {
1258       htd.addFamily(new HColumnDescriptor(columnfamily));
1259     }
1260     fstd.createTableDescriptor(htd, true);
1261     return true;
1262   }
1263
1264   /**
1265    * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1266    * @throws IOException
1267    */
1268   public void fixEmptyMetaCells() throws IOException {
1269     if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1270       LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1271       for (Result region : emptyRegionInfoQualifiers) {
1272         deleteMetaRegion(region.getRow());
1273         errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1274       }
1275       emptyRegionInfoQualifiers.clear();
1276     }
1277   }
1278
1279   /**
1280    * To fix orphan table by creating a .tableinfo file under tableDir <br>
1281    * 1. if the TableInfo is cached, recover the .tableinfo from it <br>
1282    * 2. else create a default .tableinfo file with the following items<br>
1283    * &nbsp;2.1 the correct tablename <br>
1284    * &nbsp;2.2 the correct colfamily list<br>
1285    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1286    * @throws IOException
1287    */
1288   public void fixOrphanTables() throws IOException {
1289     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1290
1291       List<TableName> tmpList = new ArrayList<TableName>();
1292       tmpList.addAll(orphanTableDirs.keySet());
1293       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1294       Iterator<Entry<TableName, Set<String>>> iter =
1295           orphanTableDirs.entrySet().iterator();
1296       int j = 0;
1297       int numFailedCase = 0;
1298       FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1299       while (iter.hasNext()) {
1300         Entry<TableName, Set<String>> entry =
1301             iter.next();
1302         TableName tableName = entry.getKey();
1303         LOG.info("Trying to fix orphan table error: " + tableName);
1304         if (j < htds.length) {
1305           if (tableName.equals(htds[j].getTableName())) {
1306             HTableDescriptor htd = htds[j];
1307             LOG.info("fixing orphan table: " + tableName + " from cache");
1308             fstd.createTableDescriptor(htd, true);
1309             j++;
1310             iter.remove();
1311           }
1312         } else {
1313           if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1314             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1315             LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
1316             iter.remove();
1317           } else {
1318             LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1319             numFailedCase++;
1320           }
1321         }
1322         fixes++;
1323       }
1324
1325       if (orphanTableDirs.isEmpty()) {
1326         // all orphanTableDirs were successfully recovered
1327         // re-run doFsck after recovering the .tableinfo file
1328         setShouldRerun();
1329         LOG.warn("Strongly recommend re-running hbck manually once all orphan table dirs have been fixed");
1330       } else if (numFailedCase > 0) {
1331         LOG.error("Failed to fix " + numFailedCase
1332             + " OrphanTables with default .tableinfo files");
1333       }
1334
1335     }
1336     //cleanup the list
1337     orphanTableDirs.clear();
1338
1339   }
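
       // Sketch: this repair corresponds to the -fixTableOrphans hbck option; an
       // illustrative invocation would be
       //   $ hbase hbck -fixTableOrphans
       // followed by a manual re-run of hbck, as the warnings above recommend.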
1340
1341   /**
1342    * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates its own WAL, so be
1343    * sure to close it, as well as the region, when you're finished.
1344    *
1345    * @return an open hbase:meta HRegion
1346    */
1347   private HRegion createNewMeta() throws IOException {
1348     Path rootdir = FSUtils.getRootDir(getConf());
1349     Configuration c = getConf();
1350     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1351     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1352     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1353     // The WAL subsystem will use the default rootDir rather than the passed in rootDir
1354     // unless I pass along via the conf.
1355     Configuration confForWAL = new Configuration(c);
1356     confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
1357     WAL wal = (new WALFactory(confForWAL,
1358         Collections.<WALActionsListener>singletonList(new MetricsWAL()),
1359         "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8))).
1360         getWAL(metaHRI.getEncodedNameAsBytes(), metaHRI.getTable().getNamespace());
1361     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
1362     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1363     return meta;
1364   }
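
       /* Usage sketch, mirroring rebuildMeta() below: callers own both handles.
        *
        *   HRegion meta = createNewMeta();
        *   try {
        *     // ... write puts ...
        *   } finally {
        *     meta.close();
        *     if (meta.getWAL() != null) meta.getWAL().close();
        *   }
        */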
1365
1366   /**
1367    * Generate set of puts to add to new meta.  This expects the tables to be
1368    * clean with no overlaps or holes.  If there are any problems it returns null.
1369    *
1370    * @return An array list of puts to do in bulk, null if tables have problems
1371    */
1372   private ArrayList<Put> generatePuts(
1373       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1374     ArrayList<Put> puts = new ArrayList<Put>();
1375     boolean hasProblems = false;
1376     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1377       TableName name = e.getKey();
1378
1379       // skip "hbase:meta"
1380       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1381         continue;
1382       }
1383
1384       TableInfo ti = e.getValue();
1385       puts.add(MetaTableAccessor
1386           .makePutFromTableState(new TableState(ti.tableName, TableState.State.ENABLED)));
1387       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1388           .entrySet()) {
1389         Collection<HbckInfo> his = spl.getValue();
1390         int sz = his.size();
1391         if (sz != 1) {
1392           // problem
1393           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1394               + " had " +  sz + " regions instead of exactly 1." );
1395           hasProblems = true;
1396           continue;
1397         }
1398
1399         // add the row directly to meta.
1400         HbckInfo hi = his.iterator().next();
1401         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1402         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1403         puts.add(p);
1404       }
1405     }
1406     return hasProblems ? null : puts;
1407   }
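
       // Note: returning null (rather than an empty list) is the signal to
       // rebuildMeta() that table integrity problems were found. For a healthy
       // table with N regions this produces N+1 puts: one table state marker
       // plus one serialized HRegionInfo per region.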
1408
1409   /**
1410    * Suggest fixes for each table
1411    */
1412   private void suggestFixes(
1413       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1414     logParallelMerge();
1415     for (TableInfo tInfo : tablesInfo.values()) {
1416       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1417       tInfo.checkRegionChain(handler);
1418     }
1419   }
1420
1421   /**
1422    * Rebuilds meta from information in hdfs/fs.  Depends on configuration settings passed into
1423    * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE.
1424    *
1425    * @param fix flag that determines if the method should attempt to fix holes
1426    * @return true if successful, false if attempt failed.
1427    */
1428   public boolean rebuildMeta(boolean fix) throws IOException,
1429       InterruptedException {
1430
1431     // TODO check to make sure hbase is offline. (or at least the table
1432     // currently being worked on is off line)
1433
1434     // Determine what's on HDFS
1435     LOG.info("Loading HBase regioninfo from HDFS...");
1436     loadHdfsRegionDirs(); // populating regioninfo table.
1437
1438     int errs = errors.getErrorList().size();
1439     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1440     checkHdfsIntegrity(false, false);
1441
1442     // make sure no new errors were reported during the HDFS load and integrity check.
1443     if (errors.getErrorList().size() != errs) {
1444       // While in error state, iterate until no more fixes possible
1445       while(true) {
1446         fixes = 0;
1447         suggestFixes(tablesInfo);
1448         errors.clear();
1449         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1450         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1451
1452         int errCount = errors.getErrorList().size();
1453
1454         if (fixes == 0) {
1455           if (errCount > 0) {
1456             return false; // failed to fix problems.
1457           } else {
1458             break; // no fixes and no problems? drop out and fix stuff!
1459           }
1460         }
1461       }
1462     }
1463
1464     // we can rebuild, move old meta out of the way and start
1465     LOG.info("HDFS regioninfos seem good. Sidelining old hbase:meta");
1466     Path backupDir = sidelineOldMeta();
1467
1468     LOG.info("Creating new hbase:meta");
1469     HRegion meta = createNewMeta();
1470
1471     // populate meta
1472     List<Put> puts = generatePuts(tablesInfo);
1473     if (puts == null) {
1474       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1475         "You may need to restore the previously sidelined hbase:meta");
1476       return false;
1477     }
1478     meta.batchMutate(puts.toArray(new Put[puts.size()]), HConstants.NO_NONCE, HConstants.NO_NONCE);
1479     meta.close();
1480     if (meta.getWAL() != null) {
1481       meta.getWAL().close();
1482     }
1483     LOG.info("Success! hbase:meta table rebuilt.");
1484     LOG.info("Old hbase:meta is moved into " + backupDir);
1485     return true;
1486   }
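
       // Sketch: rebuildMeta() is normally driven by the offline meta-repair tool
       // (org.apache.hadoop.hbase.util.hbck.OfflineMetaRepair) rather than the
       // online fsck path, with HBase shut down per the OFFLINE contract above,
       // e.g. (illustrative): $ hbase org.apache.hadoop.hbase.util.hbck.OfflineMetaRepair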
1487
1488   /**
1489    * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1490    */
1491   private void logParallelMerge() {
1492     if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1493       LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
1494           " false to run serially.");
1495     } else {
1496       LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
1497           " true to run in parallel.");
1498     }
1498     }
1499   }
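
       /* Sketch: the parallel/serial choice is re-read from the Configuration on
        * each run, so it can be flipped without code changes, e.g.:
        *
        *   Configuration conf = HBaseConfiguration.create();
        *   conf.setBoolean("hbasefsck.overlap.merge.parallel", false); // run serially
        */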
1500
1501   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1502       boolean fixOverlaps) throws IOException {
1503     LOG.info("Checking HBase region split map from HDFS data...");
1504     logParallelMerge();
1505     for (TableInfo tInfo : tablesInfo.values()) {
1506       TableIntegrityErrorHandler handler;
1507       if (fixHoles || fixOverlaps) {
1508         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1509           fixHoles, fixOverlaps);
1510       } else {
1511         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1512       }
1513       if (!tInfo.checkRegionChain(handler)) {
1514         // should dump info as well.
1515         errors.report("Found inconsistency in table " + tInfo.getName());
1516       }
1517     }
1518     return tablesInfo;
1519   }
1520
1521   private Path getSidelineDir() throws IOException {
1522     if (sidelineDir == null) {
1523       Path hbaseDir = FSUtils.getRootDir(getConf());
1524       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1525       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1526           + startMillis);
1527     }
1528     return sidelineDir;
1529   }
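
       // Resulting layout (sketch): <hbase.rootdir>/<HBCK_SIDELINEDIR_NAME>/<rootdir-name>-<startMillis>
       // One sideline dir per hbck run (keyed by startMillis), so repeated runs never collide.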
1530
1531   /**
1532    * Sideline a region dir (instead of deleting it)
1533    */
1534   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1535     return sidelineRegionDir(fs, null, hi);
1536   }
1537
1538   /**
1539    * Sideline a region dir (instead of deleting it)
1540    *
1541    * @param parentDir if specified, the region will be sidelined to a folder like
1542    *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1543    *     similar sidelined regions, for example regions that should be bulk loaded back
1544    *     later on. If NULL, it is ignored.
1545    */
1546   Path sidelineRegionDir(FileSystem fs,
1547       String parentDir, HbckInfo hi) throws IOException {
1548     TableName tableName = hi.getTableName();
1549     Path regionDir = hi.getHdfsRegionDir();
1550
1551     if (!fs.exists(regionDir)) {
1552       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1553       return null;
1554     }
1555
1556     Path rootDir = getSidelineDir();
1557     if (parentDir != null) {
1558       rootDir = new Path(rootDir, parentDir);
1559     }
1560     Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1561     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1562     fs.mkdirs(sidelineRegionDir);
1563     boolean success = false;
1564     FileStatus[] cfs = fs.listStatus(regionDir);
1565     if (cfs == null) {
1566       LOG.info("Region dir is empty: " + regionDir);
1567     } else {
1568       for (FileStatus cf : cfs) {
1569         Path src = cf.getPath();
1570         Path dst =  new Path(sidelineRegionDir, src.getName());
1571         if (fs.isFile(src)) {
1572           // simple file
1573           success = fs.rename(src, dst);
1574           if (!success) {
1575             String msg = "Unable to rename file " + src + " to " + dst;
1576             LOG.error(msg);
1577             throw new IOException(msg);
1578           }
1579           continue;
1580         }
1581
1582         // is a directory.
1583         fs.mkdirs(dst);
1584
1585         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1586         // FileSystem.rename is inconsistent with directories -- if the
1587         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1588         // it moves the src into the dst dir resulting in (foo/a/b).  If
1589         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1590         FileStatus[] hfiles = fs.listStatus(src);
1591         if (hfiles != null && hfiles.length > 0) {
1592           for (FileStatus hfile : hfiles) {
1593             success = fs.rename(hfile.getPath(), dst);
1594             if (!success) {
1595               String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1596               LOG.error(msg);
1597               throw new IOException(msg);
1598             }
1599           }
1600         }
1601         LOG.debug("Sideline directory contents:");
1602         debugLsr(sidelineRegionDir);
1603       }
1604     }
1605
1606     LOG.info("Removing old region dir: " + regionDir);
1607     success = fs.delete(regionDir, true);
1608     if (!success) {
1609       String msg = "Unable to delete dir " + regionDir;
1610       LOG.error(msg);
1611       throw new IOException(msg);
1612     }
1613     return sidelineRegionDir;
1614   }
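
       /* Path sketch for the move above (all names illustrative):
        *
        *   <rootdir>/.../ns/t/e4f1a2...  -->  <sidelineDir>[/<parentDir>]/.../ns/t/e4f1a2...
        *
        * The sidelined tree mirrors the normal table/region layout, so a region
        * can be moved or bulk loaded back wholesale later.
        */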
1615
1616   /**
1617    * Sideline an entire table.
1618    */
1619   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1620       Path backupHbaseDir) throws IOException {
1621     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1622     if (fs.exists(tableDir)) {
1623       Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
1624       fs.mkdirs(backupTableDir.getParent());
1625       boolean success = fs.rename(tableDir, backupTableDir);
1626       if (!success) {
1627         throw new IOException("Failed to move " + tableName + " from "
1628             + tableDir + " to " + backupTableDir);
1629       }
1630     } else {
1631       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1632     }
1633   }
1634
1635   /**
1636    * @return Path to backup of original directory
1637    */
1638   Path sidelineOldMeta() throws IOException {
1639     // put current hbase:meta aside.
1640     Path hbaseDir = FSUtils.getRootDir(getConf());
1641     FileSystem fs = hbaseDir.getFileSystem(getConf());
1642     Path backupDir = getSidelineDir();
1643     fs.mkdirs(backupDir);
1644
1645     try {
1646       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1647     } catch (IOException e) {
1648       LOG.fatal("... failed to sideline meta. Currently in an inconsistent state. To restore, "
1649           + "try to rename hbase:meta in " + backupDir.getName() + " to "
1650           + hbaseDir.getName() + ".", e);
1651       throw e; // throw original exception
1652     }
1653     return backupDir;
1654   }
1655
1656   /**
1657    * Load the table states stored in hbase:meta into the local map.
1658    * @throws IOException
1659    */
1661   private void loadTableStates()
1662   throws IOException {
1663     tableStates = MetaTableAccessor.getTableStates(connection);
1664   }
1665
1666   /**
1667    * Check if the specified table is disabled or disabling.
1668    * @param tableName table to check status of
1669    */
1670   private boolean isTableDisabled(TableName tableName) {
1671     return tableStates.containsKey(tableName)
1672         && tableStates.get(tableName)
1673         .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1674   }
1675
1676   /**
1677    * Scan HDFS for all regions, recording their information into
1678    * regionInfoMap
1679    */
1680   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1681     Path rootDir = FSUtils.getRootDir(getConf());
1682     FileSystem fs = rootDir.getFileSystem(getConf());
1683
1684     // list all tables from HDFS
1685     List<FileStatus> tableDirs = Lists.newArrayList();
1686
1687     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1688
1689     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1690     for (Path path : paths) {
1691       TableName tableName = FSUtils.getTableName(path);
1692       if ((!checkMetaOnly &&
1693           isTableIncluded(tableName)) ||
1694           tableName.equals(TableName.META_TABLE_NAME)) {
1695         tableDirs.add(fs.getFileStatus(path));
1696       }
1697     }
1698
1699     // verify that version file exists
1700     if (!foundVersionFile) {
1701       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1702           "Version file does not exist in root dir " + rootDir);
1703       if (shouldFixVersionFile()) {
1704         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1705             + " file.");
1706         setShouldRerun();
1707         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1708             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1709             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1710             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1711       }
1712     }
1713
1714     // level 1:  <HBASE_DIR>/*
1715     List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
1716     List<Future<Void>> dirsFutures;
1717
1718     for (FileStatus tableDir : tableDirs) {
1719       LOG.debug("Loading region dirs from " + tableDir.getPath());
1720       dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
1721     }
1722
1723     // Invoke and wait for Callables to complete
1724     dirsFutures = executor.invokeAll(dirs);
1725
1726     for (Future<Void> f : dirsFutures) {
1727       try {
1728         f.get();
1729       } catch (ExecutionException e) {
1730         LOG.warn("Could not load region dir", e.getCause());
1731       }
1732     }
1732     }
1733     errors.print("");
1734   }
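
       // Note: each table dir becomes one WorkItemHdfsDir callable on the shared
       // executor; failure to scan a single table dir is logged and skipped above
       // rather than aborting the whole HDFS load.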
1735
1736   /**
1737    * Record the location of the hbase:meta region as found in ZooKeeper.
1738    */
1739   private boolean recordMetaRegion() throws IOException {
1740     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1741         HConstants.EMPTY_START_ROW, false, false);
1742     if (rl == null) {
1743       errors.reportError(ERROR_CODE.NULL_META_REGION,
1744           "META region was not found in ZooKeeper");
1745       return false;
1746     }
1747     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1748       // Check if Meta region is valid and existing
1749       if (metaLocation == null) {
1750         errors.reportError(ERROR_CODE.NULL_META_REGION,
1751             "META region location is null");
1752         return false;
1753       }
1754       if (metaLocation.getRegionInfo() == null) {
1755         errors.reportError(ERROR_CODE.NULL_META_REGION,
1756             "META location regionInfo is null");
1757         return false;
1758       }
1759       if (metaLocation.getHostname() == null) {
1760         errors.reportError(ERROR_CODE.NULL_META_REGION,
1761             "META location hostName is null");
1762         return false;
1763       }
1764       ServerName sn = metaLocation.getServerName();
1765       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
1766       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1767       if (hbckInfo == null) {
1768         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1769       } else {
1770         hbckInfo.metaEntry = m;
1771       }
1772     }
1773     return true;
1774   }
1775
1776   private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1777     return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1778       @Override
1779       public void abort(String why, Throwable e) {
1780         LOG.error(why, e);
1781         System.exit(1);
1782       }
1783
1784       @Override
1785       public boolean isAborted() {
1786         return false;
1787       }
1788
1789     });
1790   }
1791
1792   private ServerName getMetaRegionServerName(int replicaId)
1793   throws IOException, KeeperException {
1794     return new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
1795   }
1796
1797   /**
1798    * Contacts each regionserver and fetches metadata about regions.
1799    * @param regionServerList - the list of region servers to connect to
1800    * @throws IOException if a remote or network exception occurs
1801    */
1802   void processRegionServers(Collection<ServerName> regionServerList)
1803     throws IOException, InterruptedException {
1804
1805     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1806     List<Future<Void>> workFutures;
1807
1808     // loop to contact each region server in parallel
1809     for (ServerName rsinfo : regionServerList) {
1810       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1811     }
1812
1813     workFutures = executor.invokeAll(workItems);
1814
1815     for (int i = 0; i < workFutures.size(); i++) {
1816       WorkItemRegion item = workItems.get(i);
1817       Future<Void> f = workFutures.get(i);
1818       try {
1819         f.get();
1820       } catch (ExecutionException e) {
1821         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1822             e.getCause());
1823       }
1824     }
1825   }
1826
1827   /**
1828    * Check consistency of all regions that have been found in previous phases.
1829    */
1830   private void checkAndFixConsistency()
1831   throws IOException, KeeperException, InterruptedException {
1832     // Divide the checks into two phases. One for default/primary replicas and another
1833     // for the non-primary ones. Keeps the code cleaner this way.
1834
1835     List<CheckRegionConsistencyWorkItem> workItems =
1836         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1837     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1838       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1839         workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1840       }
1841     }
1842     checkRegionConsistencyConcurrently(workItems);
1843
1844     boolean prevHdfsCheck = shouldCheckHdfs();
1845     setCheckHdfs(false); //replicas don't have any hdfs data
1846     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1847     // deployed/undeployed replicas.
1848     List<CheckRegionConsistencyWorkItem> replicaWorkItems =
1849         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1850     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1851       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1852         replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1853       }
1854     }
1855     checkRegionConsistencyConcurrently(replicaWorkItems);
1856     setCheckHdfs(prevHdfsCheck);
1857
1858     // If some regions are skipped during the checkRegionConsistencyConcurrently() phase, we
1859     // might not get an accurate state of HBase by continuing. The config here allows users to
1860     // tune the tolerated number of skipped regions.
1861     // TODO: evaluate the consequences of continuing the hbck operation without this config.
1862     int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1863     int numOfSkippedRegions = skippedRegions.size();
1864     if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1865       throw new IOException(numOfSkippedRegions
1866         + " region(s) could not be checked or repaired.  See logs for detail.");
1867     }
1868
1869     if (shouldCheckHdfs()) {
1870       checkAndFixTableStates();
1871     }
1872   }
1873
1874   /**
1875    * Check consistency of all regions using multiple threads concurrently.
1876    */
1877   private void checkRegionConsistencyConcurrently(
1878     final List<CheckRegionConsistencyWorkItem> workItems)
1879     throws IOException, KeeperException, InterruptedException {
1880     if (workItems.isEmpty()) {
1881       return;  // nothing to check
1882     }
1883
1884     List<Future<Void>> workFutures = executor.invokeAll(workItems);
1885     for (Future<Void> f : workFutures) {
1886       try {
1887         f.get();
1888       } catch (ExecutionException e1) {
1889         LOG.warn("Could not check region consistency", e1.getCause());
1890         if (e1.getCause() instanceof IOException) {
1891           throw (IOException)e1.getCause();
1892         } else if (e1.getCause() instanceof KeeperException) {
1893           throw (KeeperException)e1.getCause();
1894         } else if (e1.getCause() instanceof InterruptedException) {
1895           throw (InterruptedException)e1.getCause();
1896         } else {
1897           throw new IOException(e1.getCause());
1898         }
1899       }
1900     }
1901   }
1902
1903   class CheckRegionConsistencyWorkItem implements Callable<Void> {
1904     private final String key;
1905     private final HbckInfo hbi;
1906
1907     CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
1908       this.key = key;
1909       this.hbi = hbi;
1910     }
1911
1912     @Override
1913     public synchronized Void call() throws Exception {
1914       try {
1915         checkRegionConsistency(key, hbi);
1916       } catch (Exception e) {
1917         // If the region is a non-META region, skip it and log a warning/error message; if
1918         // it is the META region, we should not continue.
1919         LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
1920           + "'.", e);
1921         if (hbi.getHdfsHRI().isMetaRegion()) {
1922           throw e;
1923         }
1924         LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1925         addSkippedRegion(hbi);
1926       }
1927       return null;
1928     }
1929   }
1930
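       // Called concurrently from CheckRegionConsistencyWorkItem.call(), which is
       // synchronized only on the individual work item; this read-modify-write
       // therefore relies on skippedRegions tolerating concurrent mutation.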
1931   private void addSkippedRegion(final HbckInfo hbi) {
1932     Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1933     if (skippedRegionNames == null) {
1934       skippedRegionNames = new HashSet<String>();
1935     }
1936     skippedRegionNames.add(hbi.getRegionNameAsString());
1937     skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1938   }
1939
1940   /**
1941    * Check and fix table states, assumes full info available:
1942    * - tableInfos
1943    * - empty tables loaded
1944    */
1945   private void checkAndFixTableStates() throws IOException {
1946     // first check dangling states
1947     for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1948       TableName tableName = entry.getKey();
1949       TableState tableState = entry.getValue();
1950       TableInfo tableInfo = tablesInfo.get(tableName);
1951       if (isTableIncluded(tableName)
1952           && !tableName.isSystemTable()
1953           && tableInfo == null) {
1954         if (fixMeta) {
1955           MetaTableAccessor.deleteTableState(connection, tableName);
1956           TableState state = MetaTableAccessor.getTableState(connection, tableName);
1957           if (state != null) {
1958             errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1959                 tableName + " unable to delete dangling table state " + tableState);
1960           }
1961         } else {
1962           errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1963               tableName + " has dangling table state " + tableState);
1964         }
1965       }
1966     }
1967     // check that all tables have states
1968     for (TableName tableName : tablesInfo.keySet()) {
1969       if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1970         if (fixMeta) {
1971           MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1972           TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1973           if (newState == null) {
1974             errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1975                 "Unable to change state for table " + tableName + " in meta ");
1976           }
1977         } else {
1978           errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1979               tableName + " has no state in meta ");
1980         }
1981       }
1982     }
1983   }
1984
1985   private void preCheckPermission() throws IOException, AccessDeniedException {
1986     if (shouldIgnorePreCheckPermission()) {
1987       return;
1988     }
1989
1990     Path hbaseDir = FSUtils.getRootDir(getConf());
1991     FileSystem fs = hbaseDir.getFileSystem(getConf());
1992     UserProvider userProvider = UserProvider.instantiate(getConf());
1993     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1994     FileStatus[] files = fs.listStatus(hbaseDir);
1995     for (FileStatus file : files) {
1996       try {
1997         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1998       } catch (AccessDeniedException ace) {
1999         LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
2000         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
2001           + " does not have write perms to " + file.getPath()
2002           + ". Please rerun hbck as hdfs user " + file.getOwner());
2003         throw ace;
2004       }
2005     }
2006   }
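
       // Sketch: the loop above demands WRITE on every top-level entry under
       // hbase.rootdir for the current UGI; running the repair as the owner of
       // those dirs (often the 'hdfs' user) satisfies it, hence the "rerun hbck
       // as hdfs user" hint in the error message.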
2007
2008   /**
2009    * Deletes region from meta table
2010    */
2011   private void deleteMetaRegion(HbckInfo hi) throws IOException {
2012     deleteMetaRegion(hi.metaEntry.getRegionName());
2013   }
2014
2015   /**
2016    * Deletes region from meta table
2017    */
2018   private void deleteMetaRegion(byte[] metaKey) throws IOException {
2019     Delete d = new Delete(metaKey);
2020     meta.delete(d);
2021     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2022   }
2023
2024   /**
2025    * Reset the split parent region info in meta table
2026    */
2027   private void resetSplitParent(HbckInfo hi) throws IOException {
2028     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
2029     Delete d = new Delete(hi.metaEntry.getRegionName());
2030     d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2031     d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2032     mutations.add(d);
2033
2034     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
2035     hri.setOffline(false);
2036     hri.setSplit(false);
2037     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
2038     mutations.add(p);
2039
2040     meta.mutateRow(mutations);
2041     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2042   }
2043
2044   /**
2045    * This is a backwards-compatibility wrapper for permanently offlining a region
2046    * that should not be alive.  If the region server does not support the
2047    * "offline" method, it will use the closest available unassign method instead.  This
2048    * will basically work until one attempts to disable or delete the affected
2049    * table.  The problem has to do with in-memory only master state, so
2050    * restarting the HMaster or failing over to another should fix this.
2051    */
2052   private void offline(byte[] regionName) throws IOException {
2053     String regionString = Bytes.toStringBinary(regionName);
2054     if (!rsSupportsOffline) {
2055       LOG.warn("Using unassign region " + regionString
2056           + " instead of using offline method, you should"
2057           + " restart HMaster after these repairs");
2058       admin.unassign(regionName, true);
2059       return;
2060     }
2061
2062     // first time we assume the rs's supports #offline.
2063     try {
2064       LOG.info("Offlining region " + regionString);
2065       admin.offline(regionName);
2066     } catch (IOException ioe) {
2067       String notFoundMsg = "java.lang.NoSuchMethodException: " +
2068         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2069       if (ioe.getMessage().contains(notFoundMsg)) {
2070         LOG.warn("Using unassign region " + regionString
2071             + " instead of using offline method, you should"
2072             + " restart HMaster after these repairs");
2073         rsSupportsOffline = false; // in the future just use unassign
2074         admin.unassign(regionName, true);
2075         return;
2076       }
2077       throw ioe;
2078     }
2079   }
2080
2081   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2082     undeployRegionsForHbi(hi);
2083     // undeploy replicas of the region (but only if the method is invoked for the primary)
2084     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2085       return;
2086     }
2087     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2088     for (int i = 1; i < numReplicas; i++) {
2089       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2090       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2091           hi.getPrimaryHRIForDeployedReplica(), i);
2092       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2093       if (h != null) {
2094         undeployRegionsForHbi(h);
2095         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2096         //in consistency checks
2097         h.setSkipChecks(true);
2098       }
2099     }
2100   }
2101
2102   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2103     for (OnlineEntry rse : hi.deployedEntries) {
2104       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2105       try {
2106         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2107         offline(rse.hri.getRegionName());
2108       } catch (IOException ioe) {
2109         LOG.warn("Got exception when attempting to offline region "
2110             + Bytes.toString(rse.hri.getRegionName()), ioe);
2111       }
2112     }
2113   }
2114
2115   /**
2116    * Attempts to undeploy a region from a region server based on information in
2117    * META.  Any operation that modifies the file system should make sure that
2118    * the corresponding region is not deployed, to prevent data races.
2119    *
2120    * A separate call is required to update the master's in-memory region state
2121    * kept in the AssignmentManager.  Because disable uses this state instead of
2122    * that found in META, we can't seem to cleanly disable/delete tables that
2123    * have been hbck fixed.  When used on a version of HBase that does not have
2124    * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2125    * restart or failover may be required.
2126    */
2127   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2128     if (hi.metaEntry == null && hi.hdfsEntry == null) {
2129       undeployRegions(hi);
2130       return;
2131     }
2132
2133     // get assignment info and hregioninfo from meta.
2134     Get get = new Get(hi.getRegionName());
2135     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2136     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2137     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2138     // also get the locations of the replicas to close if the primary region is being closed
2139     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2140       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2141       for (int i = 0; i < numReplicas; i++) {
2142         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2143         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2144       }
2145     }
2146     Result r = meta.get(get);
2147     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2148     if (rl == null) {
2149       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2150           " since meta does not have handle to reach it");
2151       return;
2152     }
2153     for (HRegionLocation h : rl.getRegionLocations()) {
2154       ServerName serverName = h.getServerName();
2155       if (serverName == null) {
2156         errors.reportError("Unable to close region "
2157             + hi.getRegionNameAsString() + " because meta does not "
2158             + "have handle to reach it.");
2159         continue;
2160       }
2161       HRegionInfo hri = h.getRegionInfo();
2162       if (hri == null) {
2163         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2164             + " because hbase:meta had invalid or missing "
2165             + HConstants.CATALOG_FAMILY_STR + ":"
2166             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2167             + " qualifier value.");
2168         continue;
2169       }
2170       // close the region -- close files and remove assignment
2171       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2172     }
2173   }
2174
2175   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2176     KeeperException, InterruptedException {
2177     // If we are trying to fix the errors
2178     if (shouldFixAssignments()) {
2179       errors.print(msg);
2180       undeployRegions(hbi);
2181       setShouldRerun();
2182       HRegionInfo hri = hbi.getHdfsHRI();
2183       if (hri == null) {
2184         hri = hbi.metaEntry;
2185       }
2186       HBaseFsckRepair.fixUnassigned(admin, hri);
2187       HBaseFsckRepair.waitUntilAssigned(admin, hri);
2188
2189       // also assign replicas if needed (do it only when this call operates on a primary replica)
2190       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
2191       int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2192       for (int i = 1; i < replicationCount; i++) {
2193         hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2194         HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2195         if (h != null) {
2196           undeployRegions(h);
2197           //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2198           //in consistency checks
2199           h.setSkipChecks(true);
2200         }
2201         HBaseFsckRepair.fixUnassigned(admin, hri);
2202         HBaseFsckRepair.waitUntilAssigned(admin, hri);
2203       }
2204
2205     }
2206   }
2207
2208   /**
2209    * Check a single region for consistency and correct deployment.
2210    */
2211   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2212   throws IOException, KeeperException, InterruptedException {
2213
2214     if (hbi.isSkipChecks()) return;
2215     String descriptiveName = hbi.toString();
2216     boolean inMeta = hbi.metaEntry != null;
2217     // In case not checking HDFS, assume the region is on HDFS
2218     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2219     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2220     boolean isDeployed = !hbi.deployedOn.isEmpty();
2221     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2222     boolean deploymentMatchesMeta =
2223       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2224       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2225     boolean splitParent =
2226         inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2227     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2228     boolean recentlyModified = inHdfs &&
2229       hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2230
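         // Rough decision table for the branches below (sketch):
         //   inMeta inHdfs deployed -> action
         //     y      y      y        OK when assignment matches meta and the table is enabled
         //     n      n      y        undeploy (NOT_IN_META_HDFS)
         //     n      y      n        re-add to meta and/or reassign (NOT_IN_META_OR_DEPLOYED)
         //     y      n      *        remove from meta, undeploying first if needed (NOT_IN_HDFS*)
         //     y      y      n        reassign when the table should be deployed (NOT_DEPLOYED)
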
2231     // ========== First the healthy cases =============
2232     if (hbi.containsOnlyHdfsEdits()) {
2233       return;
2234     }
2235     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2236       return;
2237     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2238       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2239         "table that is not deployed");
2240       return;
2241     } else if (recentlyModified) {
2242       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2243       return;
2244     }
2245     // ========== Cases where the region is not in hbase:meta =============
2246     else if (!inMeta && !inHdfs && !isDeployed) {
2247       // We shouldn't have record of this region at all then!
2248       assert false : "Entry for region with no data";
2249     } else if (!inMeta && !inHdfs && isDeployed) {
2250       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2251           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2252           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2253       if (shouldFixAssignments()) {
2254         undeployRegions(hbi);
2255       }
2256
2257     } else if (!inMeta && inHdfs && !isDeployed) {
2258       if (hbi.isMerged()) {
2259         // This region has already been merged, the remaining hdfs file will be
2260         // cleaned by CatalogJanitor later
2261         hbi.setSkipChecks(true);
2262         LOG.info("Region " + descriptiveName
2263             + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2264         return;
2265       }
2266       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2267           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2268           "or deployed on any region server");
2269       // restore region consistency of an adopted orphan
2270       if (shouldFixMeta()) {
2271         if (!hbi.isHdfsRegioninfoPresent()) {
2272           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2273               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2274               " used.");
2275           return;
2276         }
2277
2278         HRegionInfo hri = hbi.getHdfsHRI();
2279         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2280
2281         for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2282           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2283               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2284                 hri.getEndKey()) >= 0)
2285               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2286             if(region.isSplit() || region.isOffline()) continue;
2287             Path regionDir = hbi.getHdfsRegionDir();
2288             FileSystem fs = regionDir.getFileSystem(getConf());
2289             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2290             for (Path familyDir : familyDirs) {
2291               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2292               for (Path referenceFilePath : referenceFilePaths) {
2293                 Path parentRegionDir =
2294                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2295                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2296                   LOG.warn(hri + " start and stop keys are in the range of " + region
2297                       + ". The region might not have been cleaned up from hdfs when the split of region "
2298                       + region + " failed. Hence deleting it from hdfs.");
2299                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2300                     regionDir.getParent(), hri);
2301                   return;
2302                 }
2303               }
2304             }
2305           }
2306         }
2307         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2308         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2309         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2310             admin.getClusterStatus().getServers(), numReplicas);
2311
2312         tryAssignmentRepair(hbi, "Trying to reassign region...");
2313       }
2314
2315     } else if (!inMeta && inHdfs && isDeployed) {
2316       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2317           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2318       debugLsr(hbi.getHdfsRegionDir());
2319       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2320         // for replicas, this means that we should undeploy the region (we would have
2321         // gone over the primaries and fixed meta holes in first phase under
2322         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2323         // this stage unless unwanted replica)
2324         if (shouldFixAssignments()) {
2325           undeployRegionsForHbi(hbi);
2326         }
2327       }
2328       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2329         if (!hbi.isHdfsRegioninfoPresent()) {
2330           LOG.error("This should have been repaired in table integrity repair phase");
2331           return;
2332         }
2333
2334         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2335         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2336         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2337             admin.getClusterStatus().getServers(), numReplicas);
2338         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2339       }
2340
2341     // ========== Cases where the region is in hbase:meta =============
2342     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2343       // check whether this is an actual error, or just transient state where parent
2344       // is not cleaned
2345       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2346         // check that split daughters are there
2347         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2348         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2349         if (infoA != null && infoB != null) {
2350           // we already processed or will process daughters. Move on, nothing to see here.
2351           hbi.setSkipChecks(true);
2352           return;
2353         }
2354       }
2355       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2356           + descriptiveName + " is a split parent in META, in HDFS, "
2357           + "and not deployed on any region server. This could be transient.");
2358       if (shouldFixSplitParents()) {
2359         setShouldRerun();
2360         resetSplitParent(hbi);
2361       }
2362     } else if (inMeta && !inHdfs && !isDeployed) {
2363       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2364           + descriptiveName + " found in META, but not in HDFS "
2365           + "or deployed on any region server.");
2366       if (shouldFixMeta()) {
2367         deleteMetaRegion(hbi);
2368       }
2369     } else if (inMeta && !inHdfs && isDeployed) {
2370       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2371           + " found in META, but not in HDFS, " +
2372           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2373       // We treat HDFS as ground truth.  Any information in meta is transient
2374       // and equivalent data can be regenerated.  So, let's unassign and remove
2375       // these problems from META.
2376       if (shouldFixAssignments()) {
2377         errors.print("Trying to fix unassigned region...");
2378         undeployRegions(hbi);
2379       }
2380       if (shouldFixMeta()) {
2381         // wait for it to complete
2382         deleteMetaRegion(hbi);
2383       }
2384     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2385       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2386           + " not deployed on any region server.");
2387       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2388     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2389       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2390           "Region " + descriptiveName + " should not be deployed according " +
2391           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2392       if (shouldFixAssignments()) {
2393         errors.print("Trying to close the region " + descriptiveName);
2394         setShouldRerun();
2395         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2396       }
2397     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2398       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2399           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2400           + " but is multiply assigned to region servers " +
2401           Joiner.on(", ").join(hbi.deployedOn));
2402       // If we are trying to fix the errors
2403       if (shouldFixAssignments()) {
2404         errors.print("Trying to fix assignment error...");
2405         setShouldRerun();
2406         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2407       }
2408     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2409       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2410           + descriptiveName + " listed in hbase:meta on region server " +
2411           hbi.metaEntry.regionServer + " but found on region server " +
2412           hbi.deployedOn.get(0));
2413       // If we are trying to fix the errors
2414       if (shouldFixAssignments()) {
2415         errors.print("Trying to fix assignment error...");
2416         setShouldRerun();
2417         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2418         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2419       }
2420     } else {
2421       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2422           " is in an unforeseen state:" +
2423           " inMeta=" + inMeta +
2424           " inHdfs=" + inHdfs +
2425           " isDeployed=" + isDeployed +
2426           " isMultiplyDeployed=" + isMultiplyDeployed +
2427           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2428           " shouldBeDeployed=" + shouldBeDeployed);
2429     }
2430   }
2431
2432   /**
2433    * Checks tables integrity. Goes over all regions and scans the tables.
2434    * Collects all the pieces for each table and checks if there are missing,
2435    * repeated or overlapping ones.
2436    * @throws IOException
2437    */
2438   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2439     tablesInfo = new TreeMap<TableName, TableInfo>();
2440     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2441     for (HbckInfo hbi : regionInfoMap.values()) {
2442       // Check only valid, working regions
2443       if (hbi.metaEntry == null) {
2444         // this assumes that consistency check has run loadMetaEntry
2445         Path p = hbi.getHdfsRegionDir();
2446         if (p == null) {
2447           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2448         }
2449
2450         // TODO test.
2451         continue;
2452       }
2453       if (hbi.metaEntry.regionServer == null) {
2454         errors.detail("Skipping region because no region server: " + hbi);
2455         continue;
2456       }
2457       if (hbi.metaEntry.isOffline()) {
2458         errors.detail("Skipping region because it is offline: " + hbi);
2459         continue;
2460       }
2461       if (hbi.containsOnlyHdfsEdits()) {
2462         errors.detail("Skipping region because it only contains edits: " + hbi);
2463         continue;
2464       }
2465
2466       // Missing regionDir or over-deployment is checked elsewhere. Include
2467       // these cases in modTInfo, so we can evaluate those regions as part of
2468       // the region chain in META
2469       //if (hbi.foundRegionDir == null) continue;
2470       //if (hbi.deployedOn.size() != 1) continue;
2471       if (hbi.deployedOn.size() == 0) continue;
2472
2473       // We should be safe here
2474       TableName tableName = hbi.metaEntry.getTable();
2475       TableInfo modTInfo = tablesInfo.get(tableName);
2476       if (modTInfo == null) {
2477         modTInfo = new TableInfo(tableName);
2478       }
2479       for (ServerName server : hbi.deployedOn) {
2480         modTInfo.addServer(server);
2481       }
2482
2483       if (!hbi.isSkipChecks()) {
2484         modTInfo.addRegionInfo(hbi);
2485       }
2486
2487       tablesInfo.put(tableName, modTInfo);
2488     }
2489
2490     loadTableInfosForTablesWithNoRegion();
2491
2492     logParallelMerge();
2493     for (TableInfo tInfo : tablesInfo.values()) {
2494       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2495       if (!tInfo.checkRegionChain(handler)) {
2496         errors.report("Found inconsistency in table " + tInfo.getName());
2497       }
2498     }
2499     return tablesInfo;
2500   }
2501
2502   /** Loads table infos for tables that may not have been included, since there are no
2503    * regions reported for the table, but the table dir is present in hdfs
2504    */
2505   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2506     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2507     for (HTableDescriptor htd : allTables.values()) {
2508       if (checkMetaOnly && !htd.isMetaTable()) {
2509         continue;
2510       }
2511
2512       TableName tableName = htd.getTableName();
2513       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2514         TableInfo tableInfo = new TableInfo(tableName);
2515         tableInfo.htds.add(htd);
2516         tablesInfo.put(htd.getTableName(), tableInfo);
2517       }
2518     }
2519   }
2520
2521   /**
2522    * Merge hdfs data by moving files from the contained HbckInfo into targetRegionDir.
2523    * @return number of file move fixes done to merge regions.
2524    */
2525   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2526     int fileMoves = 0;
2527     String thread = Thread.currentThread().getName();
2528     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2529     debugLsr(contained.getHdfsRegionDir());
2530
2531     // rename the contained into the container.
2532     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2533     FileStatus[] dirs = null;
2534     try {
2535       dirs = fs.listStatus(contained.getHdfsRegionDir());
2536     } catch (FileNotFoundException fnfe) {
2537       // region we are attempting to merge in is not present!  Since this is a merge, there is
2538       // no harm skipping this region if it does not exist.
2539       if (!fs.exists(contained.getHdfsRegionDir())) {
2540         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2541             + " is missing. Assuming already sidelined or moved.");
2542       } else {
2543         sidelineRegionDir(fs, contained);
2544       }
2545       return fileMoves;
2546     }
2547
2548     if (dirs == null) {
2549       if (!fs.exists(contained.getHdfsRegionDir())) {
2550         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2551             + " already sidelined.");
2552       } else {
2553         sidelineRegionDir(fs, contained);
2554       }
2555       return fileMoves;
2556     }
2557
2558     for (FileStatus cf : dirs) {
2559       Path src = cf.getPath();
2560       Path dst = new Path(targetRegionDir, src.getName());
2561
2562       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2563         // do not copy the old .regioninfo file.
2564         continue;
2565       }
2566
2567       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2568         // do not copy the .oldlogs files
2569         continue;
2570       }
2571
2572       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2573       // FileSystem.rename is inconsistent with directories -- if the
2574       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2575       // it moves the src into the dst dir resulting in (foo/a/b).  If
2576       // the dst does not exist and the src is a dir, the src is renamed to the dst (foo/b becomes foo/a).
2577       for (FileStatus hfile : fs.listStatus(src)) {
2578         boolean success = fs.rename(hfile.getPath(), dst);
2579         if (success) {
2580           fileMoves++;
2581         }
2582       }
2583       LOG.debug("[" + thread + "] Sideline directory contents:");
2584       debugLsr(targetRegionDir);
2585     }
2586
2587     // if all renames succeeded, sideline the now-empty contained region dir.
2588     sidelineRegionDir(fs, contained);
2589     LOG.info("[" + thread + "] Sidelined region dir " + contained.getHdfsRegionDir() + " into " +
2590         getSidelineDir());
2591     debugLsr(contained.getHdfsRegionDir());
2592
2593     return fileMoves;
2594   }
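
  // Illustrative sketch (not part of hbck) of the FileSystem.rename semantics that the
  // loop above works around. Assuming /t/a already exists as a directory:
  //
  //   FileSystem fs = FileSystem.get(conf);
  //   fs.rename(new Path("/t/b"), new Path("/t/a"));  // the dir /t/b ends up as /t/a/b
  //
  // If /t/a did not exist, the same call would instead rename /t/b to /t/a, which is why
  // the code above renames individual files rather than whole directories.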
2595
2596
2597   static class WorkItemOverlapMerge implements Callable<Void> {
2598     private TableIntegrityErrorHandler handler;
2599     Collection<HbckInfo> overlapgroup;
2600
2601     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2602       this.handler = handler;
2603       this.overlapgroup = overlapgroup;
2604     }
2605
2606     @Override
2607     public Void call() throws Exception {
2608       handler.handleOverlapGroup(overlapgroup);
2609       return null;
2610     }
2611   }
2612
2613
2614   /**
2615    * Maintain information about a particular table.
2616    */
2617   public class TableInfo {
2618     TableName tableName;
2619     TreeSet <ServerName> deployedOn;
2620
2621     // backwards regions
2622     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2623
2624     // sidelined big overlapped regions
2625     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2626
2627     // region split calculator
2628     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2629
2630     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2631     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2632
2633     // key = start split, values = set of splits in problem group
2634     final Multimap<byte[], HbckInfo> overlapGroups =
2635       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2636
2637     // list of regions derived from meta entries.
2638     private ImmutableList<HRegionInfo> regionsFromMeta = null;
2639
2640     TableInfo(TableName name) {
2641       this.tableName = name;
2642       deployedOn = new TreeSet <ServerName>();
2643     }
2644
2645     /**
2646      * @return descriptor common to all regions.  null if there are none or multiple!
2647      */
2648     private HTableDescriptor getHTD() {
2649       if (htds.size() == 1) {
2650         return htds.iterator().next();
2651       } else {
2652         LOG.error("None/Multiple table descriptors found for table '"
2653           + tableName + "' regions: " + htds);
2654       }
2655       return null;
2656     }
2657
2658     public void addRegionInfo(HbckInfo hir) {
2659       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2660         // end key is absolute end key, just add it.
2661         // ignore replicas other than primary for these checks
2662         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2663         return;
2664       }
2665
2666       // if not the absolute end key, check for cycle
2667       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2668         errors.reportError(
2669             ERROR_CODE.REGION_CYCLE,
2670             String.format("The endkey for this region comes before the "
2671                 + "startkey, startkey=%s, endkey=%s",
2672                 Bytes.toStringBinary(hir.getStartKey()),
2673                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2674         backwards.add(hir);
2675         return;
2676       }
2677
2678       // main case, add to split calculator
2679       // ignore replicas other than primary for these checks
2680       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2681     }
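
    // Example (illustrative only) of the backwards-region check above:
    //
    //   byte[] start = Bytes.toBytes("b");
    //   byte[] end = Bytes.toBytes("a");
    //   boolean backwards = Bytes.compareTo(start, end) > 0;  // true
    //
    // Such a region is reported as ERROR_CODE.REGION_CYCLE and kept in 'backwards'
    // instead of being fed to the split calculator.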
2682
2683     public void addServer(ServerName server) {
2684       this.deployedOn.add(server);
2685     }
2686
2687     public TableName getName() {
2688       return tableName;
2689     }
2690
2691     public int getNumRegions() {
2692       return sc.getStarts().size() + backwards.size();
2693     }
2694
2695     public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2696       // lazy loaded, synchronized to ensure a single load
2697       if (regionsFromMeta == null) {
2698         List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2699         for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2700           if (tableName.equals(h.getTableName())) {
2701             if (h.metaEntry != null) {
2702               regions.add((HRegionInfo) h.metaEntry);
2703             }
2704           }
2705         }
2706         regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2707       }
2708
2709       return regionsFromMeta;
2710     }
2711
2712     private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2713       ErrorReporter errors;
2714
2715       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2716         this.errors = errors;
2717         setTableInfo(ti);
2718       }
2719
2720       @Override
2721       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2722         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2723             "First region should start with an empty key.  You need to "
2724             + " create a new region and regioninfo in HDFS to plug the hole.",
2725             getTableInfo(), hi);
2726       }
2727
2728       @Override
2729       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2730         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2731             "Last region should end with an empty key. You need to "
2732                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2733       }
2734
2735       @Override
2736       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2737         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2738             "Region has the same start and end key.", getTableInfo(), hi);
2739       }
2740
2741       @Override
2742       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2743         byte[] key = r1.getStartKey();
2744         // dup start key
2745         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2746             "Multiple regions have the same startkey: "
2747             + Bytes.toStringBinary(key), getTableInfo(), r1);
2748         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2749             "Multiple regions have the same startkey: "
2750             + Bytes.toStringBinary(key), getTableInfo(), r2);
2751       }
2752
2753       @Override
2754       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2755         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2756             "There is an overlap in the region chain.",
2757             getTableInfo(), hi1, hi2);
2758       }
2759
2760       @Override
2761       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2762         errors.reportError(
2763             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2764             "There is a hole in the region chain between "
2765                 + Bytes.toStringBinary(holeStart) + " and "
2766                 + Bytes.toStringBinary(holeStop)
2767                 + ".  You need to create a new .regioninfo and region "
2768                 + "dir in hdfs to plug the hole.");
2769       }
2770     }
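
    // Typical wiring for this suggester, mirroring the integrity check earlier in this
    // file (illustrative only):
    //
    //   TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
    //   if (!tInfo.checkRegionChain(handler)) {
    //     errors.report("Found inconsistency in table " + tInfo.getName());
    //   }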
2771
2772     /**
2773      * This handler fixes integrity errors from hdfs information.  There are
2774      * basically three classes of integrity problems 1) holes, 2) overlaps, and
2775      * 3) invalid regions.
2776      *
2777      * This class overrides methods that fix holes and the overlap group case.
2778      * Individual cases of particular overlaps are handled by the general
2779      * overlap group merge repair case.
2780      *
2781      * If hbase is online, this forces regions offline before doing merge
2782      * operations.
2783      */
2784     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2785       Configuration conf;
2786
2787       boolean fixOverlaps = true;
2788
2789       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2790           boolean fixHoles, boolean fixOverlaps) {
2791         super(ti, errors);
2792         this.conf = conf;
2793         this.fixOverlaps = fixOverlaps;
2794         // TODO properly use fixHoles
2795       }
2796
2797       /**
2798        * This is a special case hole -- when the first region of a table is
2799        * missing from META, HBase doesn't acknowledge the existence of the
2800        * table.
2801        */
2802       @Override
2803       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2804         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2805             "First region should start with an empty key.  Creating a new " +
2806             "region and regioninfo in HDFS to plug the hole.",
2807             getTableInfo(), next);
2808         HTableDescriptor htd = getTableInfo().getHTD();
2809         // from special EMPTY_START_ROW to next region's startKey
2810         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2811             HConstants.EMPTY_START_ROW, next.getStartKey());
2812
2813         // TODO test
2814         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2815         LOG.info("Table region start key was not empty.  Created new empty region: "
2816             + newRegion + " " + region);
2817         fixes++;
2818       }
2819
2820       @Override
2821       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2822         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2823             "Last region should end with an empty key.  Creating a new "
2824                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2825         HTableDescriptor htd = getTableInfo().getHTD();
2826         // from curEndKey to EMPTY_START_ROW
2827         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2828             HConstants.EMPTY_START_ROW);
2829
2830         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2831         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2832             + " " + region);
2833         fixes++;
2834       }
2835
2836       /**
2837        * There is a hole in the hdfs regions that violates the table integrity
2838        * rules.  Create a new empty region that patches the hole.
2839        */
2840       @Override
2841       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2842         errors.reportError(
2843             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2844             "There is a hole in the region chain between "
2845                 + Bytes.toStringBinary(holeStartKey) + " and "
2846                 + Bytes.toStringBinary(holeStopKey)
2847                 + ".  Creating a new regioninfo and region "
2848                 + "dir in hdfs to plug the hole.");
2849         HTableDescriptor htd = getTableInfo().getHTD();
2850         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2851         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2852         LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2853         fixes++;
2854       }
2855
2856       /**
2857        * This takes set of overlapping regions and merges them into a single
2858        * region.  This covers cases like degenerate regions, shared start key,
2859        * general overlaps, duplicate ranges, and partial overlapping regions.
2860        *
2861        * Cases:
2862        * - Clean regions that overlap
2863        * - Only .oldlogs regions (can't find the start/stop range, or figure it out)
2864        *
2865        * This is basically threadsafe, except for the fixes counter increment in mergeOverlaps.
2866        */
2867       @Override
2868       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2869           throws IOException {
2870         Preconditions.checkNotNull(overlap);
2871         Preconditions.checkArgument(overlap.size() > 0);
2872
2873         if (!this.fixOverlaps) {
2874           LOG.warn("Not attempting to repair overlaps.");
2875           return;
2876         }
2877
2878         if (overlap.size() > maxMerge) {
2879           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2880             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2881           if (sidelineBigOverlaps) {
2882             // we only sideline big overlapped groups that exceed the max number of regions to merge
2883             sidelineBigOverlaps(overlap);
2884           }
2885           return;
2886         }
2887
2888         mergeOverlaps(overlap);
2889       }
2890
2891       void mergeOverlaps(Collection<HbckInfo> overlap)
2892           throws IOException {
2893         String thread = Thread.currentThread().getName();
2894         LOG.info("== [" + thread + "] Merging regions into one region: "
2895           + Joiner.on(",").join(overlap));
2896         // get the min / max range and close all concerned regions
2897         Pair<byte[], byte[]> range = null;
2898         for (HbckInfo hi : overlap) {
2899           if (range == null) {
2900             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2901           } else {
2902             if (RegionSplitCalculator.BYTES_COMPARATOR
2903                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2904               range.setFirst(hi.getStartKey());
2905             }
2906             if (RegionSplitCalculator.BYTES_COMPARATOR
2907                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2908               range.setSecond(hi.getEndKey());
2909             }
2910           }
2911           // need to close files so delete can happen.
2912           LOG.debug("[" + thread + "] Closing region before moving data around: " + hi);
2913           LOG.debug("[" + thread + "] Contained region dir before close");
2914           debugLsr(hi.getHdfsRegionDir());
2915           try {
2916             LOG.info("[" + thread + "] Closing region: " + hi);
2917             closeRegion(hi);
2918           } catch (IOException ioe) {
2919             LOG.warn("[" + thread + "] Was unable to close region " + hi
2920               + ".  Just continuing... ", ioe);
2921           } catch (InterruptedException e) {
2922             LOG.warn("[" + thread + "] Was unable to close region " + hi
2923               + ".  Just continuing... ", e);
2924           }
2925
2926           try {
2927             LOG.info("[" + thread + "] Offlining region: " + hi);
2928             offline(hi.getRegionName());
2929           } catch (IOException ioe) {
2930             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2931               + ".  Just continuing... ", ioe);
2932           }
2933         }
2934
2935         // create new empty container region.
2936         HTableDescriptor htd = getTableInfo().getHTD();
2937         // from start key to end Key
2938         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2939             range.getSecond());
2940         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2941         LOG.info("[" + thread + "] Created new empty container region: " +
2942             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2943         debugLsr(region.getRegionFileSystem().getRegionDir());
2944
2945         // all target regions are closed, should be able to safely cleanup.
2946         boolean didFix = false;
2947         Path target = region.getRegionFileSystem().getRegionDir();
2948         for (HbckInfo contained : overlap) {
2949           LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
2950           int merges = mergeRegionDirs(target, contained);
2951           if (merges > 0) {
2952             didFix = true;
2953           }
2954         }
2955         if (didFix) {
2956           fixes++;
2957         }
2958       }
2959
2960       /**
2961        * Sideline some regions in a big overlap group so that the group
2962        * will have fewer regions, making it easier to merge later on.
2963        *
2964        * @param bigOverlap the overlap group with more than maxMerge regions
2965        * @throws IOException
2966        */
2967       void sidelineBigOverlaps(
2968           Collection<HbckInfo> bigOverlap) throws IOException {
2969         int overlapsToSideline = bigOverlap.size() - maxMerge;
2970         if (overlapsToSideline > maxOverlapsToSideline) {
2971           overlapsToSideline = maxOverlapsToSideline;
2972         }
2973         List<HbckInfo> regionsToSideline =
2974           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2975         FileSystem fs = FileSystem.get(conf);
2976         for (HbckInfo regionToSideline: regionsToSideline) {
2977           try {
2978             LOG.info("Closing region: " + regionToSideline);
2979             closeRegion(regionToSideline);
2980           } catch (IOException ioe) {
2981             LOG.warn("Was unable to close region " + regionToSideline
2982               + ".  Just continuing... ", ioe);
2983           } catch (InterruptedException e) {
2984             LOG.warn("Was unable to close region " + regionToSideline
2985               + ".  Just continuing... ", e);
2986           }
2987
2988           try {
2989             LOG.info("Offlining region: " + regionToSideline);
2990             offline(regionToSideline.getRegionName());
2991           } catch (IOException ioe) {
2992             LOG.warn("Unable to offline region from master: " + regionToSideline
2993               + ".  Just continuing... ", ioe);
2994           }
2995
2996           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2997           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2998           if (sidelineRegionDir != null) {
2999             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3000             LOG.info("After sidelined big overlapped region: "
3001               + regionToSideline.getRegionNameAsString()
3002               + " to " + sidelineRegionDir.toString());
3003             fixes++;
3004           }
3005         }
3006       }
3007     }
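
    // Hedged sketch of wiring up the fixing handler, based on the constructor above;
    // shouldFixHdfsHoles()/shouldFixHdfsOverlaps() are assumed accessors for the
    // -fixHdfsHoles/-fixHdfsOverlaps command line flags:
    //
    //   TableIntegrityErrorHandler fixer = tInfo.new HDFSIntegrityFixer(
    //       tInfo, errors, getConf(), shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
    //   tInfo.checkRegionChain(fixer);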
3008
3009     /**
3010      * Check the region chain (from META) of this table.  We are looking for
3011      * holes, overlaps, and cycles.
3012      * @return false if there are errors
3013      * @throws IOException
3014      */
3015     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
3016       // When a table is disabled there is no need to check its region chain. If some regions
3017       // are accidentally deployed, the code below might report issues such as a missing start
3018       // or end region or a hole in the chain, and may try to fix them, which is unwanted.
3019       if (isTableDisabled(this.tableName)) {
3020         return true;
3021       }
3022       int originalErrorsCount = errors.getErrorList().size();
3023       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
3024       SortedSet<byte[]> splits = sc.getSplits();
3025
3026       byte[] prevKey = null;
3027       byte[] problemKey = null;
3028
3029       if (splits.size() == 0) {
3030         // no region for this table
3031         handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
3032       }
3033
3034       for (byte[] key : splits) {
3035         Collection<HbckInfo> ranges = regions.get(key);
3036         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
3037           for (HbckInfo rng : ranges) {
3038             handler.handleRegionStartKeyNotEmpty(rng);
3039           }
3040         }
3041
3042         // check for degenerate ranges
3043         for (HbckInfo rng : ranges) {
3044           // special endkey case converts '' to null
3045           byte[] endKey = rng.getEndKey();
3046           endKey = (endKey.length == 0) ? null : endKey;
3047           if (Bytes.equals(rng.getStartKey(), endKey)) {
3048             handler.handleDegenerateRegion(rng);
3049           }
3050         }
3051
3052         if (ranges.size() == 1) {
3053           // this split key is ok -- no overlap, not a hole.
3054           if (problemKey != null) {
3055             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3056           }
3057           problemKey = null; // fell through, no more problem.
3058         } else if (ranges.size() > 1) {
3059           // set the new problem key as the group name; if we already have a problem key,
3060           // just keep using it.
3061           if (problemKey == null) {
3062             // only for overlap regions.
3063             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
3064             problemKey = key;
3065           }
3066           overlapGroups.putAll(problemKey, ranges);
3067
3068           // record errors
3069           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
3070           // this is dumb and O(n^2), but it shouldn't happen often
3071           for (HbckInfo r1 : ranges) {
3072             if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3073             subRange.remove(r1);
3074             for (HbckInfo r2 : subRange) {
3075               if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3076               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey()) == 0) {
3077                 handler.handleDuplicateStartKeys(r1, r2);
3078               } else {
3079                 // overlap
3080                 handler.handleOverlapInRegionChain(r1, r2);
3081               }
3082             }
3083           }
3084
3085         } else if (ranges.size() == 0) {
3086           if (problemKey != null) {
3087             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3088           }
3089           problemKey = null;
3090
3091           byte[] holeStopKey = sc.getSplits().higher(key);
3092           // if higher key is null we reached the top.
3093           if (holeStopKey != null) {
3094             // hole
3095             handler.handleHoleInRegionChain(key, holeStopKey);
3096           }
3097         }
3098         prevKey = key;
3099       }
3100
3101       // When the last region of a table is proper and has an empty end key, 'prevKey'
3102       // will be null.
3103       if (prevKey != null) {
3104         handler.handleRegionEndKeyNotEmpty(prevKey);
3105       }
3106
3107       // TODO fold this into the TableIntegrityHandler
3108       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3109         boolean ok = handleOverlapsParallel(handler, prevKey);
3110         if (!ok) {
3111           return false;
3112         }
3113       } else {
3114         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3115           handler.handleOverlapGroup(overlap);
3116         }
3117       }
3118
3119       if (details) {
3120         // do full region split map dump
3121         errors.print("---- Table '"  +  this.tableName
3122             + "': region split map");
3123         dump(splits, regions);
3124         errors.print("---- Table '"  +  this.tableName
3125             + "': overlap groups");
3126         dumpOverlapProblems(overlapGroups);
3127         errors.print("There are " + overlapGroups.keySet().size()
3128             + " overlap groups with " + overlapGroups.size()
3129             + " overlapping regions");
3130       }
3131       if (!sidelinedRegions.isEmpty()) {
3132         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3133         errors.print("---- Table '"  +  this.tableName
3134             + "': sidelined big overlapped regions");
3135         dumpSidelinedRegions(sidelinedRegions);
3136       }
3137       return errors.getErrorList().size() == originalErrorsCount;
3138     }
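
    // Worked example of the coverage walk above (illustrative only). For regions
    // A=["",b), B=[b,d), C=[c,e) and D=[e,""), sc.getSplits() yields roughly
    // {"", b, c, d, e} plus a terminal marker for D's empty end key, and
    // sc.calcCoverage() maps each split point to the regions covering it:
    //
    //   ""  -> {A}      size 1: ok
    //   b   -> {B}      size 1: ok
    //   c   -> {B, C}   size 2: overlap, handleOverlapInRegionChain(B, C)
    //   d   -> {C}      size 1: ok
    //   e   -> {D}      size 1: ok
    //
    // Were C absent, split point d would map to {} and handleHoleInRegionChain(d, e)
    // would fire instead.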
3139
3140     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3141         throws IOException {
3142       // we parallelize the overlap handler for the case where we have lots of groups to fix.  We can
3143       // safely assume each group is independent.
3144       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
3145       List<Future<Void>> rets;
3146       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3148         merges.add(new WorkItemOverlapMerge(overlap, handler));
3149       }
3150       try {
3151         rets = executor.invokeAll(merges);
3152       } catch (InterruptedException e) {
3153         LOG.error("Overlap merges were interrupted", e);
3154         return false;
3155       }
3156       for (int i = 0; i < merges.size(); i++) {
3157         WorkItemOverlapMerge work = merges.get(i);
3158         Future<Void> f = rets.get(i);
3159         try {
3160           f.get();
3161         } catch(ExecutionException e) {
3162           LOG.warn("Failed to merge overlap group " + work, e.getCause());
3163         } catch (InterruptedException e) {
3164           LOG.error("Waiting for overlap merges was interrupted", e);
3165           return false;
3166         }
3167       }
3168       return true;
3169     }
3170
3171     /**
3172      * This dumps the region split data in a human-readable way for visual debugging.
3173      *
3174      * @param splits the split points computed for this table
3175      * @param regions the coverage map from split point to the regions covering it
3176      */
3177     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3178       // we display this way because the last end key should be displayed as well.
3179       StringBuilder sb = new StringBuilder();
3180       for (byte[] k : splits) {
3181         sb.setLength(0); // clear out existing buffer, if any.
3182         sb.append(Bytes.toStringBinary(k) + ":\t");
3183         for (HbckInfo r : regions.get(k)) {
3184           sb.append("[ "+ r.toString() + ", "
3185               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3186         }
3187         errors.print(sb.toString());
3188       }
3189     }
3190   }
3191
3192   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3193     // we display this way because the last end key should be displayed as
3194     // well.
3195     for (byte[] k : regions.keySet()) {
3196       errors.print(Bytes.toStringBinary(k) + ":");
3197       for (HbckInfo r : regions.get(k)) {
3198         errors.print("[ " + r.toString() + ", "
3199             + Bytes.toStringBinary(r.getEndKey()) + "]");
3200       }
3201       errors.print("----");
3202     }
3203   }
3204
3205   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3206     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3207       TableName tableName = entry.getValue().getTableName();
3208       Path path = entry.getKey();
3209       errors.print("This sidelined region dir should be bulk loaded: "
3210         + path.toString());
3211       errors.print("Bulk load command looks like: "
3212         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3213         + path.toUri().getPath() + " " + tableName);
3214     }
3215   }
3216
3217   public Multimap<byte[], HbckInfo> getOverlapGroups(
3218       TableName table) {
3219     TableInfo ti = tablesInfo.get(table);
3220     return ti.overlapGroups;
3221   }
3222
3223   /**
3224    * Return descriptors of user-space tables whose metadata has not been
3225    * modified in the last few milliseconds specified by timelag.
3226    * If none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3227    * SPLITA_QUALIFIER, or SPLITB_QUALIFIER columns has changed in the last
3228    * milliseconds specified by timelag, then the table is a candidate to be returned.
3229    * @return tables that have not been modified recently
3231    */
3232   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3233     List<TableName> tableNames = new ArrayList<TableName>();
3234     long now = EnvironmentEdgeManager.currentTime();
3235
3236     for (HbckInfo hbi : regionInfoMap.values()) {
3237       MetaEntry info = hbi.metaEntry;
3238
3239       // if the start key is zero, then we have found the first region of a table.
3240       // pick only those tables that were not modified in the last few milliseconds.
3241       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3242         if (info.modTime + timelag < now) {
3243           tableNames.add(info.getTable());
3244         } else {
3245           numSkipped.incrementAndGet(); // one more in-flux table
3246         }
3247       }
3248     }
3249     return getHTableDescriptors(tableNames);
3250   }
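
  // Example (illustrative only) of the timelag filter above: with timelag = 60000, a
  // table whose first region's meta entry was last modified two minutes ago satisfies
  //
  //   info.modTime + timelag < EnvironmentEdgeManager.currentTime()
  //
  // and is returned, while one modified ten seconds ago is counted via numSkipped as
  // still in flux.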
3251
3252   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3253     HTableDescriptor[] htd = new HTableDescriptor[0];
3254     LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3255     try (Connection conn = ConnectionFactory.createConnection(getConf());
3256         Admin admin = conn.getAdmin()) {
3257       htd = admin.getTableDescriptorsByTableName(tableNames);
3258     } catch (IOException e) {
3259       LOG.debug("Exception getting table descriptors", e);
3260     }
3261     return htd;
3262   }
3263
3264   /**
3265    * Gets the entry in regionInfo corresponding to the given encoded
3266    * region name. If the region has not been seen yet, a new entry is added
3267    * and returned.
3268    */
3269   private synchronized HbckInfo getOrCreateInfo(String name) {
3270     HbckInfo hbi = regionInfoMap.get(name);
3271     if (hbi == null) {
3272       hbi = new HbckInfo(null);
3273       regionInfoMap.put(name, hbi);
3274     }
3275     return hbi;
3276   }
3277
3278   private void checkAndFixTableLocks() throws IOException {
3279     TableLockChecker checker = new TableLockChecker(zkw, errors);
3280     checker.checkTableLocks();
3281
3282     if (this.fixTableLocks) {
3283       checker.fixExpiredTableLocks();
3284     }
3285   }
3286
3287   private void checkAndFixReplication() throws IOException {
3288     ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
3289     checker.checkUnDeletedQueues();
3290
3291     if (checker.hasUnDeletedQueues() && this.fixReplication) {
3292       checker.fixUnDeletedQueues();
3293       setShouldRerun();
3294     }
3295   }
3296
3297   /**
3298    * Check values in regionInfo for hbase:meta.
3299    * Check if zero or more than one region claims to be holding hbase:meta.
3300    * If there are inconsistencies (i.e. zero regions or more than one region
3301    * pretends to be holding hbase:meta) try to fix that and report an error.
3302    * @throws IOException from HBaseFsckRepair functions
3303    * @throws KeeperException
3304    * @throws InterruptedException
3305    */
3306   boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3307     Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
3308     for (HbckInfo value : regionInfoMap.values()) {
3309       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3310         metaRegions.put(value.getReplicaId(), value);
3311       }
3312     }
3313     int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3314         .getRegionReplication();
3315     boolean noProblem = true;
3316     // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
3317     // Check the deployed servers. It should be exactly one server for each replica.
3318     for (int i = 0; i < metaReplication; i++) {
3319       HbckInfo metaHbckInfo = metaRegions.remove(i);
3320       List<ServerName> servers = new ArrayList<ServerName>();
3321       if (metaHbckInfo != null) {
3322         servers = metaHbckInfo.deployedOn;
3323       }
3324       if (servers.size() != 1) {
3325         noProblem = false;
3326         if (servers.size() == 0) {
3327           assignMetaReplica(i);
3328         } else if (servers.size() > 1) {
3329           errors.reportError(ERROR_CODE.MULTI_META_REGION,
3330               "hbase:meta, replicaId " + metaHbckInfo.getReplicaId()
3331               + " is deployed on more than one region server.");
3332           if (shouldFixAssignments()) {
3333             errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3334                          metaHbckInfo.getReplicaId() + "..");
3335             setShouldRerun();
3336             // try to fix it (treat it as a dupe assignment)
3337             HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3338           }
3339         }
3340       }
3341     }
3342     // unassign whatever is remaining in metaRegions. They are excess replicas.
3343     for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3344       noProblem = false;
3345       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3346           "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3347           ", deployed " + metaRegions.size());
3348       if (shouldFixAssignments()) {
3349         errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3350             " of hbase:meta..");
3351         setShouldRerun();
3352         unassignMetaReplica(entry.getValue());
3353       }
3354     }
3355     // if noProblem is false, rerun hbck with hopefully fixed META
3356     // if noProblem is true, no errors, so continue normally
3357     return noProblem;
3358   }
3359
3360   private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3361       KeeperException {
3362     undeployRegions(hi);
3363     ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
3364   }
3365
3366   private void assignMetaReplica(int replicaId)
3367       throws IOException, KeeperException, InterruptedException {
3368     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3369         replicaId + " is not found on any region.");
3370     if (shouldFixAssignments()) {
3371       errors.print("Trying to fix a problem with hbase:meta..");
3372       setShouldRerun();
3373       // try to fix it (treat it as unassigned region)
3374       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3375           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3376       HBaseFsckRepair.fixUnassigned(admin, h);
3377       HBaseFsckRepair.waitUntilAssigned(admin, h);
3378     }
3379   }
3380
3381   /**
3382    * Scan hbase:meta, adding all regions found to the regionInfo map.
3383    * @throws IOException if an error is encountered
3384    */
3385   boolean loadMetaEntries() throws IOException {
3386     MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3387       int countRecord = 1;
3388
3389       // comparator to sort Cells by timestamp; Long.compare avoids int overflow
3390       final Comparator<Cell> comp = new Comparator<Cell>() {
3391         @Override
3392         public int compare(Cell k1, Cell k2) {
3393           return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3394         }
3395       };
3396
3397       @Override
3398       public boolean visit(Result result) throws IOException {
3399         try {
3400
3401           // record the latest modification of this META record
3402           long ts = Collections.max(result.listCells(), comp).getTimestamp();
3403           RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3404           if (rl == null) {
3405             emptyRegionInfoQualifiers.add(result);
3406             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3407               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3408             return true;
3409           }
3410           ServerName sn = null;
3411           if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
3412               rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3413             emptyRegionInfoQualifiers.add(result);
3414             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3415               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3416             return true;
3417           }
3418           HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3419           if (!(isTableIncluded(hri.getTable())
3420               || hri.isMetaRegion())) {
3421             return true;
3422           }
3423           PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
3424           for (HRegionLocation h : rl.getRegionLocations()) {
3425             if (h == null || h.getRegionInfo() == null) {
3426               continue;
3427             }
3428             sn = h.getServerName();
3429             hri = h.getRegionInfo();
3430
3431             MetaEntry m = null;
3432             if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
3433               m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3434             } else {
3435               m = new MetaEntry(hri, sn, ts, null, null);
3436             }
3437             HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3438             if (previous == null) {
3439               regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3440             } else if (previous.metaEntry == null) {
3441               previous.metaEntry = m;
3442             } else {
3443               throw new IOException("Two entries in hbase:meta are the same " + previous);
3444             }
3445           }
3446           PairOfSameType<HRegionInfo> mergeRegions = MetaTableAccessor.getMergeRegions(result);
3447           for (HRegionInfo mergeRegion : new HRegionInfo[] {
3448               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3449             if (mergeRegion != null) {
3450               // This region has already been merged
3451               HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3452               hbInfo.setMerged(true);
3453             }
3454           }
3455
3456           // show proof of progress to the user, once for every 100 records.
3457           if (countRecord % 100 == 0) {
3458             errors.progress();
3459           }
3460           countRecord++;
3461           return true;
3462         } catch (RuntimeException e) {
3463           LOG.error("Result=" + result);
3464           throw e;
3465         }
3466       }
3467     };
3468     if (!checkMetaOnly) {
3469       // Scan hbase:meta to pick up user regions
3470       MetaTableAccessor.fullScanRegions(connection, visitor);
3471     }
3472
3473     errors.print("");
3474     return true;
3475   }
3476
3477   /**
3478    * Stores the regioninfo entries scanned from META
3479    */
3480   static class MetaEntry extends HRegionInfo {
3481     ServerName regionServer;   // server hosting this region
3482     long modTime;          // timestamp of the most recent metadata modification
3483     HRegionInfo splitA, splitB; //split daughters
3484
3485     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3486       this(rinfo, regionServer, modTime, null, null);
3487     }
3488
3489     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3490         HRegionInfo splitA, HRegionInfo splitB) {
3491       super(rinfo);
3492       this.regionServer = regionServer;
3493       this.modTime = modTime;
3494       this.splitA = splitA;
3495       this.splitB = splitB;
3496     }
3497
3498     @Override
3499     public boolean equals(Object o) {
3500       boolean superEq = super.equals(o);
3501       if (!superEq) {
3502         return superEq;
3503       }
3504
3505       MetaEntry me = (MetaEntry) o;
3506       if (!regionServer.equals(me.regionServer)) {
3507         return false;
3508       }
3509       return (modTime == me.modTime);
3510     }
3511
3512     @Override
3513     public int hashCode() {
3514       int hash = Arrays.hashCode(getRegionName());
3515       hash ^= getRegionId();
3516       hash ^= Arrays.hashCode(getStartKey());
3517       hash ^= Arrays.hashCode(getEndKey());
3518       hash ^= Boolean.valueOf(isOffline()).hashCode();
3519       hash ^= getTable().hashCode();
3520       if (regionServer != null) {
3521         hash ^= regionServer.hashCode();
3522       }
3523       hash ^= modTime;
3524       return hash;
3525     }
3526   }
3527
3528   /**
3529    * Stores the regioninfo entries from HDFS
3530    */
3531   static class HdfsEntry {
3532     HRegionInfo hri;
3533     Path hdfsRegionDir = null;
3534     long hdfsRegionDirModTime  = 0;
3535     boolean hdfsRegioninfoFilePresent = false;
3536     boolean hdfsOnlyEdits = false;
3537   }
3538
3539   /**
3540    * Stores the regioninfo retrieved from Online region servers.
3541    */
3542   static class OnlineEntry {
3543     HRegionInfo hri;
3544     ServerName hsa;
3545
3546     @Override
3547     public String toString() {
3548       return hsa.toString() + ";" + hri.getRegionNameAsString();
3549     }
3550   }
3551
3552   /**
3553    * Maintain information about a particular region.  It gathers information
3554    * from three places -- HDFS, META, and region servers.
3555    */
3556   public static class HbckInfo implements KeyRange {
3557     private MetaEntry metaEntry = null; // info in META
3558     private HdfsEntry hdfsEntry = null; // info in HDFS
3559     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3560     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3561     private boolean skipChecks = false; // whether to skip further checks to this region info.
3562     private boolean isMerged = false; // whether this region has already been merged into another one
3563     private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
3564     private HRegionInfo primaryHRIForDeployedReplica = null;
3565
3566     HbckInfo(MetaEntry metaEntry) {
3567       this.metaEntry = metaEntry;
3568     }
3569
3570     public synchronized int getReplicaId() {
3571       return metaEntry != null ? metaEntry.getReplicaId() : deployedReplicaId;
3572     }
3573
3574     public synchronized void addServer(HRegionInfo hri, ServerName server) {
3575       OnlineEntry rse = new OnlineEntry();
3576       rse.hri = hri;
3577       rse.hsa = server;
3578       this.deployedEntries.add(rse);
3579       this.deployedOn.add(server);
3580       // save the replicaId that we see deployed in the cluster
3581       this.deployedReplicaId = hri.getReplicaId();
3582       this.primaryHRIForDeployedReplica =
3583           RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3584     }
3585
3586     @Override
3587     public synchronized String toString() {
3588       StringBuilder sb = new StringBuilder();
3589       sb.append("{ meta => ");
3590       sb.append((metaEntry != null) ? metaEntry.getRegionNameAsString() : "null");
3591       sb.append(", hdfs => " + getHdfsRegionDir());
3592       sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries));
3593       sb.append(", replicaId => " + getReplicaId());
3594       sb.append(" }");
3595       return sb.toString();
3596     }
3597
3598     @Override
3599     public byte[] getStartKey() {
3600       if (this.metaEntry != null) {
3601         return this.metaEntry.getStartKey();
3602       } else if (this.hdfsEntry != null) {
3603         return this.hdfsEntry.hri.getStartKey();
3604       } else {
3605         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3606         return null;
3607       }
3608     }
3609
3610     @Override
3611     public byte[] getEndKey() {
3612       if (this.metaEntry != null) {
3613         return this.metaEntry.getEndKey();
3614       } else if (this.hdfsEntry != null) {
3615         return this.hdfsEntry.hri.getEndKey();
3616       } else {
3617         LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3618         return null;
3619       }
3620     }
3621
3622     public TableName getTableName() {
3623       if (this.metaEntry != null) {
3624         return this.metaEntry.getTable();
3625       } else if (this.hdfsEntry != null) {
3626         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3627         // so we get the name from the Path
3628         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3629         return FSUtils.getTableName(tableDir);
3630       } else {
3631         // return the info from the first online/deployed hri
3632         for (OnlineEntry e : deployedEntries) {
3633           return e.hri.getTable();
3634         }
3635         return null;
3636       }
3637     }
3638
3639     public String getRegionNameAsString() {
3640       if (metaEntry != null) {
3641         return metaEntry.getRegionNameAsString();
3642       } else if (hdfsEntry != null) {
3643         if (hdfsEntry.hri != null) {
3644           return hdfsEntry.hri.getRegionNameAsString();
3645         }
3646       } else {
3647         // return the info from the first online/deployed hri
3648         for (OnlineEntry e : deployedEntries) {
3649           return e.hri.getRegionNameAsString();
3650         }
3651       }
3652       return null;
3653     }
3654
3655     public byte[] getRegionName() {
3656       if (metaEntry != null) {
3657         return metaEntry.getRegionName();
3658       } else if (hdfsEntry != null) {
3659         return hdfsEntry.hri.getRegionName();
3660       } else {
3661         // return the info from the first online/deployed hri
3662         for (OnlineEntry e : deployedEntries) {
3663           return e.hri.getRegionName();
3664         }
3665         return null;
3666       }
3667     }
3668
3669     public HRegionInfo getPrimaryHRIForDeployedReplica() {
3670       return primaryHRIForDeployedReplica;
3671     }
3672
3673     Path getHdfsRegionDir() {
3674       if (hdfsEntry == null) {
3675         return null;
3676       }
3677       return hdfsEntry.hdfsRegionDir;
3678     }
3679
3680     boolean containsOnlyHdfsEdits() {
3681       if (hdfsEntry == null) {
3682         return false;
3683       }
3684       return hdfsEntry.hdfsOnlyEdits;
3685     }
3686
3687     boolean isHdfsRegioninfoPresent() {
3688       if (hdfsEntry == null) {
3689         return false;
3690       }
3691       return hdfsEntry.hdfsRegioninfoFilePresent;
3692     }
3693
3694     long getModTime() {
3695       if (hdfsEntry == null) {
3696         return 0;
3697       }
3698       return hdfsEntry.hdfsRegionDirModTime;
3699     }
3700
3701     HRegionInfo getHdfsHRI() {
3702       if (hdfsEntry == null) {
3703         return null;
3704       }
3705       return hdfsEntry.hri;
3706     }
3707
3708     public void setSkipChecks(boolean skipChecks) {
3709       this.skipChecks = skipChecks;
3710     }
3711
3712     public boolean isSkipChecks() {
3713       return skipChecks;
3714     }
3715
3716     public void setMerged(boolean isMerged) {
3717       this.isMerged = isMerged;
3718     }
3719
3720     public boolean isMerged() {
3721       return this.isMerged;
3722     }
3723   }
3724
3725   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3726     @Override
3727     public int compare(HbckInfo l, HbckInfo r) {
3728       if (l == r) {
3729         // same instance
3730         return 0;
3731       }
3732
3733       int tableCompare = l.getTableName().compareTo(r.getTableName());
3734       if (tableCompare != 0) {
3735         return tableCompare;
3736       }
3737
3738       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3739           l.getStartKey(), r.getStartKey());
3740       if (startComparison != 0) {
3741         return startComparison;
3742       }
3743
3744       // Special case for absolute endkey
3745       byte[] endKey = r.getEndKey();
3746       endKey = (endKey.length == 0) ? null : endKey;
3747       byte[] endKey2 = l.getEndKey();
3748       endKey2 = (endKey2.length == 0) ? null : endKey2;
3749       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3750           endKey2,  endKey);
3751
3752       if (endComparison != 0) {
3753         return endComparison;
3754       }
3755
3756       // use regionId as tiebreaker.
3757       // Null is considered after all possible values so make it bigger.
3758       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3759         return 0;
3760       }
3761       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3762         return 1;
3763       }
3764       // l.hdfsEntry must not be null
3765       if (r.hdfsEntry == null) {
3766         return -1;
3767       }
3768       // both l.hdfsEntry and r.hdfsEntry must not be null.
3769       return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
3770     }
3771   };
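
  // Example (illustrative only) of the special end-key rule above: two HbckInfos of the
  // same table both starting at "a", one ending at "b" and one at "" (the absolute end
  // key), compare via the '' -> null conversion; the comparator is assumed to order
  // null after all real byte arrays, so the region reaching the absolute end key sorts last.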
3772
3773   /**
3774    * Prints summary of all tables found on the system.
3775    */
3776   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3777     StringBuilder sb = new StringBuilder();
3778     int numOfSkippedRegions;
3779     errors.print("Summary:");
3780     for (TableInfo tInfo : tablesInfo.values()) {
3781       numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
3782           skippedRegions.get(tInfo.getName()).size() : 0;
3783
3784       if (errors.tableHasErrors(tInfo)) {
3785         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3786       } else if (numOfSkippedRegions > 0) {
3787         errors.print("Table " + tInfo.getName() + " is okay (with "
3788           + numOfSkippedRegions + " skipped regions).");
3789       } else {
3791         errors.print("Table " + tInfo.getName() + " is okay.");
3792       }
3793       errors.print("    Number of regions: " + tInfo.getNumRegions());
3794       if (numOfSkippedRegions > 0) {
3795         Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
3796         System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
3797         System.out.println("      List of skipped regions:");
3798         for(String sr : skippedRegionStrings) {
3799           System.out.println("        " + sr);
3800         }
3801       }
3802       sb.setLength(0); // clear out existing buffer, if any.
3803       sb.append("    Deployed on: ");
3804       for (ServerName server : tInfo.deployedOn) {
3805         sb.append(" " + server.toString());
3806       }
3807       errors.print(sb.toString());
3808     }
3809   }
3810
3811   static ErrorReporter getErrorReporter(
3812       final Configuration conf) throws ClassNotFoundException {
3813     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3814     return ReflectionUtils.newInstance(reporter, conf);
3815   }
3816
3817   public interface ErrorReporter {
3818     enum ERROR_CODE {
3819       UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3820       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
3821       NOT_DEPLOYED,
3822       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3823       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3824       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3825       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3826       WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE,
3827       NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE
3828     }
3829     void clear();
3830     void report(String message);
3831     void reportError(String message);
3832     void reportError(ERROR_CODE errorCode, String message);
3833     void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3834     void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3835     void reportError(
3836       ERROR_CODE errorCode,
3837       String message,
3838       TableInfo table,
3839       HbckInfo info1,
3840       HbckInfo info2
3841     );
3842     int summarize();
3843     void detail(String details);
3844     ArrayList<ERROR_CODE> getErrorList();
3845     void progress();
3846     void print(String message);
3847     void resetErrors();
3848     boolean tableHasErrors(TableInfo table);
3849   }
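
  // A custom reporter can be plugged in through the "hbasefsck.errorreporter" key read
  // by getErrorReporter() above. Hedged sketch, assuming MyQuietReporter implements
  // ErrorReporter and is instantiable by ReflectionUtils:
  //
  //   conf.setClass("hbasefsck.errorreporter", MyQuietReporter.class, ErrorReporter.class);
  //   ErrorReporter reporter = getErrorReporter(conf);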
3850
3851   static class PrintingErrorReporter implements ErrorReporter {
3852     public int errorCount = 0;
3853     private int showProgress;
3854     // How frequently calls to progress() will create output
3855     private static final int progressThreshold = 100;
3856
3857     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3858
3859     // for use by unit tests to verify which errors were discovered
3860     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3861
3862     @Override
3863     public void clear() {
3864       errorTables.clear();
3865       errorList.clear();
3866       errorCount = 0;
3867     }
3868
3869     @Override
3870     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3871       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3872         System.err.println(message);
3873         return;
3874       }
3875
3876       errorList.add(errorCode);
3877       if (!summary) {
3878         System.out.println("ERROR: " + message);
3879       }
3880       errorCount++;
3881       showProgress = 0;
3882     }
3883
3884     @Override
3885     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3886       errorTables.add(table);
3887       reportError(errorCode, message);
3888     }
3889
3890     @Override
3891     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3892                                          HbckInfo info) {
3893       errorTables.add(table);
3894       String reference = "(region " + info.getRegionNameAsString() + ")";
3895       reportError(errorCode, reference + " " + message);
3896     }
3897
3898     @Override
3899     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3900                                          HbckInfo info1, HbckInfo info2) {
3901       errorTables.add(table);
3902       String reference = "(regions " + info1.getRegionNameAsString()
3903           + " and " + info2.getRegionNameAsString() + ")";
3904       reportError(errorCode, reference + " " + message);
3905     }
3906
3907     @Override
3908     public synchronized void reportError(String message) {
3909       reportError(ERROR_CODE.UNKNOWN, message);
3910     }
3911
3912     /**
3913      * Report error information, but do not increment the error count.  Intended for cases
3914      * where the actual error would have been reported previously.
3915      * @param message the error message to report
3916      */
3917     @Override
3918     public synchronized void report(String message) {
3919       if (! summary) {
3920         System.out.println("ERROR: " + message);
3921       }
3922       showProgress = 0;
3923     }
3924
3925     @Override
3926     public synchronized int summarize() {
3927       System.out.println(Integer.toString(errorCount) +
3928                          " inconsistencies detected.");
3929       if (errorCount == 0) {
3930         System.out.println("Status: OK");
3931         return 0;
3932       } else {
3933         System.out.println("Status: INCONSISTENT");
3934         return -1;
3935       }
3936     }
3937
3938     @Override
3939     public ArrayList<ERROR_CODE> getErrorList() {
3940       return errorList;
3941     }
3942
3943     @Override
3944     public synchronized void print(String message) {
3945       if (!summary) {
3946         System.out.println(message);
3947       }
3948     }
3949
3950     @Override
3951     public boolean tableHasErrors(TableInfo table) {
3952       return errorTables.contains(table);
3953     }
3954
3955     @Override
3956     public void resetErrors() {
3957       errorCount = 0;
3958     }
3959
3960     @Override
3961     public synchronized void detail(String message) {
3962       if (details) {
3963         System.out.println(message);
3964       }
3965       showProgress = 0;
3966     }
3967
3968     @Override
3969     public synchronized void progress() {
3970       if (showProgress++ == PROGRESS_THRESHOLD) {
3971         if (!summary) {
3972           System.out.print(".");
3973         }
3974         showProgress = 0;
3975       }
3976     }
3977   }
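  /**
   * Editorial sketch (not part of the original source): a hedged example of how
   * a unit test might drive PrintingErrorReporter and inspect the collected
   * error codes via getErrorList(). The ERROR_CODE value is illustrative only.
   */
  private static void examplePrintingErrorReporterUsage() {
    PrintingErrorReporter reporter = new PrintingErrorReporter();
    reporter.reportError(ERROR_CODE.UNKNOWN, "example inconsistency");
    assert reporter.getErrorList().contains(ERROR_CODE.UNKNOWN);
    assert reporter.summarize() == -1; // non-zero error count => "Status: INCONSISTENT"
    reporter.clear();
    assert reporter.summarize() == 0;  // zero errors => "Status: OK"
  }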
3978
3979   /**
3980    * Contact a region server and get all information from it
3981    */
3982   static class WorkItemRegion implements Callable<Void> {
3983     private final HBaseFsck hbck;
3984     private final ServerName rsinfo;
3985     private final ErrorReporter errors;
3986     private final ClusterConnection connection;
3987
3988     WorkItemRegion(HBaseFsck hbck, ServerName info,
3989                    ErrorReporter errors, ClusterConnection connection) {
3990       this.hbck = hbck;
3991       this.rsinfo = info;
3992       this.errors = errors;
3993       this.connection = connection;
3994     }
3995
3996     @Override
3997     public synchronized Void call() throws IOException {
3998       errors.progress();
3999       try {
4000         BlockingInterface server = connection.getAdmin(rsinfo);
4001
4002         // list all online regions from this region server
4003         List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
4004         regions = filterRegions(regions);
4005
4006         if (details) {
4007           errors.detail("RegionServer: " + rsinfo.getServerName() +
4008                            " number of regions: " + regions.size());
4009           for (HRegionInfo rinfo: regions) {
4010             errors.detail("  " + rinfo.getRegionNameAsString() +
4011                              " id: " + rinfo.getRegionId() +
4012                              " encoded_name: " + rinfo.getEncodedName() +
4013                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
4014                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
4015           }
4016         }
4017
4018         // check to see if the existence of this region matches the region in META
4019         for (HRegionInfo r:regions) {
4020           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4021           hbi.addServer(r, rsinfo);
4022         }
4023       } catch (IOException e) {          // unable to connect to the region server.
4024         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
4025           " Unable to fetch region information. " + e);
4026         throw e;
4027       }
4028       return null;
4029     }
4030
4031     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
4032       List<HRegionInfo> ret = Lists.newArrayList();
4033       for (HRegionInfo hri : regions) {
4034         if (hri.isMetaTable() || (!hbck.checkMetaOnly
4035             && hbck.isTableIncluded(hri.getTable()))) {
4036           ret.add(hri);
4037         }
4038       }
4039       return ret;
4040     }
4041   }
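  /**
   * Editorial sketch (not part of the original source): shows how Callable work
   * items such as WorkItemRegion are typically fanned out on an executor and
   * awaited. The parameter values are assumptions for illustration only.
   */
  private static void exampleFanOutRegionWork(HBaseFsck hbck, Collection<ServerName> regionServers,
      ErrorReporter errors, ClusterConnection connection, ExecutorService executor)
      throws InterruptedException {
    List<Future<Void>> futures = new ArrayList<Future<Void>>(regionServers.size());
    for (ServerName rsinfo : regionServers) {
      futures.add(executor.submit(new WorkItemRegion(hbck, rsinfo, errors, connection)));
    }
    for (Future<Void> f : futures) {
      try {
        f.get(); // an IOException from call() surfaces wrapped in an ExecutionException
      } catch (ExecutionException ee) {
        LOG.warn("Could not complete a region server work item", ee.getCause());
      }
    }
  }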
4042
4043   /**
4044    * Contact hdfs and gather information about the region directories under
4045    * the specified table directory.
4046    */
4047   static class WorkItemHdfsDir implements Callable<Void> {
4048     private HBaseFsck hbck;
4049     private FileStatus tableDir;
4050     private ErrorReporter errors;
4051     private FileSystem fs;
4052
4053     WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
4054                     FileStatus status) {
4055       this.hbck = hbck;
4056       this.fs = fs;
4057       this.tableDir = status;
4058       this.errors = errors;
4059     }
4060
4061     @Override
4062     public synchronized Void call() throws IOException {
4063       try {
4064         // level 2: <HBASE_DIR>/<table>/*
4065         FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4066         for (FileStatus regionDir : regionDirs) {
4067           errors.progress();
4068           String encodedName = regionDir.getPath().getName();
4069           // ignore directories that aren't hexadecimal
4070           if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
4071             continue;
4072           }
4073
4074           LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
4075           HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
4076           HdfsEntry he = new HdfsEntry();
4077           synchronized (hbi) {
4078             if (hbi.getHdfsRegionDir() != null) {
4079               errors.print("Directory " + encodedName + " duplicate?? " +
4080                            hbi.getHdfsRegionDir());
4081             }
4082
4083             he.hdfsRegionDir = regionDir.getPath();
4084             he.hdfsRegionDirModTime = regionDir.getModificationTime();
4085             Path regioninfoFile = new Path(he.hdfsRegionDir, HRegionFileSystem.REGION_INFO_FILE);
4086             he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile);
4087             // we add to orphan list when we attempt to read .regioninfo
4088
4089             // Set a flag if this region contains only edits.
4090             // This is a special case: a region left behind after a split may contain only recovered edits.
4091             he.hdfsOnlyEdits = true;
4092             FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4093             Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4094             for (FileStatus subDir : subDirs) {
4095               errors.progress();
4096               String sdName = subDir.getPath().getName();
4097               if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4098                 he.hdfsOnlyEdits = false;
4099                 break;
4100               }
4101             }
4102             hbi.hdfsEntry = he;
4103           }
4104         }
4105       } catch (IOException e) {
4106         // unable to list the region directories under the table directory.
4107         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4108             + tableDir.getPath().getName()
4109             + " Unable to fetch region information. " + e);
4110         throw e;
4111       }
4112       return null;
4113     }
4114   }
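  /*
   * Editorial sketch (not part of the original source): the HDFS layout scanned
   * by WorkItemHdfsDir, with illustrative names. Only hex-named region
   * directories are considered; a region dir containing nothing but dot-files
   * and recovered.edits/ is flagged as hdfsOnlyEdits.
   *
   *   /hbase/data/default/mytable/                <- tableDir
   *     0123456789abcdef0123456789abcdef/         <- region dir (encoded name)
   *       .regioninfo                             <- hdfsRegioninfoFilePresent
   *       recovered.edits/
   *       cf1/                                    <- a column family dir
   */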
4115
4116   /**
4117    * Contact hdfs and load the .regioninfo file for the specified region
4118    * into its HbckInfo entry.
4119    */
4120   static class WorkItemHdfsRegionInfo implements Callable<Void> {
4121     private HbckInfo hbi;
4122     private HBaseFsck hbck;
4123     private ErrorReporter errors;
4124
4125     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4126       this.hbi = hbi;
4127       this.hbck = hbck;
4128       this.errors = errors;
4129     }
4130
4131     @Override
4132     public synchronized Void call() throws IOException {
4133       // only load entries that haven't been loaded yet.
4134       if (hbi.getHdfsHRI() == null) {
4135         try {
4136           errors.progress();
4137           hbck.loadHdfsRegioninfo(hbi);
4138         } catch (IOException ioe) {
4139           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4140               + hbi.getTableName() + " in hdfs dir "
4141               + hbi.getHdfsRegionDir()
4142               + "!  Its format or version may be invalid.  Treating as "
4143               + "an orphaned regiondir.";
4144           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4145           try {
4146             hbck.debugLsr(hbi.getHdfsRegionDir());
4147           } catch (IOException ioe2) {
4148             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4149             throw ioe2;
4150           }
4151           hbck.orphanHdfsDirs.add(hbi);
4152           throw ioe;
4153         }
4154       }
4155       return null;
4156     }
4157   }
4158
4159   /**
4160    * Display the full report from fsck. This displays all live and dead region
4161    * servers, and all known regions.
4162    */
4163   public static void setDisplayFullReport() {
4164     details = true;
4165   }
4166
4167   /**
4168    * Set exclusive mode.
4169    */
4170   public static void setForceExclusive() {
4171     forceExclusive = true;
4172   }
4173
4174   /**
4175    * Whether hbck needs exclusive access; only one instance of hbck can modify HBase at a time.
4176    */
4177   public boolean isExclusive() {
4178     return fixAny || forceExclusive;
4179   }
4180
4181   /**
4182    * Disable the load balancer.
4183    */
4184   public static void setDisableBalancer() {
4185     disableBalancer = true;
4186   }
4187
4188   /**
4189    * Disable split and merge.
4190    */
4191   public static void setDisableSplitAndMerge() {
4192     setDisableSplitAndMerge(true);
4193   }
4194
4195   @VisibleForTesting
4196   public static void setDisableSplitAndMerge(boolean flag) {
4197     disableSplitAndMerge = flag;
4198   }
4199
4200   /**
4201    * The balancer should be disabled if we are modifying HBase.
4202    * It can be disabled if you want to prevent region movement from causing
4203    * false positives.
4204    */
4205   public boolean shouldDisableBalancer() {
4206     return fixAny || disableBalancer;
4207   }
4208
4209   /**
4210    * Split and merge should be disabled if we are modifying HBase.
4211    * It can be disabled if you want to prevent region movement from causing
4212    * false positives.
4213    */
4214   public boolean shouldDisableSplitAndMerge() {
4215     return fixAny || disableSplitAndMerge;
4216   }
4217
4218   /**
4219    * Set summary mode.
4220    * Print only a summary of the tables and their status (OK or INCONSISTENT).
4221    */
4222   static void setSummary() {
4223     summary = true;
4224   }
4225
4226   /**
4227    * Set hbase:meta check mode.
4228    * Print only information about the hbase:meta table deployment/state.
4229    */
4230   void setCheckMetaOnly() {
4231     checkMetaOnly = true;
4232   }
4233
4234   /**
4235    * Set region boundaries check mode.
4236    */
4237   void setRegionBoundariesCheck() {
4238     checkRegionBoundaries = true;
4239   }
4240
4241   /**
4242    * Set table locks fix mode.
4243    * Delete table locks that have been held for a long time.
4244    */
4245   public void setFixTableLocks(boolean shouldFix) {
4246     fixTableLocks = shouldFix;
4247     fixAny |= shouldFix;
4248   }
4249
4250   /**
4251    * Set replication fix mode.
4252    */
4253   public void setFixReplication(boolean shouldFix) {
4254     fixReplication = shouldFix;
4255     fixAny |= shouldFix;
4256   }
4257
4258   /**
4259    * Mark that fsck should be rerun. This is set when we have tried to
4260    * fix something, so that running the fsck tool again can verify
4261    * whether the fix worked and that nothing else was broken in the
4262    * process.
4263    */
4264   void setShouldRerun() {
4265     rerun = true;
4266   }
4267
4268   boolean shouldRerun() {
4269     return rerun;
4270   }
4271
4272   /**
4273    * Fix assignment inconsistencies found by fsck. When enabled, the fsck
4274    * utility tries to fix any assignment errors it finds.
4275    */
4276   public void setFixAssignments(boolean shouldFix) {
4277     fixAssignments = shouldFix;
4278     fixAny |= shouldFix;
4279   }
4280
4281   boolean shouldFixAssignments() {
4282     return fixAssignments;
4283   }
4284
4285   public void setFixMeta(boolean shouldFix) {
4286     fixMeta = shouldFix;
4287     fixAny |= shouldFix;
4288   }
4289
4290   boolean shouldFixMeta() {
4291     return fixMeta;
4292   }
4293
4294   public void setFixEmptyMetaCells(boolean shouldFix) {
4295     fixEmptyMetaCells = shouldFix;
4296     fixAny |= shouldFix;
4297   }
4298
4299   boolean shouldFixEmptyMetaCells() {
4300     return fixEmptyMetaCells;
4301   }
4302
4303   public void setCheckHdfs(boolean checking) {
4304     checkHdfs = checking;
4305   }
4306
4307   boolean shouldCheckHdfs() {
4308     return checkHdfs;
4309   }
4310
4311   public void setFixHdfsHoles(boolean shouldFix) {
4312     fixHdfsHoles = shouldFix;
4313     fixAny |= shouldFix;
4314   }
4315
4316   boolean shouldFixHdfsHoles() {
4317     return fixHdfsHoles;
4318   }
4319
4320   public void setFixTableOrphans(boolean shouldFix) {
4321     fixTableOrphans = shouldFix;
4322     fixAny |= shouldFix;
4323   }
4324
4325   boolean shouldFixTableOrphans() {
4326     return fixTableOrphans;
4327   }
4328
4329   public void setFixHdfsOverlaps(boolean shouldFix) {
4330     fixHdfsOverlaps = shouldFix;
4331     fixAny |= shouldFix;
4332   }
4333
4334   boolean shouldFixHdfsOverlaps() {
4335     return fixHdfsOverlaps;
4336   }
4337
4338   public void setFixHdfsOrphans(boolean shouldFix) {
4339     fixHdfsOrphans = shouldFix;
4340     fixAny |= shouldFix;
4341   }
4342
4343   boolean shouldFixHdfsOrphans() {
4344     return fixHdfsOrphans;
4345   }
4346
4347   public void setFixVersionFile(boolean shouldFix) {
4348     fixVersionFile = shouldFix;
4349     fixAny |= shouldFix;
4350   }
4351
4352   public boolean shouldFixVersionFile() {
4353     return fixVersionFile;
4354   }
4355
4356   public void setSidelineBigOverlaps(boolean sbo) {
4357     this.sidelineBigOverlaps = sbo;
4358   }
4359
4360   public boolean shouldSidelineBigOverlaps() {
4361     return sidelineBigOverlaps;
4362   }
4363
4364   public void setFixSplitParents(boolean shouldFix) {
4365     fixSplitParents = shouldFix;
4366     fixAny |= shouldFix;
4367   }
4368
4369   boolean shouldFixSplitParents() {
4370     return fixSplitParents;
4371   }
4372
4373   public void setFixReferenceFiles(boolean shouldFix) {
4374     fixReferenceFiles = shouldFix;
4375     fixAny |= shouldFix;
4376   }
4377
4378   boolean shouldFixReferenceFiles() {
4379     return fixReferenceFiles;
4380   }
4381
4382   public boolean shouldIgnorePreCheckPermission() {
4383     return !fixAny || ignorePreCheckPermission;
4384   }
4385
4386   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4387     this.ignorePreCheckPermission = ignorePreCheckPermission;
4388   }
4389
4390   /**
4391    * @param mm maximum number of regions to merge into a single region.
4392    */
4393   public void setMaxMerge(int mm) {
4394     this.maxMerge = mm;
4395   }
4396
4397   public int getMaxMerge() {
4398     return maxMerge;
4399   }
4400
4401   public void setMaxOverlapsToSideline(int mo) {
4402     this.maxOverlapsToSideline = mo;
4403   }
4404
4405   public int getMaxOverlapsToSideline() {
4406     return maxOverlapsToSideline;
4407   }
4408
4409   /**
4410    * Only check/fix tables specified in the list.
4411    * An empty list means all tables are included.
4412    */
4413   boolean isTableIncluded(TableName table) {
4414     return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
4415   }
4416
4417   public void includeTable(TableName table) {
4418     tablesIncluded.add(table);
4419   }
4420
4421   Set<TableName> getIncludedTables() {
4422     return new HashSet<TableName>(tablesIncluded);
4423   }
4424
4425   /**
4426    * Consider only those tables whose state in hbase:meta has not changed
4427    * during the last few seconds, as specified by hbase.admin.fsck.timelag.
4428    * @param seconds the time lag in seconds
4429    */
4430   public void setTimeLag(long seconds) {
4431     timelag = seconds * 1000; // convert to milliseconds
4432   }
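  // Editorial example (not part of the original source): hbck.setTimeLag(60)
  // stores timelag = 60 * 1000 = 60000 ms, so only regions whose hbase:meta
  // state is at least a minute old are examined.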
4433
4434   /**
4435    * Set the directory used for sidelining data.
4436    * @param sidelineDir HDFS path to sideline data
4437    */
4438   public void setSidelineDir(String sidelineDir) {
4439     this.sidelineDir = new Path(sidelineDir);
4440   }
4441
4442   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4443     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4444   }
4445
4446   public HFileCorruptionChecker getHFilecorruptionChecker() {
4447     return hfcc;
4448   }
4449
4450   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4451     this.hfcc = hfcc;
4452   }
4453
4454   public void setRetCode(int code) {
4455     this.retcode = code;
4456   }
4457
4458   public int getRetCode() {
4459     return retcode;
4460   }
4461
4462   protected HBaseFsck printUsageAndExit() {
4463     StringWriter sw = new StringWriter(2048);
4464     PrintWriter out = new PrintWriter(sw);
4465     out.println("Usage: fsck [opts] {only tables}");
4466     out.println(" where [opts] are:");
4467     out.println("   -help Display help options (this)");
4468     out.println("   -details Display full report of all regions.");
4469     out.println("   -timelag <timeInSeconds>  Process only regions that " +
4470                        "have not experienced any metadata updates in the last " +
4471                        "<timeInSeconds> seconds.");
4472     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4473         " before checking if the fix worked if run with -fix");
4474     out.println("   -summary Print only summary of the tables and status.");
4475     out.println("   -metaonly Only check the state of the hbase:meta table.");
4476     out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4477     out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
4478     out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
4479     out.println("   -disableBalancer Disable the load balancer.");
4480
4481     out.println("");
4482     out.println("  Metadata Repair options: (expert features, use with caution!)");
4483     out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
4484     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4485     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4486     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4487         + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4488     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4489     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4490     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4491     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4492     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4493     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4494     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow sidelining big overlaps");
4495     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4496     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4497     out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
4498     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4499     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4500         + " (empty REGIONINFO_QUALIFIER rows)");
4501
4502     out.println("");
4503     out.println("  Datafile Repair options: (expert features, use with caution!)");
4504     out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
4505     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
4506
4507     out.println("");
4508     out.println("  Metadata Repair shortcuts");
4509     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4510         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
4511     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4512
4513     out.println("");
4514     out.println("  Table lock options");
4515     out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4516
4517     out.println("");
4518     out.println(" Replication options");
4519     out.println("   -fixReplication   Deletes replication queues for removed peers");
4520
4521     out.flush();
4522     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4523
4524     setRetCode(-2);
4525     return this;
4526   }
4527
4528   /**
4529    * Main program
4530    *
4531    * @param args command-line arguments
4532    * @throws Exception if hbck fails to run
4533    */
4534   public static void main(String[] args) throws Exception {
4535     // create a fsck object
4536     Configuration conf = HBaseConfiguration.create();
4537     Path hbasedir = FSUtils.getRootDir(conf);
4538     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4539     FSUtils.setFsDefault(conf, new Path(defaultFs));
4540     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4541     System.exit(ret);
4542   }
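  // Editorial usage note (not part of the original source): main() is normally
  // reached through the hbase wrapper script; the table name is illustrative:
  //
  //   $ hbase hbck -details MyTable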
4543
4544   /**
4545    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4546    */
4547   static class HBaseFsckTool extends Configured implements Tool {
4548     HBaseFsckTool(Configuration conf) { super(conf); }
4549     @Override
4550     public int run(String[] args) throws Exception {
4551       HBaseFsck hbck = new HBaseFsck(getConf());
4552       hbck.exec(hbck.executor, args);
4553       hbck.close();
4554       return hbck.getRetCode();
4555     }
4556   }
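  /**
   * Editorial sketch (not part of the original source): the Tool wrapper can be
   * driven programmatically just as main() drives it; the "-summary" argument
   * is illustrative only.
   */
  private static int exampleRunTool() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    return ToolRunner.run(new HBaseFsckTool(conf), new String[] { "-summary" });
  }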
4557
4558
4559   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4560     ServiceException, InterruptedException {
4561     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4562
4563     boolean checkCorruptHFiles = false;
4564     boolean sidelineCorruptHFiles = false;
4565
4566     // Process command-line args.
4567     for (int i = 0; i < args.length; i++) {
4568       String cmd = args[i];
4569       if (cmd.equals("-help") || cmd.equals("-h")) {
4570         return printUsageAndExit();
4571       } else if (cmd.equals("-details")) {
4572         setDisplayFullReport();
4573       } else if (cmd.equals("-exclusive")) {
4574         setForceExclusive();
4575       } else if (cmd.equals("-disableBalancer")) {
4576         setDisableBalancer();
4577       } else if (cmd.equals("-disableSplitAndMerge")) {
4578         setDisableSplitAndMerge();
4579       } else if (cmd.equals("-timelag")) {
4580         if (i == args.length - 1) {
4581           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4582           return printUsageAndExit();
4583         }
4584         try {
4585           long timelag = Long.parseLong(args[i+1]);
4586           setTimeLag(timelag);
4587         } catch (NumberFormatException e) {
4588           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4589           return printUsageAndExit();
4590         }
4591         i++;
4592       } else if (cmd.equals("-sleepBeforeRerun")) {
4593         if (i == args.length - 1) {
4594           errors.reportError(ERROR_CODE.WRONG_USAGE,
4595             "HBaseFsck: -sleepBeforeRerun needs a value.");
4596           return printUsageAndExit();
4597         }
4598         try {
4599           sleepBeforeRerun = Long.parseLong(args[i+1]);
4600         } catch (NumberFormatException e) {
4601           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4602           return printUsageAndExit();
4603         }
4604         i++;
4605       } else if (cmd.equals("-sidelineDir")) {
4606         if (i == args.length - 1) {
4607           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4608           return printUsageAndExit();
4609         }
4610         i++;
4611         setSidelineDir(args[i]);
4612       } else if (cmd.equals("-fix")) {
4613         errors.reportError(ERROR_CODE.WRONG_USAGE,
4614           "This option is deprecated, please use -fixAssignments instead.");
4615         setFixAssignments(true);
4616       } else if (cmd.equals("-fixAssignments")) {
4617         setFixAssignments(true);
4618       } else if (cmd.equals("-fixMeta")) {
4619         setFixMeta(true);
4620       } else if (cmd.equals("-noHdfsChecking")) {
4621         setCheckHdfs(false);
4622       } else if (cmd.equals("-fixHdfsHoles")) {
4623         setFixHdfsHoles(true);
4624       } else if (cmd.equals("-fixHdfsOrphans")) {
4625         setFixHdfsOrphans(true);
4626       } else if (cmd.equals("-fixTableOrphans")) {
4627         setFixTableOrphans(true);
4628       } else if (cmd.equals("-fixHdfsOverlaps")) {
4629         setFixHdfsOverlaps(true);
4630       } else if (cmd.equals("-fixVersionFile")) {
4631         setFixVersionFile(true);
4632       } else if (cmd.equals("-sidelineBigOverlaps")) {
4633         setSidelineBigOverlaps(true);
4634       } else if (cmd.equals("-fixSplitParents")) {
4635         setFixSplitParents(true);
4636       } else if (cmd.equals("-ignorePreCheckPermission")) {
4637         setIgnorePreCheckPermission(true);
4638       } else if (cmd.equals("-checkCorruptHFiles")) {
4639         checkCorruptHFiles = true;
4640       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4641         sidelineCorruptHFiles = true;
4642       } else if (cmd.equals("-fixReferenceFiles")) {
4643         setFixReferenceFiles(true);
4644       } else if (cmd.equals("-fixEmptyMetaCells")) {
4645         setFixEmptyMetaCells(true);
4646       } else if (cmd.equals("-repair")) {
4647         // this attempts to merge overlapping hdfs regions, needs testing
4648         // under load
4649         setFixHdfsHoles(true);
4650         setFixHdfsOrphans(true);
4651         setFixMeta(true);
4652         setFixAssignments(true);
4653         setFixHdfsOverlaps(true);
4654         setFixVersionFile(true);
4655         setSidelineBigOverlaps(true);
4656         setFixSplitParents(false);
4657         setCheckHdfs(true);
4658         setFixReferenceFiles(true);
4659         setFixTableLocks(true);
4660       } else if (cmd.equals("-repairHoles")) {
4661         // this will make all missing hdfs regions available but may lose data
4662         setFixHdfsHoles(true);
4663         setFixHdfsOrphans(false);
4664         setFixMeta(true);
4665         setFixAssignments(true);
4666         setFixHdfsOverlaps(false);
4667         setSidelineBigOverlaps(false);
4668         setFixSplitParents(false);
4669         setCheckHdfs(true);
4670       } else if (cmd.equals("-maxOverlapsToSideline")) {
4671         if (i == args.length - 1) {
4672           errors.reportError(ERROR_CODE.WRONG_USAGE,
4673             "-maxOverlapsToSideline needs a numeric value argument.");
4674           return printUsageAndExit();
4675         }
4676         try {
4677           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4678           setMaxOverlapsToSideline(maxOverlapsToSideline);
4679         } catch (NumberFormatException e) {
4680           errors.reportError(ERROR_CODE.WRONG_USAGE,
4681             "-maxOverlapsToSideline needs a numeric value argument.");
4682           return printUsageAndExit();
4683         }
4684         i++;
4685       } else if (cmd.equals("-maxMerge")) {
4686         if (i == args.length - 1) {
4687           errors.reportError(ERROR_CODE.WRONG_USAGE,
4688             "-maxMerge needs a numeric value argument.");
4689           return printUsageAndExit();
4690         }
4691         try {
4692           int maxMerge = Integer.parseInt(args[i+1]);
4693           setMaxMerge(maxMerge);
4694         } catch (NumberFormatException e) {
4695           errors.reportError(ERROR_CODE.WRONG_USAGE,
4696             "-maxMerge needs a numeric value argument.");
4697           return printUsageAndExit();
4698         }
4699         i++;
4700       } else if (cmd.equals("-summary")) {
4701         setSummary();
4702       } else if (cmd.equals("-metaonly")) {
4703         setCheckMetaOnly();
4704       } else if (cmd.equals("-boundaries")) {
4705         setRegionBoundariesCheck();
4706       } else if (cmd.equals("-fixTableLocks")) {
4707         setFixTableLocks(true);
4708       } else if (cmd.equals("-fixReplication")) {
4709         setFixReplication(true);
4710       } else if (cmd.startsWith("-")) {
4711         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4712         return printUsageAndExit();
4713       } else {
4714         includeTable(TableName.valueOf(cmd));
4715         errors.print("Allow checking/fixes for table: " + cmd);
4716       }
4717     }
4718
4719     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4720
4721     // pre-check whether the current user has FS write permission
4722     try {
4723       preCheckPermission();
4724     } catch (AccessDeniedException ace) {
4725       Runtime.getRuntime().exit(-1); // fatal: the current user lacks the required filesystem permissions
4726     } catch (IOException ioe) {
4727       Runtime.getRuntime().exit(-1); // fatal: the permission pre-check itself failed
4728     }
4729
4730     // do the real work of hbck
4731     connect();
4732
4733     try {
4734       // if corrupt file mode is on, first fix them since they may be opened later
4735       if (checkCorruptHFiles || sidelineCorruptHFiles) {
4736         LOG.info("Checking all hfiles for corruption");
4737         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4738         setHFileCorruptionChecker(hfcc); // so we can get result
4739         Collection<TableName> tables = getIncludedTables();
4740         Collection<Path> tableDirs = new ArrayList<Path>();
4741         Path rootdir = FSUtils.getRootDir(getConf());
4742         if (tables.size() > 0) {
4743           for (TableName t : tables) {
4744             tableDirs.add(FSUtils.getTableDir(rootdir, t));
4745           }
4746         } else {
4747           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4748         }
4749         hfcc.checkTables(tableDirs);
4750         hfcc.report(errors);
4751       }
4752
4753       // check and fix table integrity, region consistency.
4754       int code = onlineHbck();
4755       setRetCode(code);
4756       // If we have changed the HBase state it is better to run hbck again
4757       // to see if we haven't broken something else in the process.
4758       // We run it only once more because otherwise we can easily fall into
4759       // an infinite loop.
4760       if (shouldRerun()) {
4761         try {
4762           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4763           Thread.sleep(sleepBeforeRerun);
4764         } catch (InterruptedException ie) {
4765           LOG.warn("Interrupted while sleeping");
4766           return this;
4767         }
4768         // Just report
4769         setFixAssignments(false);
4770         setFixMeta(false);
4771         setFixHdfsHoles(false);
4772         setFixHdfsOverlaps(false);
4773         setFixVersionFile(false);
4774         setFixTableOrphans(false);
4775         errors.resetErrors();
4776         code = onlineHbck();
4777         setRetCode(code);
4778       }
4779     } finally {
4780       IOUtils.closeQuietly(this);
4781     }
4782     return this;
4783   }
4784
4785   /**
4786    * ls -r for debugging purposes
4787    */
4788   void debugLsr(Path p) throws IOException {
4789     debugLsr(getConf(), p, errors);
4790   }
4791
4792   /**
4793    * ls -r for debugging purposes
4794    */
4795   public static void debugLsr(Configuration conf,
4796       Path p) throws IOException {
4797     debugLsr(conf, p, new PrintingErrorReporter());
4798   }
4799
4800   /**
4801    * ls -r for debugging purposes
4802    */
4803   public static void debugLsr(Configuration conf,
4804       Path p, ErrorReporter errors) throws IOException {
4805     if (!LOG.isDebugEnabled() || p == null) {
4806       return;
4807     }
4808     FileSystem fs = p.getFileSystem(conf);
4809
4810     if (!fs.exists(p)) {
4811       // nothing
4812       return;
4813     }
4814     errors.print(p.toString());
4815
4816     if (fs.isFile(p)) {
4817       return;
4818     }
4819
4820     if (fs.getFileStatus(p).isDirectory()) {
4821       FileStatus[] fss = fs.listStatus(p);
4822       for (FileStatus status : fss) {
4823         debugLsr(conf, status.getPath(), errors);
4824       }
4825     }
4826   }
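  // Editorial usage note (not part of the original source): debugLsr prints
  // only when DEBUG logging is enabled for this class; the path is illustrative:
  //
  //   debugLsr(conf, new Path("/hbase/data/default/mytable"));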
4827 }