1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import com.google.common.base.Joiner;
21  import com.google.common.base.Preconditions;
22  import com.google.common.collect.ImmutableList;
23  import com.google.common.collect.Lists;
24  import com.google.common.collect.Multimap;
25  import com.google.common.collect.Ordering;
26  import com.google.common.collect.TreeMultimap;
27  import com.google.protobuf.ServiceException;
28
29  import java.io.Closeable;
30  import java.io.FileNotFoundException;
31  import java.io.IOException;
32  import java.io.InterruptedIOException;
33  import java.io.PrintWriter;
34  import java.io.StringWriter;
35  import java.net.InetAddress;
36  import java.net.URI;
37  import java.util.ArrayList;
38  import java.util.Arrays;
39  import java.util.Collection;
40  import java.util.Collections;
41  import java.util.Comparator;
42  import java.util.HashMap;
43  import java.util.HashSet;
44  import java.util.Iterator;
45  import java.util.List;
46  import java.util.Locale;
47  import java.util.Map;
48  import java.util.Map.Entry;
49  import java.util.Set;
50  import java.util.SortedMap;
51  import java.util.SortedSet;
52  import java.util.TreeMap;
53  import java.util.TreeSet;
54  import java.util.Vector;
55  import java.util.concurrent.Callable;
56  import java.util.concurrent.ConcurrentSkipListMap;
57  import java.util.concurrent.ExecutionException;
58  import java.util.concurrent.ExecutorService;
59  import java.util.concurrent.Executors;
60  import java.util.concurrent.Future;
61  import java.util.concurrent.FutureTask;
62  import java.util.concurrent.ScheduledThreadPoolExecutor;
63  import java.util.concurrent.TimeUnit;
64  import java.util.concurrent.TimeoutException;
65  import java.util.concurrent.atomic.AtomicBoolean;
66  import java.util.concurrent.atomic.AtomicInteger;
67
68  import org.apache.commons.io.IOUtils;
69  import org.apache.commons.lang.RandomStringUtils;
70  import org.apache.commons.lang.StringUtils;
71  import org.apache.commons.logging.Log;
72  import org.apache.commons.logging.LogFactory;
73  import org.apache.hadoop.conf.Configuration;
74  import org.apache.hadoop.conf.Configured;
75  import org.apache.hadoop.fs.FSDataOutputStream;
76  import org.apache.hadoop.fs.FileStatus;
77  import org.apache.hadoop.fs.FileSystem;
78  import org.apache.hadoop.fs.Path;
79  import org.apache.hadoop.fs.permission.FsAction;
80  import org.apache.hadoop.fs.permission.FsPermission;
81  import org.apache.hadoop.hbase.Abortable;
82  import org.apache.hadoop.hbase.Cell;
83  import org.apache.hadoop.hbase.CellUtil;
84  import org.apache.hadoop.hbase.ClusterStatus;
85  import org.apache.hadoop.hbase.HBaseConfiguration;
86  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
87  import org.apache.hadoop.hbase.HColumnDescriptor;
88  import org.apache.hadoop.hbase.HConstants;
89  import org.apache.hadoop.hbase.HRegionInfo;
90  import org.apache.hadoop.hbase.HRegionLocation;
91  import org.apache.hadoop.hbase.HTableDescriptor;
92  import org.apache.hadoop.hbase.KeyValue;
93  import org.apache.hadoop.hbase.MasterNotRunningException;
94  import org.apache.hadoop.hbase.MetaTableAccessor;
95  import org.apache.hadoop.hbase.RegionLocations;
96  import org.apache.hadoop.hbase.ServerName;
97  import org.apache.hadoop.hbase.TableName;
98  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
99  import org.apache.hadoop.hbase.classification.InterfaceAudience;
100 import org.apache.hadoop.hbase.classification.InterfaceStability;
101 import org.apache.hadoop.hbase.client.Admin;
102 import org.apache.hadoop.hbase.client.ClusterConnection;
103 import org.apache.hadoop.hbase.client.Connection;
104 import org.apache.hadoop.hbase.client.ConnectionFactory;
105 import org.apache.hadoop.hbase.client.Delete;
106 import org.apache.hadoop.hbase.client.Get;
107 import org.apache.hadoop.hbase.client.Put;
108 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
109 import org.apache.hadoop.hbase.client.Result;
110 import org.apache.hadoop.hbase.client.RowMutations;
111 import org.apache.hadoop.hbase.client.Table;
112 import org.apache.hadoop.hbase.client.TableState;
113 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
114 import org.apache.hadoop.hbase.io.hfile.HFile;
115 import org.apache.hadoop.hbase.master.MasterFileSystem;
116 import org.apache.hadoop.hbase.master.RegionState;
117 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
118 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
119 import org.apache.hadoop.hbase.regionserver.HRegion;
120 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
121 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
122 import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
123 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
124 import org.apache.hadoop.hbase.security.AccessDeniedException;
125 import org.apache.hadoop.hbase.security.UserProvider;
126 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
127 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
128 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
129 import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
130 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
131 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
132 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
133 import org.apache.hadoop.hbase.wal.WAL;
134 import org.apache.hadoop.hbase.wal.WALFactory;
135 import org.apache.hadoop.hbase.wal.WALSplitter;
136 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
137 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
138 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
139 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
140 import org.apache.hadoop.ipc.RemoteException;
141 import org.apache.hadoop.security.UserGroupInformation;
142 import org.apache.hadoop.util.ReflectionUtils;
143 import org.apache.hadoop.util.Tool;
144 import org.apache.hadoop.util.ToolRunner;
145 import org.apache.zookeeper.KeeperException;
146
147 /**
148  * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
149  * table integrity problems in a corrupted HBase.
150  * <p>
151  * Region consistency checks verify that hbase:meta, region deployment on region
152  * servers and the state of data in HDFS (.regioninfo files) all are in
153  * accordance.
154  * <p>
155  * Table integrity checks verify that all possible row keys resolve to exactly
156  * one region of a table.  This means there are no individual degenerate
157  * or backwards regions; no holes between regions; and that there are no
158  * overlapping regions.
159  * <p>
160  * The general repair strategy works in two phases:
161  * <ol>
162  * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
163  * <li> Repair Region Consistency with hbase:meta and assignments
164  * </ol>
165  * <p>
166  * For table integrity repairs, the tables' region directories are scanned
167  * for .regioninfo files.  Each table's integrity is then verified.  If there
168  * are any orphan regions (regions with no .regioninfo files) or holes, new
169  * regions are fabricated.  Backwards regions are sidelined as well as empty
170  * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
171  * a new region is created and all data is merged into the new region.
172  * <p>
173  * Table integrity repairs deal solely with HDFS and could potentially be done
174  * offline -- the hbase region servers or master do not need to be running.
175  * This phase can eventually be used to completely reconstruct the hbase:meta table in
176  * an offline fashion.
177  * <p>
178  * Region consistency requires three conditions -- 1) valid .regioninfo file
179  * present in an HDFS region dir,  2) valid row with .regioninfo data in META,
180  * and 3) a region is deployed only on the regionserver to which it was
181  * assigned, with the proper state in the master.
182  * <p>
183  * Region consistency repairs require hbase to be online so that hbck can
184  * contact the HBase master and region servers.  The hbck#connect() method must
185  * first be called successfully.  Much of the region consistency information
186  * is transient and less risky to repair.
187  * <p>
188  * If hbck is run from the command line, there are a handful of arguments that
189  * can be used to limit the kinds of repairs hbck will do.  See the code in
190  * {@link #printUsageAndExit()} for more details.
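 * <p>
 * For example, a few typical command-line invocations look like the following
 * (a sketch only; run the tool without arguments and consult the usage output
 * for the exact flags supported by this version):
 * <pre>
 *   $ hbase hbck                        # report-only run, no repairs
 *   $ hbase hbck -fixAssignments        # repair region assignment problems
 *   $ hbase hbck -fixMeta -fixHdfsHoles # repair hbase:meta entries and HDFS holes
 * </pre>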
191  */
192 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
193 @InterfaceStability.Evolving
194 public class HBaseFsck extends Configured implements Closeable {
195   public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
196   public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
197   private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
198   private static boolean rsSupportsOffline = true;
199   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
200   private static final int DEFAULT_MAX_MERGE = 5;
201   private static final String TO_BE_LOADED = "to_be_loaded";
202   private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
203   private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
204   private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
205   private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
206   // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
207   // In Hadoop 2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
208   // AlreadyBeingCreatedException, which implies a timeout on this operation of up to
209   // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
210   private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
211   private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
212   private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
213   private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
214
215   /**********************
216    * Internal resources
217    **********************/
218   private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
219   private ClusterStatus status;
220   private ClusterConnection connection;
221   private Admin admin;
222   private Table meta;
223   // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
224   protected ExecutorService executor;
225   private long startMillis = EnvironmentEdgeManager.currentTime();
226   private HFileCorruptionChecker hfcc;
227   private int retcode = 0;
228   private Path HBCK_LOCK_PATH;
229   private FSDataOutputStream hbckOutFd;
230   // This lock is to prevent cleanup of balancer resources twice between
231   // ShutdownHook and the main code. We cleanup only if the connect() is
232   // successful
233   private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
234
235   /***********
236    * Options
237    ***********/
238   private static boolean details = false; // do we display the full report
239   private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older than this lag are checked
240   private static boolean forceExclusive = false; // only this hbck can modify HBase
241   private boolean fixAssignments = false; // fix assignment errors?
242   private boolean fixMeta = false; // fix meta errors?
243   private boolean checkHdfs = true; // load and check fs consistency?
244   private boolean fixHdfsHoles = false; // fix fs holes?
245   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
246   private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
247   private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
248   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
249   private boolean fixSplitParents = false; // fix lingering split parents
250   private boolean fixReferenceFiles = false; // fix lingering reference store file
251   private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
252   private boolean fixTableLocks = false; // fix table locks which are expired
253   private boolean fixReplication = false; // fix undeleted replication queues for removed peer
254   private boolean fixAny = false; // Set to true if any fix option is enabled.
255
256   // limit checking/fixes to listed tables, if empty attempt to check/fix all
257   // hbase:meta is always checked
258   private Set<TableName> tablesIncluded = new HashSet<TableName>();
259   private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
260   // maximum number of overlapping regions to sideline
261   private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
262   private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
263   private Path sidelineDir = null;
264
265   private boolean rerun = false; // if we tried to fix something, rerun hbck
266   private static boolean summary = false; // if we want to print less output
267   private boolean checkMetaOnly = false;
268   private boolean checkRegionBoundaries = false;
269   private boolean ignorePreCheckPermission = false; // if true, skip the pre-check of file permissions
270
271   /*********
272    * State
273    *********/
274   final private ErrorReporter errors;
275   int fixes = 0;
276
277   /**
278    * This map contains the state of all hbck items.  It maps from encoded region
279    * name to HbckInfo structure.  The information contained in HbckInfo is used
280    * to detect and correct consistency (hdfs/meta/deployment) problems.
281    */
282   private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
283   // Empty regioninfo qualifiers in hbase:meta
284   private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
285
286   /**
287    * This map from Tablename -> TableInfo contains the structures necessary to
288    * detect table consistency problems (holes, dupes, overlaps).  It is sorted
289    * to prevent dupes.
290    *
291    * If tablesIncluded is empty, this map contains all tables.
292    * Otherwise, it contains only meta tables and tables in tablesIncluded,
293    * unless checkMetaOnly is specified, in which case it contains only
294    * the meta table.
295    */
296   private SortedMap<TableName, TableInfo> tablesInfo =
297       new ConcurrentSkipListMap<TableName, TableInfo>();
298
299   /**
300    * When initially looking at HDFS, we attempt to find any orphaned data.
301    */
302   private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
303
304   private Map<TableName, Set<String>> orphanTableDirs =
305       new HashMap<TableName, Set<String>>();
306   private Map<TableName, TableState> tableStates =
307       new HashMap<TableName, TableState>();
308   private final RetryCounterFactory lockFileRetryCounterFactory;
309   private final RetryCounterFactory createZNodeRetryCounterFactory;
310
311   private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName, Set<String>>();
312
313   private ZooKeeperWatcher zkw = null;
314   private String hbckEphemeralNodePath = null;
315   private boolean hbckZodeCreated = false;
316
317   /**
318    * Constructor
319    *
320    * @param conf Configuration object
321    * @throws MasterNotRunningException if the master is not running
322    * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
323    */
324   public HBaseFsck(Configuration conf) throws MasterNotRunningException,
325       ZooKeeperConnectionException, IOException, ClassNotFoundException {
326     this(conf, createThreadPool(conf));
327   }
328
329   private static ExecutorService createThreadPool(Configuration conf) {
330     int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
331     return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
332   }
333
334   /**
335    * Constructor
336    *
337    * @param conf
338    *          Configuration object
339    * @throws MasterNotRunningException
340    *           if the master is not running
341    * @throws ZooKeeperConnectionException
342    *           if unable to connect to ZooKeeper
343    */
344   public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
345       ZooKeeperConnectionException, IOException, ClassNotFoundException {
346     super(conf);
347     errors = getErrorReporter(getConf());
348     this.executor = exec;
349     lockFileRetryCounterFactory = new RetryCounterFactory(
350       getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
351       getConf().getInt(
352         "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
353       getConf().getInt(
354         "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
355     createZNodeRetryCounterFactory = new RetryCounterFactory(
356       getConf().getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
357       getConf().getInt(
358         "hbase.hbck.createznode.attempt.sleep.interval",
359         DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
360       getConf().getInt(
361         "hbase.hbck.createznode.attempt.maxsleeptime",
362         DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
363     zkw = createZooKeeperWatcher();
364   }
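  // A minimal sketch (illustrative values only) of tuning the retry settings read
  // above through the Configuration handed to the constructor; the keys are the
  // same ones used in this class:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setInt("hbase.hbck.lockfile.attempts", 10);
  //   conf.setInt("hbase.hbck.createznode.attempts", 10);
  //   conf.setInt("hbasefsck.numthreads", 25);
  //   HBaseFsck fsck = new HBaseFsck(conf);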
365
366   private class FileLockCallable implements Callable<FSDataOutputStream> {
367     RetryCounter retryCounter;
368
369     public FileLockCallable(RetryCounter retryCounter) {
370       this.retryCounter = retryCounter;
371     }
372     @Override
373     public FSDataOutputStream call() throws IOException {
374       try {
375         FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
376         FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
377             HConstants.DATA_FILE_UMASK_KEY);
378         Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
379         fs.mkdirs(tmpDir);
380         HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
381         final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
382         out.writeBytes(InetAddress.getLocalHost().toString());
383         out.flush();
384         return out;
385       } catch(RemoteException e) {
386         if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
387           return null;
388         } else {
389           throw e;
390         }
391       }
392     }
393
394     private FSDataOutputStream createFileWithRetries(final FileSystem fs,
395         final Path hbckLockFilePath, final FsPermission defaultPerms)
396         throws IOException {
397
398       IOException exception = null;
399       do {
400         try {
401           return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
402         } catch (IOException ioe) {
403           LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
404               + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
405               + retryCounter.getMaxAttempts());
406           LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
407               ioe);
408           try {
409             exception = ioe;
410             retryCounter.sleepUntilNextRetry();
411           } catch (InterruptedException ie) {
412             throw (InterruptedIOException) new InterruptedIOException(
413                 "Can't create lock file " + hbckLockFilePath.getName())
414             .initCause(ie);
415           }
416         }
417       } while (retryCounter.shouldRetry());
418
419       throw exception;
420     }
421   }
422
423   /**
424    * This method maintains a lock using a file. If the creation fails, we return null.
425    *
426    * @return FSDataOutputStream object corresponding to the newly opened lock file
427    * @throws IOException if IO failure occurs
428    */
429   private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
430     RetryCounter retryCounter = lockFileRetryCounterFactory.create();
431     FileLockCallable callable = new FileLockCallable(retryCounter);
432     ExecutorService executor = Executors.newFixedThreadPool(1);
433     FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
434     executor.execute(futureTask);
435     final int timeoutInSeconds = getConf().getInt(
436       "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
437     FSDataOutputStream stream = null;
438     try {
439       stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
440     } catch (ExecutionException ee) {
441       LOG.warn("Encountered exception when opening lock file", ee);
442     } catch (InterruptedException ie) {
443       LOG.warn("Interrupted when opening lock file", ie);
444       Thread.currentThread().interrupt();
445     } catch (TimeoutException exception) {
446       // took too long to obtain lock
447       LOG.warn("Took more than " + timeoutInSeconds + " seconds to obtain lock");
448       futureTask.cancel(true);
449     } finally {
450       executor.shutdownNow();
451     }
452     return stream;
453   }
454
455   private void unlockHbck() {
456     if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
457       RetryCounter retryCounter = lockFileRetryCounterFactory.create();
458       do {
459         try {
460           IOUtils.closeQuietly(hbckOutFd);
461           FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
462               HBCK_LOCK_PATH, true);
463           LOG.info("Finishing hbck");
464           return;
465         } catch (IOException ioe) {
466           LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
467               + (retryCounter.getAttemptTimes() + 1) + " of "
468               + retryCounter.getMaxAttempts());
469           LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
470           try {
471             retryCounter.sleepUntilNextRetry();
472           } catch (InterruptedException ie) {
473             Thread.currentThread().interrupt();
474           LOG.warn("Interrupted while deleting lock file " +
475                 HBCK_LOCK_PATH);
476             return;
477           }
478         }
479       } while (retryCounter.shouldRetry());
480     }
481   }
482
483   /**
484    * To repair region consistency, one must call connect() in order to repair
485    * online state.
486    */
487   public void connect() throws IOException {
488
489     if (isExclusive()) {
490       // Grab the lock
491       hbckOutFd = checkAndMarkRunningHbck();
492       if (hbckOutFd == null) {
493         setRetCode(-1);
494         LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
495             "[If you are sure no other instance is running, delete the lock file " +
496             HBCK_LOCK_PATH + " and rerun the tool]");
497         throw new IOException("Duplicate hbck - Abort");
498       }
499
500       // Make sure to cleanup the lock
501       hbckLockCleanup.set(true);
502     }
503
504
505     // Add a shutdown hook so that if the user tries to
506     // kill hbck with a ctrl-c, we still clean up the lock so that
507     // it is available for further calls.
508     Runtime.getRuntime().addShutdownHook(new Thread() {
509       @Override
510       public void run() {
511         IOUtils.closeQuietly(HBaseFsck.this);
512         cleanupHbckZnode();
513         unlockHbck();
514       }
515     });
516
517     LOG.info("Launching hbck");
518
519     connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
520     admin = connection.getAdmin();
521     meta = connection.getTable(TableName.META_TABLE_NAME);
522     status = admin.getClusterStatus();
523   }
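  // A rough sketch of the programmatic lifecycle (the command-line Tool entry
  // point is the usual way to run hbck; exception handling omitted):
  //
  //   HBaseFsck fsck = new HBaseFsck(HBaseConfiguration.create());
  //   fsck.connect();                  // takes the exclusive lock, opens admin/meta
  //   int ret = fsck.onlineHbck();     // 0 on success, non-zero on failure
  //   fsck.close();                    // releases the znode, lock file and connections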
524
525   /**
526    * Get deployed regions according to the region servers.
527    */
528   private void loadDeployedRegions() throws IOException, InterruptedException {
529     // From the master, get a list of all known live region servers
530     Collection<ServerName> regionServers = status.getServers();
531     errors.print("Number of live region servers: " + regionServers.size());
532     if (details) {
533       for (ServerName rsinfo: regionServers) {
534         errors.print("  " + rsinfo.getServerName());
535       }
536     }
537
538     // From the master, get a list of all dead region servers
539     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
540     errors.print("Number of dead region servers: " + deadRegionServers.size());
541     if (details) {
542       for (ServerName name: deadRegionServers) {
543         errors.print("  " + name);
544       }
545     }
546
547     // Print the current master name and state
548     errors.print("Master: " + status.getMaster());
549
550     // Print the list of all backup masters
551     Collection<ServerName> backupMasters = status.getBackupMasters();
552     errors.print("Number of backup masters: " + backupMasters.size());
553     if (details) {
554       for (ServerName name: backupMasters) {
555         errors.print("  " + name);
556       }
557     }
558
559     errors.print("Average load: " + status.getAverageLoad());
560     errors.print("Number of requests: " + status.getRequestsCount());
561     errors.print("Number of regions: " + status.getRegionsCount());
562
563     Set<RegionState> rits = status.getRegionsInTransition();
564     errors.print("Number of regions in transition: " + rits.size());
565     if (details) {
566       for (RegionState state: rits) {
567         errors.print("  " + state.toDescriptiveString());
568       }
569     }
570
571     // Determine what's deployed
572     processRegionServers(regionServers);
573   }
574
575   /**
576    * Clear the current state of hbck.
577    */
578   private void clearState() {
579     // Make sure regionInfo is empty before starting
580     fixes = 0;
581     regionInfoMap.clear();
582     emptyRegionInfoQualifiers.clear();
583     tableStates.clear();
584     errors.clear();
585     tablesInfo.clear();
586     orphanHdfsDirs.clear();
587     skippedRegions.clear();
588   }
589
590   /**
591    * This repair method analyzes hbase data in hdfs and repairs it to satisfy
592    * the table integrity rules.  HBase doesn't need to be online for this
593    * operation to work.
594    */
595   public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
596     // Initial pass to fix orphans.
597     if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
598         || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
599       LOG.info("Loading regioninfos from HDFS");
600       // if nothing is happening this should always complete in two iterations.
601       int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
602       int curIter = 0;
603       do {
604         clearState(); // clears hbck state and resets fixes to 0.
605         // repair what's on HDFS
606         restoreHdfsIntegrity();
607         curIter++;// limit the number of iterations.
608       } while (fixes > 0 && curIter <= maxIterations);
609
610       // Repairs should be done in the first iteration and verification in the second.
611       // If there are more than 2 passes, something funny has happened.
612       if (curIter > 2) {
613         if (curIter == maxIterations) {
614           LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
615               + "Table integrity may not be fully repaired!");
616         } else {
617           LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
618         }
619       }
620     }
621   }
622
623   /**
624    * This repair method requires the cluster to be online since it contacts
625    * region servers and the masters.  It makes each region's state in HDFS, in
626    * hbase:meta, and deployments consistent.
627    *
628    * @return If &gt; 0, the number of errors detected; if &lt; 0, there was an unrecoverable
629    *     error.  If 0, we have a clean hbase.
630    */
631   public int onlineConsistencyRepair() throws IOException, KeeperException,
632     InterruptedException {
633     clearState();
634
635     // get regions according to what is online on each RegionServer
636     loadDeployedRegions();
637     // check whether hbase:meta is deployed and online
638     recordMetaRegion();
639     // Check if hbase:meta is found only once and in the right place
640     if (!checkMetaRegion()) {
641       String errorMsg = "hbase:meta table is not consistent. ";
642       if (shouldFixAssignments()) {
643         errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
644       } else {
645         errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
646       }
647       errors.reportError(errorMsg + " Exiting...");
648       return -2;
649     }
650     // Do not proceed with further consistency checks for tables when hbase:meta itself is not consistent.
651     LOG.info("Loading regionsinfo from the hbase:meta table");
652     boolean success = loadMetaEntries();
653     if (!success) return -1;
654
655     // Empty cells in hbase:meta?
656     reportEmptyMetaCells();
657
658     // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
659     if (shouldFixEmptyMetaCells()) {
660       fixEmptyMetaCells();
661     }
662
663     // get a list of all tables that have not changed recently.
664     if (!checkMetaOnly) {
665       reportTablesInFlux();
666     }
667
668     // Get disabled tables states
669     loadTableStates();
670
671     // load regiondirs and regioninfos from HDFS
672     if (shouldCheckHdfs()) {
673       LOG.info("Loading region directories from HDFS");
674       loadHdfsRegionDirs();
675       LOG.info("Loading region information from HDFS");
676       loadHdfsRegionInfos();
677     }
678
679     // fix the orphan tables
680     fixOrphanTables();
681
682     LOG.info("Checking and fixing region consistency");
683     // Check and fix consistency
684     checkAndFixConsistency();
685
686     // Check integrity (does not fix)
687     checkIntegrity();
688     return errors.getErrorList().size();
689   }
690
691   /**
692    * This method maintains an ephemeral znode. If the creation fails, we return false or throw an
693    * exception
694    *
695    * @return true if creating znode succeeds; false otherwise
696    * @throws IOException if IO failure occurs
697    */
698   private boolean setMasterInMaintenanceMode() throws IOException {
699     RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
700     hbckEphemeralNodePath = ZKUtil.joinZNode(
701       ZooKeeperWatcher.masterMaintZNode,
702       "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
703     do {
704       try {
705         hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
706         if (hbckZodeCreated) {
707           break;
708         }
709       } catch (KeeperException e) {
710         if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
711            throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
712         }
713         // fall through and retry
714       }
715
716       LOG.warn("Failed to create znode " + hbckEphemeralNodePath + ", try=" +
717           (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
718
719       try {
720         retryCounter.sleepUntilNextRetry();
721       } catch (InterruptedException ie) {
722         throw (InterruptedIOException) new InterruptedIOException(
723               "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
724       }
725     } while (retryCounter.shouldRetry());
726     return hbckZodeCreated;
727   }
728
729   private void cleanupHbckZnode() {
730     try {
731       if (zkw != null && hbckZodeCreated) {
732         ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
733         hbckZodeCreated = false;
734       }
735     } catch (KeeperException e) {
736       // Ignore
737       if (!e.code().equals(KeeperException.Code.NONODE)) {
738         LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
739       }
740     }
741   }
742
743   /**
744    * Contacts the master and prints out cluster-wide information
745    * @return 0 on success, non-zero on failure
746    */
747   public int onlineHbck()
748       throws IOException, KeeperException, InterruptedException, ServiceException {
749     // print hbase server version
750     errors.print("Version: " + status.getHBaseVersion());
751     offlineHdfsIntegrityRepair();
752
753     // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
754     // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
755     // is better to set Master into maintenance mode during online hbck.
756     //
757     if (!setMasterInMaintenanceMode()) {
758       LOG.warn("HBCK is running while the master is not in maintenance mode; you might see transient "
759         + "errors.  Please run HBCK multiple times to reduce the chance of transient errors.");
760     }
761
762     onlineConsistencyRepair();
763
764     if (checkRegionBoundaries) {
765       checkRegionBoundaries();
766     }
767
768     offlineReferenceFileRepair();
769
770     checkAndFixTableLocks();
771
772     checkAndFixReplication();
773
774     // Remove the hbck znode
775     cleanupHbckZnode();
776
777     // Remove the hbck lock
778     unlockHbck();
779
780     // Print table summary
781     printTableSummary(tablesInfo);
782     return errors.summarize();
783   }
784
785   public static byte[] keyOnly (byte[] b) {
786     if (b == null)
787       return b;
788     int rowlength = Bytes.toShort(b, 0);
789     byte[] result = new byte[rowlength];
790     System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
791     return result;
792   }
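  // Worked example, assuming the standard KeyValue key layout (2-byte row length
  // followed by the row bytes, then family/qualifier/timestamp/type): for a key
  // starting with {0x00, 0x03, 'r', 'o', 'w', ...}, keyOnly() reads rowlength = 3
  // and returns just {'r', 'o', 'w'}.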
793
794   @Override
795   public void close() throws IOException {
796     try {
797       cleanupHbckZnode();
798       unlockHbck();
799     } catch (Exception io) {
800       LOG.warn(io);
801     } finally {
802       if (zkw != null) {
803         zkw.close();
804         zkw = null;
805       }
806       IOUtils.closeQuietly(admin);
807       IOUtils.closeQuietly(meta);
808       IOUtils.closeQuietly(connection);
809     }
810   }
811
812   private static class RegionBoundariesInformation {
813     public byte [] regionName;
814     public byte [] metaFirstKey;
815     public byte [] metaLastKey;
816     public byte [] storesFirstKey;
817     public byte [] storesLastKey;
818     @Override
819     public String toString () {
820       return "regionName=" + Bytes.toStringBinary(regionName) +
821              "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
822              "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
823              "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
824              "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
825     }
826   }
827
828   public void checkRegionBoundaries() {
829     try {
830       ByteArrayComparator comparator = new ByteArrayComparator();
831       List<HRegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
832       final RegionBoundariesInformation currentRegionBoundariesInformation =
833           new RegionBoundariesInformation();
834       Path hbaseRoot = FSUtils.getRootDir(getConf());
835       for (HRegionInfo regionInfo : regions) {
836         Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
837         currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
838         // For each region, get the start and stop key from the META and compare them to the
839         // same information from the Stores.
840         Path path = new Path(tableDir, regionInfo.getEncodedName());
841         FileSystem fs = path.getFileSystem(getConf());
842         FileStatus[] files = fs.listStatus(path);
843         // For all the column families in this region...
844         byte[] storeFirstKey = null;
845         byte[] storeLastKey = null;
846         for (FileStatus file : files) {
847           String fileName = file.getPath().toString();
848           fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
849           if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
850             FileStatus[] storeFiles = fs.listStatus(file.getPath());
851             // For all the stores in this column family.
852             for (FileStatus storeFile : storeFiles) {
853               HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
854                   getConf()), getConf());
855               if ((reader.getFirstKey() != null)
856                   && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
857                       ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey()) > 0))) {
858                 storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey()).getKey();
859               }
860               if ((reader.getLastKey() != null)
861                   && ((storeLastKey == null) || (comparator.compare(storeLastKey,
862                       ((KeyValue.KeyOnlyKeyValue)reader.getLastKey()).getKey())) < 0)) {
863                 storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey()).getKey();
864               }
865               reader.close();
866             }
867           }
868         }
869         currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
870         currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
871         currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
872         currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
873         if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
874           currentRegionBoundariesInformation.metaFirstKey = null;
875         if (currentRegionBoundariesInformation.metaLastKey.length == 0)
876           currentRegionBoundariesInformation.metaLastKey = null;
877
878         // For a region to be correct, we need the META start key to be smaller or equal to the
879         // smallest start key from all the stores, and the start key from the next META entry to
880         // be bigger than the last key from all the current stores. First region start key is null;
881         // Last region end key is null; some regions can be empty and not have any store.
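        // Worked example with hypothetical keys: if hbase:meta says a region spans
        // ['b', 'f') and its store files only hold row keys 'b'..'e', the checks
        // below pass (storesFirstKey >= 'b' and storesLastKey < 'f'); a store key
        // of 'f' or beyond would be reported as a BOUNDARIES_ERROR.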
882
883         boolean valid = true;
884         // Checking start key.
885         if ((currentRegionBoundariesInformation.storesFirstKey != null)
886             && (currentRegionBoundariesInformation.metaFirstKey != null)) {
887           valid = valid
888               && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
889                 currentRegionBoundariesInformation.metaFirstKey) >= 0;
890         }
891         // Checking stop key.
892         if ((currentRegionBoundariesInformation.storesLastKey != null)
893             && (currentRegionBoundariesInformation.metaLastKey != null)) {
894           valid = valid
895               && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
896                 currentRegionBoundariesInformation.metaLastKey) < 0;
897         }
898         if (!valid) {
899           errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
900             tablesInfo.get(regionInfo.getTable()));
901           LOG.warn("Region's boundaries not aligned between stores and META for:");
902           LOG.warn(currentRegionBoundariesInformation);
903         }
904       }
905     } catch (IOException e) {
906       LOG.error(e);
907     }
908   }
909
910   /**
911    * Iterates through the list of all orphan/invalid regiondirs.
912    */
913   private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
914     for (HbckInfo hi : orphanHdfsDirs) {
915       LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
916       adoptHdfsOrphan(hi);
917     }
918   }
919
920   /**
921    * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
922    * these orphans by creating a new region, and moving the column families,
923    * recovered edits, WALs, into the new region dir.  We determine the region
924    * startkey and endkeys by looking at all of the hfiles inside the column
925    * families to identify the min and max keys. The resulting region will
926    * likely violate table integrity but will be dealt with by merging
927    * overlapping regions.
928    */
929   @SuppressWarnings("deprecation")
930   private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
931     Path p = hi.getHdfsRegionDir();
932     FileSystem fs = p.getFileSystem(getConf());
933     FileStatus[] dirs = fs.listStatus(p);
934     if (dirs == null) {
935       LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
936           p + ". This dir could probably be deleted.");
937       return ;
938     }
939
940     TableName tableName = hi.getTableName();
941     TableInfo tableInfo = tablesInfo.get(tableName);
942     Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
943     HTableDescriptor template = tableInfo.getHTD();
944
945     // find min and max key values
946     Pair<byte[],byte[]> orphanRegionRange = null;
947     for (FileStatus cf : dirs) {
948       String cfName= cf.getPath().getName();
949       // TODO Figure out what the special dirs are
950       if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
951
952       FileStatus[] hfiles = fs.listStatus(cf.getPath());
953       for (FileStatus hfile : hfiles) {
954         byte[] start, end;
955         HFile.Reader hf = null;
956         try {
957           CacheConfig cacheConf = new CacheConfig(getConf());
958           hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
959           hf.loadFileInfo();
960           Cell startKv = hf.getFirstKey();
961           start = CellUtil.cloneRow(startKv);
962           Cell endKv = hf.getLastKey();
963           end = CellUtil.cloneRow(endKv);
964         } catch (IOException ioe) {
965           LOG.warn("Problem reading orphan file " + hfile + ", skipping");
966           continue;
967         } catch (NullPointerException ioe) {
968           LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
969           continue;
970         } finally {
971           if (hf != null) {
972             hf.close();
973           }
974         }
975
976         // expand the range to include the range of all hfiles
977         if (orphanRegionRange == null) {
978           // first range
979           orphanRegionRange = new Pair<byte[], byte[]>(start, end);
980         } else {
981           // TODO add test
982
983           // expand range only if the hfile is wider.
984           if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
985             orphanRegionRange.setFirst(start);
986           }
987           if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
988             orphanRegionRange.setSecond(end);
989           }
990         }
991       }
992     }
993     if (orphanRegionRange == null) {
994       LOG.warn("No data in dir " + p + ", sidelining data");
995       fixes++;
996       sidelineRegionDir(fs, hi);
997       return;
998     }
999     LOG.info("Min max keys are: [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1000         Bytes.toString(orphanRegionRange.getSecond()) + ")");
1001
1002     // create new region on hdfs. move data into place.
1003     HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
1004         Bytes.add(orphanRegionRange.getSecond(), new byte[1]));
1005     LOG.info("Creating new region : " + hri);
1006     HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
1007     Path target = region.getRegionFileSystem().getRegionDir();
1008
1009     // rename all the data to new region
1010     mergeRegionDirs(target, hi);
1011     fixes++;
1012   }
1013
1014   /**
1015    * This method determines if there are table integrity errors in HDFS.  If
1016    * there are errors and the appropriate "fix" options are enabled, the method
1017    * will first correct orphan regions making them into legit regiondirs, and
1018    * then reload to merge potentially overlapping regions.
1019    *
1020    * @return number of table integrity errors found
1021    */
1022   private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1023     // Determine what's on HDFS
1024     LOG.info("Loading HBase regioninfo from HDFS...");
1025     loadHdfsRegionDirs(); // populating regioninfo table.
1026
1027     int errs = errors.getErrorList().size();
1028     // First time just get suggestions.
1029     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1030     checkHdfsIntegrity(false, false);
1031
1032     if (errors.getErrorList().size() == errs) {
1033       LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1034       return 0;
1035     }
1036
1037     if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1038       adoptHdfsOrphans(orphanHdfsDirs);
1039       // TODO optimize by incrementally adding instead of reloading.
1040     }
1041
1042     // Make sure there are no holes now.
1043     if (shouldFixHdfsHoles()) {
1044       clearState(); // this also resets # fixes.
1045       loadHdfsRegionDirs();
1046       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1047       tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1048     }
1049
1050     // Now we fix overlaps
1051     if (shouldFixHdfsOverlaps()) {
1052       // second pass we fix overlaps.
1053       clearState(); // this also resets # fixes.
1054       loadHdfsRegionDirs();
1055       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1056       tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1057     }
1058
1059     return errors.getErrorList().size();
1060   }
1061
1062   /**
1063    * Scan all the store file names to find any lingering reference files,
1064    * which refer to non-existent files. If the "fix" option is enabled,
1065    * any lingering reference file will be sidelined if found.
1066    * <p>
1067    * A lingering reference file prevents a region from opening. It has to
1068    * be fixed before a cluster can start properly.
1069    */
1070   private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1071     Configuration conf = getConf();
1072     Path hbaseRoot = FSUtils.getRootDir(conf);
1073     FileSystem fs = hbaseRoot.getFileSystem(conf);
1074     LOG.info("Computing mapping of all store files");
1075     Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1076       new FSUtils.ReferenceFileFilter(fs), executor, errors);
1077     errors.print("");
1078     LOG.info("Validating mapping using HDFS state");
1079     for (Path path: allFiles.values()) {
1080       Path referredToFile = StoreFileInfo.getReferredToFile(path);
1081       if (fs.exists(referredToFile)) continue;  // good, expected
1082
1083       // Found a lingering reference file
1084       errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1085         "Found lingering reference file " + path);
1086       if (!shouldFixReferenceFiles()) continue;
1087
1088       // Now, trying to fix it since requested
1089       boolean success = false;
1090       String pathStr = path.toString();
1091
1092       // A reference file path should be like
1093       // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1094       // Up 5 directories to get the root folder.
1095       // So the file will be sidelined to a similar folder structure.
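      // Worked example with hypothetical names: a lingering reference file at
      //   ${hbase.rootdir}/data/default/t1/<region_id>/f1/<referred_file>.<region_name>
      // would be moved to
      //   ${sidelineDir}/data/default/t1/<region_id>/f1/<referred_file>.<region_name>,
      // i.e. the trailing data/namespace/table/region/family/file components are
      // preserved under the sideline root.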
1096       int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1097       for (int i = 0; index > 0 && i < 5; i++) {
1098         index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1099       }
1100       if (index > 0) {
1101         Path rootDir = getSidelineDir();
1102         Path dst = new Path(rootDir, pathStr.substring(index + 1));
1103         fs.mkdirs(dst.getParent());
1104         LOG.info("Trying to sideline reference file "
1105           + path + " to " + dst);
1106         setShouldRerun();
1107
1108         success = fs.rename(path, dst);
1109       }
1110       if (!success) {
1111         LOG.error("Failed to sideline reference file " + path);
1112       }
1113     }
1114   }
1115
1116   /**
1117    * TODO -- need to add tests for this.
1118    */
1119   private void reportEmptyMetaCells() {
1120     errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1121       emptyRegionInfoQualifiers.size());
1122     if (details) {
1123       for (Result r: emptyRegionInfoQualifiers) {
1124         errors.print("  " + r);
1125       }
1126     }
1127   }
1128
1129   /**
1130    * TODO -- need to add tests for this.
1131    */
1132   private void reportTablesInFlux() {
1133     AtomicInteger numSkipped = new AtomicInteger(0);
1134     HTableDescriptor[] allTables = getTables(numSkipped);
1135     errors.print("Number of Tables: " + allTables.length);
1136     if (details) {
1137       if (numSkipped.get() > 0) {
1138         errors.detail("Number of Tables in flux: " + numSkipped.get());
1139       }
1140       for (HTableDescriptor td : allTables) {
1141         errors.detail("  Table: " + td.getTableName() + "\t" +
1142                            (td.isReadOnly() ? "ro" : "rw") + "\t" +
1143                             (td.isMetaRegion() ? "META" : "    ") + "\t" +
1144                            " families: " + td.getFamilies().size());
1145       }
1146     }
1147   }
1148
1149   public ErrorReporter getErrors() {
1150     return errors;
1151   }
1152
1153   /**
1154    * Read the .regioninfo file from the file system.  If there is no
1155    * .regioninfo, add it to the orphan hdfs region list.
1156    */
1157   private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1158     Path regionDir = hbi.getHdfsRegionDir();
1159     if (regionDir == null) {
1160       LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1161       return;
1162     }
1163
1164     if (hbi.hdfsEntry.hri != null) {
1165       // already loaded data
1166       return;
1167     }
1168
1169     FileSystem fs = FileSystem.get(getConf());
1170     HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1171     LOG.debug("HRegionInfo read: " + hri.toString());
1172     hbi.hdfsEntry.hri = hri;
1173   }
1174
1175   /**
1176    * Exception thrown when an integrity repair operation fails in an
1177    * unresolvable way.
1178    */
1179   public static class RegionRepairException extends IOException {
1180     private static final long serialVersionUID = 1L;
1181     final IOException ioe;
1182     public RegionRepairException(String s, IOException ioe) {
1183       super(s);
1184       this.ioe = ioe;
1185     }
1186   }
1187
1188   /**
1189    * Populate hbi's from regionInfos loaded from file system.
1190    */
1191   private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1192       throws IOException, InterruptedException {
1193     tablesInfo.clear(); // regenerating the data
1194     // generate region split structure
1195     Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1196
1197     // Parallelized read of .regioninfo files.
1198     List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
1199     List<Future<Void>> hbiFutures;
1200
1201     for (HbckInfo hbi : hbckInfos) {
1202       WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1203       hbis.add(work);
1204     }
1205
1206     // Submit and wait for completion
1207     hbiFutures = executor.invokeAll(hbis);
1208
1209     for(int i=0; i<hbiFutures.size(); i++) {
1210       WorkItemHdfsRegionInfo work = hbis.get(i);
1211       Future<Void> f = hbiFutures.get(i);
1212       try {
1213         f.get();
1214       } catch(ExecutionException e) {
1215         LOG.warn("Failed to read .regioninfo file for region " +
1216               work.hbi.getRegionNameAsString(), e.getCause());
1217       }
1218     }
1219
1220     Path hbaseRoot = FSUtils.getRootDir(getConf());
1221     FileSystem fs = hbaseRoot.getFileSystem(getConf());
1222     // gather table info serially (not parallelized).
1223     for (HbckInfo hbi: hbckInfos) {
1224
1225       if (hbi.getHdfsHRI() == null) {
1226         // was an orphan
1227         continue;
1228       }
1229
1230
1231       // get table name from hdfs, populate various HBaseFsck tables.
1232       TableName tableName = hbi.getTableName();
1233       if (tableName == null) {
1234         // There was an entry in hbase:meta not in the HDFS?
1235         LOG.warn("tableName was null for: " + hbi);
1236         continue;
1237       }
1238
1239       TableInfo modTInfo = tablesInfo.get(tableName);
1240       if (modTInfo == null) {
1241         // only executed once per table.
1242         modTInfo = new TableInfo(tableName);
1243         tablesInfo.put(tableName, modTInfo);
1244         try {
1245           HTableDescriptor htd =
1246               FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1247           modTInfo.htds.add(htd);
1248         } catch (IOException ioe) {
1249           if (!orphanTableDirs.containsKey(tableName)) {
1250             LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1251             //should only report once for each table
1252             errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1253                 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1254             Set<String> columns = new HashSet<String>();
1255             orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1256           }
1257         }
1258       }
1259       if (!hbi.isSkipChecks()) {
1260         modTInfo.addRegionInfo(hbi);
1261       }
1262     }
1263
1264     loadTableInfosForTablesWithNoRegion();
1265     errors.print("");
1266
1267     return tablesInfo;
1268   }
1269
1270   /**
1271    * To get the column family list according to the column family dirs
1272    * @param columns
1273    * @param hbi
1274    * @return a set of column families
1275    * @throws IOException
1276    */
1277   private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1278     Path regionDir = hbi.getHdfsRegionDir();
1279     FileSystem fs = regionDir.getFileSystem(getConf());
1280     FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1281     for (FileStatus subdir : subDirs) {
1282       String columnfamily = subdir.getPath().getName();
1283       columns.add(columnfamily);
1284     }
1285     return columns;
1286   }
1287
1288   /**
1289    * To fabricate a .tableinfo file with the following contents:<br>
1290    * 1. the correct tablename <br>
1291    * 2. the correct colfamily list<br>
1292    * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1293    * @throws IOException
1294    */
1295   private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1296       Set<String> columns) throws IOException {
1297     if (columns == null || columns.isEmpty()) return false;
1298     HTableDescriptor htd = new HTableDescriptor(tableName);
1299     for (String columnfamily : columns) {
1300       htd.addFamily(new HColumnDescriptor(columnfamily));
1301     }
1302     fstd.createTableDescriptor(htd, true);
1303     return true;
1304   }
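
  /**
   * Illustrative sketch only, never called by hbck itself: shows how fabricateTableInfo() would
   * be driven for a hypothetical orphan table.  The table name "example_table" and the families
   * "f1"/"f2" are made-up values for this example.
   */
  private void exampleFabricateDefaultTableInfo() throws IOException {
    FSTableDescriptors fstd = new FSTableDescriptors(getConf());
    Set<String> families = new HashSet<String>(Arrays.asList("f1", "f2"));
    // Writes a .tableinfo carrying default HTableDescriptor/HColumnDescriptor properties.
    fabricateTableInfo(fstd, TableName.valueOf("example_table"), families);
  }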
1305
1306   /**
1307    * Fix empty REGIONINFO_QUALIFIER rows in hbase:meta.<br>
1308    * @throws IOException
1309    */
1310   public void fixEmptyMetaCells() throws IOException {
1311     if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1312       LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1313       for (Result region : emptyRegionInfoQualifiers) {
1314         deleteMetaRegion(region.getRow());
1315         errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1316       }
1317       emptyRegionInfoQualifiers.clear();
1318     }
1319   }
1320
1321   /**
1322    * Fix orphan tables by creating a .tableinfo file under each orphan tableDir:<br>
1323    * 1. if the TableInfo is cached, recover the .tableinfo from the cache<br>
1324    * 2. else create a default .tableinfo file with the following items:<br>
1325    * &nbsp;2.1 the correct tablename <br>
1326    * &nbsp;2.2 the correct colfamily list<br>
1327    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1328    * @throws IOException
1329    */
1330   public void fixOrphanTables() throws IOException {
1331     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1332
1333       List<TableName> tmpList = new ArrayList<TableName>();
1334       tmpList.addAll(orphanTableDirs.keySet());
1335       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1336       Iterator<Entry<TableName, Set<String>>> iter =
1337           orphanTableDirs.entrySet().iterator();
1338       int j = 0;
1339       int numFailedCase = 0;
1340       FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1341       while (iter.hasNext()) {
1342         Entry<TableName, Set<String>> entry =
1343             iter.next();
1344         TableName tableName = entry.getKey();
1345         LOG.info("Trying to fix orphan table error: " + tableName);
1346         if (j < htds.length) {
1347           if (tableName.equals(htds[j].getTableName())) {
1348             HTableDescriptor htd = htds[j];
1349             LOG.info("fixing orphan table: " + tableName + " from cache");
1350             fstd.createTableDescriptor(htd, true);
1351             j++;
1352             iter.remove();
1353           }
1354         } else {
1355           if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1356             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1357             LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
1358             iter.remove();
1359           } else {
1360             LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1361             numFailedCase++;
1362           }
1363         }
1364         fixes++;
1365       }
1366
1367       if (orphanTableDirs.isEmpty()) {
1368         // all orphanTableDirs are luckily recovered
1369         // re-run doFsck after recovering the .tableinfo file
1370         setShouldRerun();
1371         LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1372       } else if (numFailedCase > 0) {
1373         LOG.error("Failed to fix " + numFailedCase
1374             + " OrphanTables with default .tableinfo files");
1375       }
1376
1377     }
1378     //cleanup the list
1379     orphanTableDirs.clear();
1380
1381   }
1382
1383   /**
1384    * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates its own WAL, so be
1385    * sure to close it as well as the region when you're finished.
1386    *
1387    * @return an open hbase:meta HRegion
1388    */
1389   private HRegion createNewMeta() throws IOException {
1390     Path rootdir = FSUtils.getRootDir(getConf());
1391     Configuration c = getConf();
1392     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1393     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1394     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1395     // The WAL subsystem will use the default rootDir rather than the passed in rootDir
1396     // unless it is passed along via the conf.
1397     Configuration confForWAL = new Configuration(c);
1398     confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
1399     WAL wal = (new WALFactory(confForWAL,
1400         Collections.<WALActionsListener>singletonList(new MetricsWAL()),
1401         "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8))).
1402         getWAL(metaHRI.getEncodedNameAsBytes(), metaHRI.getTable().getNamespace());
1403     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
1404     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1405     return meta;
1406   }
1407
1408   /**
1409    * Generate set of puts to add to new meta.  This expects the tables to be
1410    * clean with no overlaps or holes.  If there are any problems it returns null.
1411    *
1412    * @return An array list of puts to do in bulk, null if tables have problems
1413    */
1414   private ArrayList<Put> generatePuts(
1415       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1416     ArrayList<Put> puts = new ArrayList<Put>();
1417     boolean hasProblems = false;
1418     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1419       TableName name = e.getKey();
1420
1421       // skip "hbase:meta"
1422       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1423         continue;
1424       }
1425
1426       TableInfo ti = e.getValue();
1427       puts.add(MetaTableAccessor
1428           .makePutFromTableState(new TableState(ti.tableName, TableState.State.ENABLED)));
1429       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1430           .entrySet()) {
1431         Collection<HbckInfo> his = spl.getValue();
1432         int sz = his.size();
1433         if (sz != 1) {
1434           // problem
1435           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1436               + " had " +  sz + " regions instead of exactly 1." );
1437           hasProblems = true;
1438           continue;
1439         }
1440
1441         // add the row directly to meta.
1442         HbckInfo hi = his.iterator().next();
1443         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1444         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1445         puts.add(p);
1446       }
1447     }
1448     return hasProblems ? null : puts;
1449   }
1450
1451   /**
1452    * Suggest fixes for each table
1453    */
1454   private void suggestFixes(
1455       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1456     logParallelMerge();
1457     for (TableInfo tInfo : tablesInfo.values()) {
1458       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1459       tInfo.checkRegionChain(handler);
1460     }
1461   }
1462
1463   /**
1464    * Rebuilds meta from information in hdfs/fs.  Depends on configuration settings passed into
1465    * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE.
1466    *
1467    * @param fix flag that determines if method should attempt to fix holes
1468    * @return true if successful, false if attempt failed.
1469    */
1470   public boolean rebuildMeta(boolean fix) throws IOException,
1471       InterruptedException {
1472
1473     // TODO check to make sure hbase is offline. (or at least the table
1474     // currently being worked on is offline)
1475
1476     // Determine what's on HDFS
1477     LOG.info("Loading HBase regioninfo from HDFS...");
1478     loadHdfsRegionDirs(); // populating regioninfo table.
1479
1480     int errs = errors.getErrorList().size();
1481     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1482     checkHdfsIntegrity(false, false);
1483
1484     // make sure ok.
1485     if (errors.getErrorList().size() != errs) {
1486       // While in error state, iterate until no more fixes possible
1487       while(true) {
1488         fixes = 0;
1489         suggestFixes(tablesInfo);
1490         errors.clear();
1491         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1492         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1493
1494         int errCount = errors.getErrorList().size();
1495
1496         if (fixes == 0) {
1497           if (errCount > 0) {
1498             return false; // failed to fix problems.
1499           } else {
1500             break; // no fixes and no problems? drop out and fix stuff!
1501           }
1502         }
1503       }
1504     }
1505
1506     // we can rebuild, move old meta out of the way and start
1507     LOG.info("HDFS regioninfo's seems good.  Sidelining old hbase:meta");
1508     Path backupDir = sidelineOldMeta();
1509
1510     LOG.info("Creating new hbase:meta");
1511     HRegion meta = createNewMeta();
1512
1513     // populate meta
1514     List<Put> puts = generatePuts(tablesInfo);
1515     if (puts == null) {
1516       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1517         "You may need to restore the previously sidelined hbase:meta");
1518       return false;
1519     }
1520     meta.batchMutate(puts.toArray(new Put[puts.size()]), HConstants.NO_NONCE, HConstants.NO_NONCE);
1521     meta.close();
1522     if (meta.getWAL() != null) {
1523       meta.getWAL().close();
1524     }
1525     LOG.info("Success! hbase:meta table rebuilt.");
1526     LOG.info("Old hbase:meta is moved into " + backupDir);
1527     return true;
1528   }
1529
1530   /**
1531    * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1532    */
1533   private void logParallelMerge() {
1534     if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1535       LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
1536           " false to run serially.");
1537     } else {
1538       LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to" +
1539           " true to run in parallel.");
1540     }
1541   }
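
  /**
   * Illustrative sketch only, never called by hbck itself: one way the flag logged above could
   * be set programmatically to force serial overlap-merge handling.  The Configuration created
   * here is a throwaway example; in practice the value would be supplied via the configuration
   * handed to hbck (e.g. through Configured.setConf()).
   */
  private void exampleForceSerialOverlapMerge() {
    Configuration exampleConf = new Configuration(getConf());
    exampleConf.setBoolean("hbasefsck.overlap.merge.parallel", false);
  }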
1542
1543   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1544       boolean fixOverlaps) throws IOException {
1545     LOG.info("Checking HBase region split map from HDFS data...");
1546     logParallelMerge();
1547     for (TableInfo tInfo : tablesInfo.values()) {
1548       TableIntegrityErrorHandler handler;
1549       if (fixHoles || fixOverlaps) {
1550         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1551           fixHoles, fixOverlaps);
1552       } else {
1553         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1554       }
1555       if (!tInfo.checkRegionChain(handler)) {
1556         // should dump info as well.
1557         errors.report("Found inconsistency in table " + tInfo.getName());
1558       }
1559     }
1560     return tablesInfo;
1561   }
1562
1563   private Path getSidelineDir() throws IOException {
1564     if (sidelineDir == null) {
1565       Path hbaseDir = FSUtils.getRootDir(getConf());
1566       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1567       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1568           + startMillis);
1569     }
1570     return sidelineDir;
1571   }
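
  /*
   * Worked example with assumed values: for hbase.rootdir = hdfs://nn/hbase,
   * HConstants.HBCK_SIDELINEDIR_NAME = ".hbck" and startMillis = 1400000000000, the sideline
   * directory resolved above would be hdfs://nn/hbase/.hbck/hbase-1400000000000.
   */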
1572
1573   /**
1574    * Sideline a region dir (instead of deleting it)
1575    */
1576   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1577     return sidelineRegionDir(fs, null, hi);
1578   }
1579
1580   /**
1581    * Sideline a region dir (instead of deleting it)
1582    *
1583    * @param parentDir if specified, the region will be sidelined to folder like
1584    *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1585    *     similar sidelined regions, for example regions that should be bulk loaded back later
1586    *     on. If null, it is ignored.
1587    */
1588   Path sidelineRegionDir(FileSystem fs,
1589       String parentDir, HbckInfo hi) throws IOException {
1590     TableName tableName = hi.getTableName();
1591     Path regionDir = hi.getHdfsRegionDir();
1592
1593     if (!fs.exists(regionDir)) {
1594       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1595       return null;
1596     }
1597
1598     Path rootDir = getSidelineDir();
1599     if (parentDir != null) {
1600       rootDir = new Path(rootDir, parentDir);
1601     }
1602     Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1603     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1604     fs.mkdirs(sidelineRegionDir);
1605     boolean success = false;
1606     FileStatus[] cfs =  fs.listStatus(regionDir);
1607     if (cfs == null) {
1608       LOG.info("Region dir is empty: " + regionDir);
1609     } else {
1610       for (FileStatus cf : cfs) {
1611         Path src = cf.getPath();
1612         Path dst =  new Path(sidelineRegionDir, src.getName());
1613         if (fs.isFile(src)) {
1614           // simple file
1615           success = fs.rename(src, dst);
1616           if (!success) {
1617             String msg = "Unable to rename file " + src +  " to " + dst;
1618             LOG.error(msg);
1619             throw new IOException(msg);
1620           }
1621           continue;
1622         }
1623
1624         // is a directory.
1625         fs.mkdirs(dst);
1626
1627         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1628         // FileSystem.rename is inconsistent with directories -- if the
1629         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1630         // it moves the src into the dst dir resulting in (foo/a/b).  If
1631         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
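        // (See the illustrative exampleMoveChildrenInto() sketch after this method.)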
1632         FileStatus[] hfiles = fs.listStatus(src);
1633         if (hfiles != null && hfiles.length > 0) {
1634           for (FileStatus hfile : hfiles) {
1635             success = fs.rename(hfile.getPath(), dst);
1636             if (!success) {
1637               String msg = "Unable to rename file " + src +  " to " + dst;
1638               LOG.error(msg);
1639               throw new IOException(msg);
1640             }
1641           }
1642         }
1643         LOG.debug("Sideline directory contents:");
1644         debugLsr(sidelineRegionDir);
1645       }
1646     }
1647
1648     LOG.info("Removing old region dir: " + regionDir);
1649     success = fs.delete(regionDir, true);
1650     if (!success) {
1651       String msg = "Unable to delete dir " + regionDir;
1652       LOG.error(msg);
1653       throw new IOException(msg);
1654     }
1655     return sidelineRegionDir;
1656   }
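
  /**
   * Illustrative sketch only, never called by hbck itself: the "create the destination, then
   * rename each child individually" pattern used above to cope with FileSystem.rename()'s
   * directory semantics.  The filesystem and paths are whatever a caller would supply.
   */
  private static void exampleMoveChildrenInto(FileSystem fs, Path srcDir, Path dstDir)
      throws IOException {
    fs.mkdirs(dstDir); // ensure dst exists so each rename moves a child *into* it
    FileStatus[] children = fs.listStatus(srcDir);
    if (children == null) {
      return; // nothing to move
    }
    for (FileStatus child : children) {
      if (!fs.rename(child.getPath(), dstDir)) {
        throw new IOException("Unable to rename " + child.getPath() + " to " + dstDir);
      }
    }
  }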
1657
1658   /**
1659    * Sideline an entire table.
1660    */
1661   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1662       Path backupHbaseDir) throws IOException {
1663     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1664     if (fs.exists(tableDir)) {
1665       Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
1666       fs.mkdirs(backupTableDir.getParent());
1667       boolean success = fs.rename(tableDir, backupTableDir);
1668       if (!success) {
1669         throw new IOException("Failed to move  " + tableName + " from "
1670             +  tableDir + " to " + backupTableDir);
1671       }
1672     } else {
1673       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1674     }
1675   }
1676
1677   /**
1678    * @return Path to backup of original directory
1679    */
1680   Path sidelineOldMeta() throws IOException {
1681     // put current hbase:meta aside.
1682     Path hbaseDir = FSUtils.getRootDir(getConf());
1683     FileSystem fs = hbaseDir.getFileSystem(getConf());
1684     Path backupDir = getSidelineDir();
1685     fs.mkdirs(backupDir);
1686
1687     try {
1688       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1689     } catch (IOException e) {
1690         LOG.fatal("... failed to sideline meta. Currently in inconsistent state.  To restore "
1691             + "try to rename hbase:meta in " + backupDir.getName() + " to "
1692             + hbaseDir.getName() + ".", e);
1693       throw e; // throw original exception
1694     }
1695     return backupDir;
1696   }
1697
1698   /**
1699    * Load the table states from hbase:meta (not ZooKeeper) into the local
1700    * tableStates map.
1701    * @throws IOException
1702    */
1703   private void loadTableStates()
1704   throws IOException {
1705     tableStates = MetaTableAccessor.getTableStates(connection);
1706   }
1707
1708   /**
1709    * Check if the specified table is disabled or being disabled.
1710    * @param tableName table to check status of
1711    */
1712   private boolean isTableDisabled(TableName tableName) {
1713     return tableStates.containsKey(tableName)
1714         && tableStates.get(tableName)
1715         .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1716   }
1717
1718   /**
1719    * Scan HDFS for all regions, recording their information into
1720    * regionInfoMap
1721    */
1722   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1723     Path rootDir = FSUtils.getRootDir(getConf());
1724     FileSystem fs = rootDir.getFileSystem(getConf());
1725
1726     // list all tables from HDFS
1727     List<FileStatus> tableDirs = Lists.newArrayList();
1728
1729     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1730
1731     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1732     for (Path path : paths) {
1733       TableName tableName = FSUtils.getTableName(path);
1734        if ((!checkMetaOnly &&
1735            isTableIncluded(tableName)) ||
1736            tableName.equals(TableName.META_TABLE_NAME)) {
1737          tableDirs.add(fs.getFileStatus(path));
1738        }
1739     }
1740
1741     // verify that version file exists
1742     if (!foundVersionFile) {
1743       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1744           "Version file does not exist in root dir " + rootDir);
1745       if (shouldFixVersionFile()) {
1746         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1747             + " file.");
1748         setShouldRerun();
1749         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1750             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1751             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1752             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1753       }
1754     }
1755
1756     // Avoid multithreading at table-level because already multithreaded internally at
1757     // region-level.  Additionally multithreading at table-level can lead to deadlock
1758     // if there are many tables in the cluster.  Since there are a limited # of threads
1759     // in the executor's thread pool and if we multithread at the table-level by putting
1760     // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1761     // executor tied up solely in waiting for the tables' region-level calls to complete.
1762     // If there are enough tables then there will be no actual threads in the pool left
1763     // for the region-level callables to be serviced.
1764     for (FileStatus tableDir : tableDirs) {
1765       LOG.debug("Loading region dirs from " +tableDir.getPath());
1766       WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1767       try {
1768         item.call();
1769       } catch (ExecutionException e) {
1770         LOG.warn("Could not completely load table dir " +
1771             tableDir.getPath(), e.getCause());
1772       }
1773     }
1774     errors.print("");
1775   }
1776
1777   /**
1778    * Record the location of the hbase:meta region as found in ZooKeeper.
1779    */
1780   private boolean recordMetaRegion() throws IOException {
1781     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1782         HConstants.EMPTY_START_ROW, false, false);
1783     if (rl == null) {
1784       errors.reportError(ERROR_CODE.NULL_META_REGION,
1785           "META region was not found in ZooKeeper");
1786       return false;
1787     }
1788     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1789       // Check if Meta region is valid and existing
1790       if (metaLocation == null ) {
1791         errors.reportError(ERROR_CODE.NULL_META_REGION,
1792             "META region location is null");
1793         return false;
1794       }
1795       if (metaLocation.getRegionInfo() == null) {
1796         errors.reportError(ERROR_CODE.NULL_META_REGION,
1797             "META location regionInfo is null");
1798         return false;
1799       }
1800       if (metaLocation.getHostname() == null) {
1801         errors.reportError(ERROR_CODE.NULL_META_REGION,
1802             "META location hostName is null");
1803         return false;
1804       }
1805       ServerName sn = metaLocation.getServerName();
1806       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
1807       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1808       if (hbckInfo == null) {
1809         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1810       } else {
1811         hbckInfo.metaEntry = m;
1812       }
1813     }
1814     return true;
1815   }
1816
1817   private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1818     return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1819       @Override
1820       public void abort(String why, Throwable e) {
1821         LOG.error(why, e);
1822         System.exit(1);
1823       }
1824
1825       @Override
1826       public boolean isAborted() {
1827         return false;
1828       }
1829
1830     });
1831   }
1832
1833   private ServerName getMetaRegionServerName(int replicaId)
1834   throws IOException, KeeperException {
1835     return new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
1836   }
1837
1838   /**
1839    * Contacts each regionserver and fetches metadata about regions.
1840    * @param regionServerList - the list of region servers to connect to
1841    * @throws IOException if a remote or network exception occurs
1842    */
1843   void processRegionServers(Collection<ServerName> regionServerList)
1844     throws IOException, InterruptedException {
1845
1846     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1847     List<Future<Void>> workFutures;
1848
1849     // loop to contact each region server in parallel
1850     for (ServerName rsinfo: regionServerList) {
1851       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1852     }
1853
1854     workFutures = executor.invokeAll(workItems);
1855
1856     for(int i=0; i<workFutures.size(); i++) {
1857       WorkItemRegion item = workItems.get(i);
1858       Future<Void> f = workFutures.get(i);
1859       try {
1860         f.get();
1861       } catch(ExecutionException e) {
1862         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1863             e.getCause());
1864       }
1865     }
1866   }
1867
1868   /**
1869    * Check consistency of all regions that have been found in previous phases.
1870    */
1871   private void checkAndFixConsistency()
1872   throws IOException, KeeperException, InterruptedException {
1873     // Divide the checks into two phases. One for default/primary replicas and another
1874     // for the non-primary ones. Keeps the code cleaner this way.
1875
1876     List<CheckRegionConsistencyWorkItem> workItems =
1877         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1878     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1879       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1880         workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1881       }
1882     }
1883     checkRegionConsistencyConcurrently(workItems);
1884
1885     boolean prevHdfsCheck = shouldCheckHdfs();
1886     setCheckHdfs(false); //replicas don't have any hdfs data
1887     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1888     // deployed/undeployed replicas.
1889     List<CheckRegionConsistencyWorkItem> replicaWorkItems =
1890         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1891     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1892       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1893         replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1894       }
1895     }
1896     checkRegionConsistencyConcurrently(replicaWorkItems);
1897     setCheckHdfs(prevHdfsCheck);
1898
1899     // If some regions were skipped during the checkRegionConsistencyConcurrently() phase, we
1900     // might not get an accurate state of hbase if we continue. The config here lets users tune
1901     // the tolerated number of skipped regions (see the illustrative sketch after this method).
1902     // TODO: evaluate the consequence of continuing the hbck operation without this config.
1903     int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1904     int numOfSkippedRegions = skippedRegions.size();
1905     if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1906       throw new IOException(numOfSkippedRegions
1907         + " region(s) could not be checked or repaired.  See logs for detail.");
1908     }
1909
1910     if (shouldCheckHdfs()) {
1911       checkAndFixTableStates();
1912     }
1913   }
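
  /**
   * Illustrative sketch only, never called by hbck itself: raises the skipped-regions threshold
   * read in checkAndFixConsistency().  The value 5 is an arbitrary example.
   */
  private void exampleRaiseSkippedRegionsLimit() {
    // Tolerate up to 5 skipped regions before checkAndFixConsistency() fails with an IOException.
    getConf().setInt("hbase.hbck.skipped.regions.limit", 5);
  }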
1914
1915   /**
1916    * Check consistency of all regions using multiple threads concurrently.
1917    */
1918   private void checkRegionConsistencyConcurrently(
1919     final List<CheckRegionConsistencyWorkItem> workItems)
1920     throws IOException, KeeperException, InterruptedException {
1921     if (workItems.isEmpty()) {
1922       return;  // nothing to check
1923     }
1924
1925     List<Future<Void>> workFutures = executor.invokeAll(workItems);
1926     for(Future<Void> f: workFutures) {
1927       try {
1928         f.get();
1929       } catch(ExecutionException e1) {
1930         LOG.warn("Could not check region consistency " , e1.getCause());
1931         if (e1.getCause() instanceof IOException) {
1932           throw (IOException)e1.getCause();
1933         } else if (e1.getCause() instanceof KeeperException) {
1934           throw (KeeperException)e1.getCause();
1935         } else if (e1.getCause() instanceof InterruptedException) {
1936           throw (InterruptedException)e1.getCause();
1937         } else {
1938           throw new IOException(e1.getCause());
1939         }
1940       }
1941     }
1942   }
1943
1944   class CheckRegionConsistencyWorkItem implements Callable<Void> {
1945     private final String key;
1946     private final HbckInfo hbi;
1947
1948     CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
1949       this.key = key;
1950       this.hbi = hbi;
1951     }
1952
1953     @Override
1954     public synchronized Void call() throws Exception {
1955       try {
1956         checkRegionConsistency(key, hbi);
1957       } catch (Exception e) {
1958         // If the region is a non-META region, skip it and log a warning/error message; if
1959         // it is the META region, we should not continue.
1960         LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
1961           + "'.", e);
1962         if (hbi.getHdfsHRI().isMetaRegion()) {
1963           throw e;
1964         }
1965         LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1966         addSkippedRegion(hbi);
1967       }
1968       return null;
1969     }
1970   }
1971
1972   private void addSkippedRegion(final HbckInfo hbi) {
1973     Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1974     if (skippedRegionNames == null) {
1975       skippedRegionNames = new HashSet<String>();
1976     }
1977     skippedRegionNames.add(hbi.getRegionNameAsString());
1978     skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1979   }
1980
1981   /**
1982    * Check and fix table states. Assumes full info is available:
1983    * - tableInfos populated
1984    * - empty tables loaded
1985    */
1986   private void checkAndFixTableStates() throws IOException {
1987     // first check dangling states
1988     for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1989       TableName tableName = entry.getKey();
1990       TableState tableState = entry.getValue();
1991       TableInfo tableInfo = tablesInfo.get(tableName);
1992       if (isTableIncluded(tableName)
1993           && !tableName.isSystemTable()
1994           && tableInfo == null) {
1995         if (fixMeta) {
1996           MetaTableAccessor.deleteTableState(connection, tableName);
1997           TableState state = MetaTableAccessor.getTableState(connection, tableName);
1998           if (state != null) {
1999             errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2000                 tableName + " unable to delete dangling table state " + tableState);
2001           }
2002         } else {
2003           errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2004               tableName + " has dangling table state " + tableState);
2005         }
2006       }
2007     }
2008     // check that all tables have states
2009     for (TableName tableName : tablesInfo.keySet()) {
2010       if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
2011         if (fixMeta) {
2012           MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
2013           TableState newState = MetaTableAccessor.getTableState(connection, tableName);
2014           if (newState == null) {
2015             errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2016                 "Unable to change state for table " + tableName + " in meta ");
2017           }
2018         } else {
2019           errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2020               tableName + " has no state in meta ");
2021         }
2022       }
2023     }
2024   }
2025
2026   private void preCheckPermission() throws IOException, AccessDeniedException {
2027     if (shouldIgnorePreCheckPermission()) {
2028       return;
2029     }
2030
2031     Path hbaseDir = FSUtils.getRootDir(getConf());
2032     FileSystem fs = hbaseDir.getFileSystem(getConf());
2033     UserProvider userProvider = UserProvider.instantiate(getConf());
2034     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
2035     FileStatus[] files = fs.listStatus(hbaseDir);
2036     for (FileStatus file : files) {
2037       try {
2038         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
2039       } catch (AccessDeniedException ace) {
2040         LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
2041         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
2042           + " does not have write perms to " + file.getPath()
2043           + ". Please rerun hbck as hdfs user " + file.getOwner());
2044         throw ace;
2045       }
2046     }
2047   }
2048
2049   /**
2050    * Deletes region from meta table
2051    */
2052   private void deleteMetaRegion(HbckInfo hi) throws IOException {
2053     deleteMetaRegion(hi.metaEntry.getRegionName());
2054   }
2055
2056   /**
2057    * Deletes region from meta table
2058    */
2059   private void deleteMetaRegion(byte[] metaKey) throws IOException {
2060     Delete d = new Delete(metaKey);
2061     meta.delete(d);
2062     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2063   }
2064
2065   /**
2066    * Reset the split parent region info in meta table
2067    */
2068   private void resetSplitParent(HbckInfo hi) throws IOException {
2069     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
2070     Delete d = new Delete(hi.metaEntry.getRegionName());
2071     d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2072     d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2073     mutations.add(d);
2074
2075     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
2076     hri.setOffline(false);
2077     hri.setSplit(false);
2078     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
2079     mutations.add(p);
2080
2081     meta.mutateRow(mutations);
2082     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2083   }
2084
2085   /**
2086    * This is a backwards-compatibility wrapper for permanently offlining a region
2087    * that should not be alive.  If the region server does not support the
2088    * "offline" method, it will use the closest unassign method instead.  This
2089    * will basically work until one attempts to disable or delete the affected
2090    * table.  The problem has to do with in-memory only master state, so
2091    * restarting the HMaster or failing over to another should fix this.
2092    */
2093   private void offline(byte[] regionName) throws IOException {
2094     String regionString = Bytes.toStringBinary(regionName);
2095     if (!rsSupportsOffline) {
2096       LOG.warn("Using unassign region " + regionString
2097           + " instead of using offline method, you should"
2098           + " restart HMaster after these repairs");
2099       admin.unassign(regionName, true);
2100       return;
2101     }
2102
2103     // The first time, we assume the RS supports #offline.
2104     try {
2105       LOG.info("Offlining region " + regionString);
2106       admin.offline(regionName);
2107     } catch (IOException ioe) {
2108       String notFoundMsg = "java.lang.NoSuchMethodException: " +
2109         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2110       if (ioe.getMessage().contains(notFoundMsg)) {
2111         LOG.warn("Using unassign region " + regionString
2112             + " instead of using offline method, you should"
2113             + " restart HMaster after these repairs");
2114         rsSupportsOffline = false; // in the future just use unassign
2115         admin.unassign(regionName, true);
2116         return;
2117       }
2118       throw ioe;
2119     }
2120   }
2121
2122   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2123     undeployRegionsForHbi(hi);
2124     // undeploy replicas of the region (but only if the method is invoked for the primary)
2125     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2126       return;
2127     }
2128     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2129     for (int i = 1; i < numReplicas; i++) {
2130       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2131       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2132           hi.getPrimaryHRIForDeployedReplica(), i);
2133       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2134       if (h != null) {
2135         undeployRegionsForHbi(h);
2136         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2137         //in consistency checks
2138         h.setSkipChecks(true);
2139       }
2140     }
2141   }
2142
2143   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2144     for (OnlineEntry rse : hi.deployedEntries) {
2145       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2146       try {
2147         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2148         offline(rse.hri.getRegionName());
2149       } catch (IOException ioe) {
2150         LOG.warn("Got exception when attempting to offline region "
2151             + Bytes.toString(rse.hri.getRegionName()), ioe);
2152       }
2153     }
2154   }
2155
2156   /**
2157    * Attempts to undeploy a region from a region server based on information in
2158    * META.  Any operations that modify the file system should make sure that
2159    * its corresponding region is not deployed to prevent data races.
2160    *
2161    * A separate call is required to update the master in-memory region state
2162    * kept in the AssignmentManager.  Because disable uses this state instead of
2163    * that found in META, we can't seem to cleanly disable/delete tables that
2164    * have been hbck fixed.  When used on a version of HBase that does not have
2165    * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2166    * restart or failover may be required.
2167    */
2168   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2169     if (hi.metaEntry == null && hi.hdfsEntry == null) {
2170       undeployRegions(hi);
2171       return;
2172     }
2173
2174     // get assignment info and hregioninfo from meta.
2175     Get get = new Get(hi.getRegionName());
2176     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2177     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2178     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2179     // also get the locations of the replicas to close if the primary region is being closed
2180     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2181       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2182       for (int i = 0; i < numReplicas; i++) {
2183         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2184         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2185       }
2186     }
2187     Result r = meta.get(get);
2188     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2189     if (rl == null) {
2190       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2191           " since meta does not have handle to reach it");
2192       return;
2193     }
2194     for (HRegionLocation h : rl.getRegionLocations()) {
2195       ServerName serverName = h.getServerName();
2196       if (serverName == null) {
2197         errors.reportError("Unable to close region "
2198             + hi.getRegionNameAsString() +  " because meta does not "
2199             + "have handle to reach it.");
2200         continue;
2201       }
2202       HRegionInfo hri = h.getRegionInfo();
2203       if (hri == null) {
2204         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2205             + " because hbase:meta had invalid or missing "
2206             + HConstants.CATALOG_FAMILY_STR + ":"
2207             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2208             + " qualifier value.");
2209         continue;
2210       }
2211       // close the region -- close files and remove assignment
2212       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2213     }
2214   }
2215
2216   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2217     KeeperException, InterruptedException {
2218     // If we are trying to fix the errors
2219     if (shouldFixAssignments()) {
2220       errors.print(msg);
2221       undeployRegions(hbi);
2222       setShouldRerun();
2223       HRegionInfo hri = hbi.getHdfsHRI();
2224       if (hri == null) {
2225         hri = hbi.metaEntry;
2226       }
2227       HBaseFsckRepair.fixUnassigned(admin, hri);
2228       HBaseFsckRepair.waitUntilAssigned(admin, hri);
2229
2230       // also assign replicas if needed (do it only when this call operates on a primary replica)
2231       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
2232       int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2233       for (int i = 1; i < replicationCount; i++) {
2234         hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2235         HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2236         if (h != null) {
2237           undeployRegions(h);
2238           //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2239           //in consistency checks
2240           h.setSkipChecks(true);
2241         }
2242         HBaseFsckRepair.fixUnassigned(admin, hri);
2243         HBaseFsckRepair.waitUntilAssigned(admin, hri);
2244       }
2245
2246     }
2247   }
2248
2249   /**
2250    * Check a single region for consistency and correct deployment.
2251    */
2252   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2253   throws IOException, KeeperException, InterruptedException {
2254
2255     if (hbi.isSkipChecks()) return;
2256     String descriptiveName = hbi.toString();
2257     boolean inMeta = hbi.metaEntry != null;
2258     // In case not checking HDFS, assume the region is on HDFS
2259     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2260     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2261     boolean isDeployed = !hbi.deployedOn.isEmpty();
2262     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2263     boolean deploymentMatchesMeta =
2264       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2265       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2266     boolean splitParent =
2267         inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2268     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2269     boolean recentlyModified = inHdfs &&
2270       hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2271
2272     // ========== First the healthy cases =============
2273     if (hbi.containsOnlyHdfsEdits()) {
2274       return;
2275     }
2276     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2277       return;
2278     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2279       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2280         "tabled that is not deployed");
2281       return;
2282     } else if (recentlyModified) {
2283       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2284       return;
2285     }
2286     // ========== Cases where the region is not in hbase:meta =============
2287     else if (!inMeta && !inHdfs && !isDeployed) {
2288       // We shouldn't have record of this region at all then!
2289       assert false : "Entry for region with no data";
2290     } else if (!inMeta && !inHdfs && isDeployed) {
2291       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2292           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2293           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2294       if (shouldFixAssignments()) {
2295         undeployRegions(hbi);
2296       }
2297
2298     } else if (!inMeta && inHdfs && !isDeployed) {
2299       if (hbi.isMerged()) {
2300         // This region has already been merged, the remaining hdfs file will be
2301         // cleaned by CatalogJanitor later
2302         hbi.setSkipChecks(true);
2303         LOG.info("Region " + descriptiveName
2304             + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2305         return;
2306       }
2307       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2308           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2309           "or deployed on any region server");
2310       // restore region consistency of an adopted orphan
2311       if (shouldFixMeta()) {
2312         if (!hbi.isHdfsRegioninfoPresent()) {
2313           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2314               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2315               " used.");
2316           return;
2317         }
2318
2319         HRegionInfo hri = hbi.getHdfsHRI();
2320         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2321
2322         for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2323           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2324               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2325                 hri.getEndKey()) >= 0)
2326               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2327             if(region.isSplit() || region.isOffline()) continue;
2328             Path regionDir = hbi.getHdfsRegionDir();
2329             FileSystem fs = regionDir.getFileSystem(getConf());
2330             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2331             for (Path familyDir : familyDirs) {
2332               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2333               for (Path referenceFilePath : referenceFilePaths) {
2334                 Path parentRegionDir =
2335                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2336                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2337                   LOG.warn(hri + " start and stop keys are in the range of " + region
2338                       + ". The region might not be cleaned up from hdfs when region " + region
2339                       + " split failed. Hence deleting from hdfs.");
2340                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2341                     regionDir.getParent(), hri);
2342                   return;
2343                 }
2344               }
2345             }
2346           }
2347         }
2348         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2349         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2350         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2351             admin.getClusterStatus().getServers(), numReplicas);
2352
2353         tryAssignmentRepair(hbi, "Trying to reassign region...");
2354       }
2355
2356     } else if (!inMeta && inHdfs && isDeployed) {
2357       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2358           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2359       debugLsr(hbi.getHdfsRegionDir());
2360       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2361         // for replicas, this means that we should undeploy the region (we would have
2362         // gone over the primaries and fixed meta holes in first phase under
2363         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2364         // this stage unless it is an unwanted replica)
2365         if (shouldFixAssignments()) {
2366           undeployRegionsForHbi(hbi);
2367         }
2368       }
2369       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2370         if (!hbi.isHdfsRegioninfoPresent()) {
2371           LOG.error("This should have been repaired in table integrity repair phase");
2372           return;
2373         }
2374
2375         LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2376         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2377         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2378             admin.getClusterStatus().getServers(), numReplicas);
2379         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2380       }
2381
2382     // ========== Cases where the region is in hbase:meta =============
2383     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2384       // check whether this is an actual error, or just transient state where parent
2385       // is not cleaned
2386       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2387         // check that split daughters are there
2388         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2389         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2390         if (infoA != null && infoB != null) {
2391           // we already processed or will process daughters. Move on, nothing to see here.
2392           hbi.setSkipChecks(true);
2393           return;
2394         }
2395       }
2396       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2397           + descriptiveName + " is a split parent in META, in HDFS, "
2398           + "and not deployed on any region server. This could be transient.");
2399       if (shouldFixSplitParents()) {
2400         setShouldRerun();
2401         resetSplitParent(hbi);
2402       }
2403     } else if (inMeta && !inHdfs && !isDeployed) {
2404       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2405           + descriptiveName + " found in META, but not in HDFS "
2406           + "or deployed on any region server.");
2407       if (shouldFixMeta()) {
2408         deleteMetaRegion(hbi);
2409       }
2410     } else if (inMeta && !inHdfs && isDeployed) {
2411       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2412           + " found in META, but not in HDFS, " +
2413           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2414       // We treat HDFS as ground truth.  Any information in meta is transient
2415       // and equivalent data can be regenerated.  So, let's unassign and remove
2416       // these problems from META.
2417       if (shouldFixAssignments()) {
2418         errors.print("Trying to fix unassigned region...");
2419         undeployRegions(hbi);
2420       }
2421       if (shouldFixMeta()) {
2422         // wait for it to complete
2423         deleteMetaRegion(hbi);
2424       }
2425     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2426       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2427           + " not deployed on any region server.");
2428       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2429     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2430       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2431           "Region " + descriptiveName + " should not be deployed according " +
2432           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2433       if (shouldFixAssignments()) {
2434         errors.print("Trying to close the region " + descriptiveName);
2435         setShouldRerun();
2436         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2437       }
2438     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2439       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2440           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2441           + " but is multiply assigned to region servers " +
2442           Joiner.on(", ").join(hbi.deployedOn));
2443       // If we are trying to fix the errors
2444       if (shouldFixAssignments()) {
2445         errors.print("Trying to fix assignment error...");
2446         setShouldRerun();
2447         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2448       }
2449     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2450       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2451           + descriptiveName + " listed in hbase:meta on region server " +
2452           hbi.metaEntry.regionServer + " but found on region server " +
2453           hbi.deployedOn.get(0));
2454       // If we are trying to fix the errors
2455       if (shouldFixAssignments()) {
2456         errors.print("Trying to fix assignment error...");
2457         setShouldRerun();
2458         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2459         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2460       }
2461     } else {
2462       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2463           " is in an unforeseen state:" +
2464           " inMeta=" + inMeta +
2465           " inHdfs=" + inHdfs +
2466           " isDeployed=" + isDeployed +
2467           " isMultiplyDeployed=" + isMultiplyDeployed +
2468           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2469           " shouldBeDeployed=" + shouldBeDeployed);
2470     }
2471   }
2472
2473   /**
2474    * Checks tables integrity. Goes over all regions and scans the tables.
2475    * Collects all the pieces for each table and checks if there are missing,
2476    * repeated or overlapping ones.
2477    * @throws IOException
2478    */
2479   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2480     tablesInfo = new TreeMap<TableName,TableInfo> ();
2481     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2482     for (HbckInfo hbi : regionInfoMap.values()) {
2483       // Check only valid, working regions
2484       if (hbi.metaEntry == null) {
2485         // this assumes that consistency check has run loadMetaEntry
2486         Path p = hbi.getHdfsRegionDir();
2487         if (p == null) {
2488           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2489         }
2490
2491         // TODO test.
2492         continue;
2493       }
2494       if (hbi.metaEntry.regionServer == null) {
2495         errors.detail("Skipping region because no region server: " + hbi);
2496         continue;
2497       }
2498       if (hbi.metaEntry.isOffline()) {
2499         errors.detail("Skipping region because it is offline: " + hbi);
2500         continue;
2501       }
2502       if (hbi.containsOnlyHdfsEdits()) {
2503         errors.detail("Skipping region because it only contains edits" + hbi);
2504         continue;
2505       }
2506
2507       // Missing regionDir or over-deployment is checked elsewhere. Include
2508       // these cases in modTInfo, so we can evaluate those regions as part of
2509       // the region chain in META
2510       //if (hbi.foundRegionDir == null) continue;
2511       //if (hbi.deployedOn.size() != 1) continue;
2512       if (hbi.deployedOn.size() == 0) continue;
2513
2514       // We should be safe here
2515       TableName tableName = hbi.metaEntry.getTable();
2516       TableInfo modTInfo = tablesInfo.get(tableName);
2517       if (modTInfo == null) {
2518         modTInfo = new TableInfo(tableName);
2519       }
2520       for (ServerName server : hbi.deployedOn) {
2521         modTInfo.addServer(server);
2522       }
2523
2524       if (!hbi.isSkipChecks()) {
2525         modTInfo.addRegionInfo(hbi);
2526       }
2527
2528       tablesInfo.put(tableName, modTInfo);
2529     }
2530
2531     loadTableInfosForTablesWithNoRegion();
2532
2533     logParallelMerge();
2534     for (TableInfo tInfo : tablesInfo.values()) {
2535       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2536       if (!tInfo.checkRegionChain(handler)) {
2537         errors.report("Found inconsistency in table " + tInfo.getName());
2538       }
2539     }
2540     return tablesInfo;
2541   }
2542
2543   /** Loads table infos for tables that may not have been included, since there are no
2544    * regions reported for the table, but the table dir is present in hdfs
2545    */
2546   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2547     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2548     for (HTableDescriptor htd : allTables.values()) {
2549       if (checkMetaOnly && !htd.isMetaTable()) {
2550         continue;
2551       }
2552
2553       TableName tableName = htd.getTableName();
2554       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2555         TableInfo tableInfo = new TableInfo(tableName);
2556         tableInfo.htds.add(htd);
2557         tablesInfo.put(htd.getTableName(), tableInfo);
2558       }
2559     }
2560   }
2561
2562   /**
2563    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2564    * @return number of file move fixes done to merge regions.
2565    */
2566   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2567     int fileMoves = 0;
2568     String thread = Thread.currentThread().getName();
2569     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2570     debugLsr(contained.getHdfsRegionDir());
2571
2572     // rename the contained into the container.
2573     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2574     FileStatus[] dirs = null;
2575     try {
2576       dirs = fs.listStatus(contained.getHdfsRegionDir());
2577     } catch (FileNotFoundException fnfe) {
2578       // region we are attempting to merge in is not present!  Since this is a merge, there is
2579       // no harm skipping this region if it does not exist.
2580       if (!fs.exists(contained.getHdfsRegionDir())) {
2581         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2582             + " is missing. Assuming already sidelined or moved.");
2583       } else {
2584         sidelineRegionDir(fs, contained);
2585       }
2586       return fileMoves;
2587     }
2588
2589     if (dirs == null) {
2590       if (!fs.exists(contained.getHdfsRegionDir())) {
2591         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2592             + " already sidelined.");
2593       } else {
2594         sidelineRegionDir(fs, contained);
2595       }
2596       return fileMoves;
2597     }
2598
2599     for (FileStatus cf : dirs) {
2600       Path src = cf.getPath();
2601       Path dst =  new Path(targetRegionDir, src.getName());
2602
2603       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2604         // do not copy the old .regioninfo file.
2605         continue;
2606       }
2607
2608       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2609         // do not copy the .oldlogs files
2610         continue;
2611       }
2612
2613       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2614       // FileSystem.rename is inconsistent with directories -- if the
2615       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2616       // it moves the src into the dst dir resulting in (foo/a/b).  If
2617       // the dst does not exist and the src is a dir, src becomes dst (foo/b).
2618       for (FileStatus hfile : fs.listStatus(src)) {
2619         boolean success = fs.rename(hfile.getPath(), dst);
2620         if (success) {
2621           fileMoves++;
2622         }
2623       }
2624       LOG.debug("[" + thread + "] Sideline directory contents:");
2625       debugLsr(targetRegionDir);
2626     }
2627
2628     // if all success.
2629     sidelineRegionDir(fs, contained);
2630     LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2631         getSidelineDir());
2632     debugLsr(contained.getHdfsRegionDir());
2633
2634     return fileMoves;
2635   }
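
  /*
   * Illustrative sketch (hypothetical helper, not used by hbck): FileSystem.rename moves a source
   * directory *under* an existing destination directory, so merging region dirs, as done in
   * mergeRegionDirs() above, renames each child file individually instead of renaming the
   * directory itself.
   */
  private static int moveChildrenIntoDir(FileSystem fs, Path srcDir, Path dstDir)
      throws IOException {
    int moved = 0;
    for (FileStatus child : fs.listStatus(srcDir)) {
      // rename each child directly into dstDir; renaming srcDir itself would nest it as
      // dstDir/srcDirName when dstDir already exists.
      if (fs.rename(child.getPath(), new Path(dstDir, child.getPath().getName()))) {
        moved++;
      }
    }
    return moved;
  }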
2636
2637
2638   static class WorkItemOverlapMerge implements Callable<Void> {
2639     private TableIntegrityErrorHandler handler;
2640     Collection<HbckInfo> overlapgroup;
2641
2642     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2643       this.handler = handler;
2644       this.overlapgroup = overlapgroup;
2645     }
2646
2647     @Override
2648     public Void call() throws Exception {
2649       handler.handleOverlapGroup(overlapgroup);
2650       return null;
2651     }
2652   };
2653
2654
2655   /**
2656    * Maintain information about a particular table.
2657    */
2658   public class TableInfo {
2659     TableName tableName;
2660     TreeSet <ServerName> deployedOn;
2661
2662     // backwards regions
2663     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2664
2665     // sidelined big overlapped regions
2666     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2667
2668     // region split calculator
2669     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2670
2671     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2672     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2673
2674     // key = start split, values = set of splits in problem group
2675     final Multimap<byte[], HbckInfo> overlapGroups =
2676       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2677
2678     // list of regions derived from meta entries.
2679     private ImmutableList<HRegionInfo> regionsFromMeta = null;
2680
2681     TableInfo(TableName name) {
2682       this.tableName = name;
2683       deployedOn = new TreeSet <ServerName>();
2684     }
2685
2686     /**
2687      * @return descriptor common to all regions.  Null if there are none or multiple!
2688      */
2689     private HTableDescriptor getHTD() {
2690       if (htds.size() == 1) {
2691         return (HTableDescriptor)htds.toArray()[0];
2692       } else {
2693         LOG.error("None/Multiple table descriptors found for table '"
2694           + tableName + "' regions: " + htds);
2695       }
2696       return null;
2697     }
2698
2699     public void addRegionInfo(HbckInfo hir) {
2700       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2701         // end key is absolute end key, just add it.
2702         // ignore replicas other than primary for these checks
2703         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2704         return;
2705       }
2706
2707       // if not the absolute end key, check for cycle
2708       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2709         errors.reportError(
2710             ERROR_CODE.REGION_CYCLE,
2711             String.format("The endkey for this region comes before the "
2712                 + "startkey, startkey=%s, endkey=%s",
2713                 Bytes.toStringBinary(hir.getStartKey()),
2714                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2715         backwards.add(hir);
2716         return;
2717       }
2718
2719       // main case, add to split calculator
2720       // ignore replicas other than primary for these checks
2721       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2722     }
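
    /*
     * Illustrative sketch (hypothetical helper, not called above): addRegionInfo() treats an
     * empty end key as the absolute end of the table, so a range is "backwards" only when the
     * end key is non-empty and the start key sorts after it.
     */
    private boolean isBackwardsRange(byte[] startKey, byte[] endKey) {
      return !Bytes.equals(endKey, HConstants.EMPTY_END_ROW)
          && Bytes.compareTo(startKey, endKey) > 0;
    }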
2723
2724     public void addServer(ServerName server) {
2725       this.deployedOn.add(server);
2726     }
2727
2728     public TableName getName() {
2729       return tableName;
2730     }
2731
2732     public int getNumRegions() {
2733       return sc.getStarts().size() + backwards.size();
2734     }
2735
2736     public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2737       // lazy loaded, synchronized to ensure a single load
2738       if (regionsFromMeta == null) {
2739         List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2740         for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2741           if (tableName.equals(h.getTableName())) {
2742             if (h.metaEntry != null) {
2743               regions.add((HRegionInfo) h.metaEntry);
2744             }
2745           }
2746         }
2747         regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2748       }
2749
2750       return regionsFromMeta;
2751     }
2752
2753     private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2754       ErrorReporter errors;
2755
2756       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2757         this.errors = errors;
2758         setTableInfo(ti);
2759       }
2760
2761       @Override
2762       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2763         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2764             "First region should start with an empty key.  You need to "
2765             + "create a new region and regioninfo in HDFS to plug the hole.",
2766             getTableInfo(), hi);
2767       }
2768
2769       @Override
2770       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2771         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2772             "Last region should end with an empty key. You need to "
2773                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2774       }
2775
2776       @Override
2777       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2778         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2779             "Region has the same start and end key.", getTableInfo(), hi);
2780       }
2781
2782       @Override
2783       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2784         byte[] key = r1.getStartKey();
2785         // dup start key
2786         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2787             "Multiple regions have the same startkey: "
2788             + Bytes.toStringBinary(key), getTableInfo(), r1);
2789         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2790             "Multiple regions have the same startkey: "
2791             + Bytes.toStringBinary(key), getTableInfo(), r2);
2792       }
2793
2794       @Override
2795       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2796         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2797             "There is an overlap in the region chain.",
2798             getTableInfo(), hi1, hi2);
2799       }
2800
2801       @Override
2802       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2803         errors.reportError(
2804             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2805             "There is a hole in the region chain between "
2806                 + Bytes.toStringBinary(holeStart) + " and "
2807                 + Bytes.toStringBinary(holeStop)
2808                 + ".  You need to create a new .regioninfo and region "
2809                 + "dir in hdfs to plug the hole.");
2810       }
2811     };
2812
2813     /**
2814      * This handler fixes integrity errors from hdfs information.  There are
2815      * basically three classes of integrity problems 1) holes, 2) overlaps, and
2816      * 3) invalid regions.
2817      *
2818      * This class overrides methods that fix holes and the overlap group case.
2819      * Individual cases of particular overlaps are handled by the general
2820      * overlap group merge repair case.
2821      *
2822      * If hbase is online, this forces regions offline before doing merge
2823      * operations.
2824      */
2825     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2826       Configuration conf;
2827
2828       boolean fixOverlaps = true;
2829
2830       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2831           boolean fixHoles, boolean fixOverlaps) {
2832         super(ti, errors);
2833         this.conf = conf;
2834         this.fixOverlaps = fixOverlaps;
2835         // TODO properly use fixHoles
2836       }
2837
2838       /**
2839        * This is a special case hole -- when the first region of a table is
2840        * missing from META, HBase doesn't acknowledge the existence of the
2841        * table.
2842        */
2843       @Override
2844       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2845         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2846             "First region should start with an empty key.  Creating a new " +
2847             "region and regioninfo in HDFS to plug the hole.",
2848             getTableInfo(), next);
2849         HTableDescriptor htd = getTableInfo().getHTD();
2850         // from special EMPTY_START_ROW to next region's startKey
2851         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2852             HConstants.EMPTY_START_ROW, next.getStartKey());
2853
2854         // TODO test
2855         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2856         LOG.info("Table region start key was not empty.  Created new empty region: "
2857             + newRegion + " " +region);
2858         fixes++;
2859       }
2860
2861       @Override
2862       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2863         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2864             "Last region should end with an empty key.  Creating a new "
2865                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2866         HTableDescriptor htd = getTableInfo().getHTD();
2867         // from curEndKey to EMPTY_START_ROW
2868         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2869             HConstants.EMPTY_START_ROW);
2870
2871         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2872         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2873             + " " + region);
2874         fixes++;
2875       }
2876
2877       /**
2878        * There is a hole in the hdfs regions that violates the table integrity
2879        * rules.  Create a new empty region that patches the hole.
2880        */
2881       @Override
2882       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2883         errors.reportError(
2884             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2885             "There is a hole in the region chain between "
2886                 + Bytes.toStringBinary(holeStartKey) + " and "
2887                 + Bytes.toStringBinary(holeStopKey)
2888                 + ".  Creating a new regioninfo and region "
2889                 + "dir in hdfs to plug the hole.");
2890         HTableDescriptor htd = getTableInfo().getHTD();
2891         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2892         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2893         LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2894         fixes++;
2895       }
2896
2897       /**
2898        * This takes set of overlapping regions and merges them into a single
2899        * region.  This covers cases like degenerate regions, shared start key,
2900        * general overlaps, duplicate ranges, and partial overlapping regions.
2901        *
2902        * Cases:
2903        * - Clean regions that overlap
2904        * - Regions containing only .oldlogs (cannot determine the start/stop range)
2905        *
2906        * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
2907        */
2908       @Override
2909       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2910           throws IOException {
2911         Preconditions.checkNotNull(overlap);
2912         Preconditions.checkArgument(overlap.size() > 0);
2913
2914         if (!this.fixOverlaps) {
2915           LOG.warn("Not attempting to repair overlaps.");
2916           return;
2917         }
2918
2919         if (overlap.size() > maxMerge) {
2920           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2921             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2922           if (sidelineBigOverlaps) {
2923             // we only sideline big overlap groups that exceed the max number of regions to merge
2924             sidelineBigOverlaps(overlap);
2925           }
2926           return;
2927         }
2928
2929         mergeOverlaps(overlap);
2930       }
2931
2932       void mergeOverlaps(Collection<HbckInfo> overlap)
2933           throws IOException {
2934         String thread = Thread.currentThread().getName();
2935         LOG.info("== [" + thread + "] Merging regions into one region: "
2936           + Joiner.on(",").join(overlap));
2937         // get the min / max range and close all concerned regions
2938         Pair<byte[], byte[]> range = null;
2939         for (HbckInfo hi : overlap) {
2940           if (range == null) {
2941             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2942           } else {
2943             if (RegionSplitCalculator.BYTES_COMPARATOR
2944                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2945               range.setFirst(hi.getStartKey());
2946             }
2947             if (RegionSplitCalculator.BYTES_COMPARATOR
2948                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2949               range.setSecond(hi.getEndKey());
2950             }
2951           }
2952           // need to close files so delete can happen.
2953           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2954           LOG.debug("[" + thread + "] Contained region dir before close");
2955           debugLsr(hi.getHdfsRegionDir());
2956           try {
2957             LOG.info("[" + thread + "] Closing region: " + hi);
2958             closeRegion(hi);
2959           } catch (IOException ioe) {
2960             LOG.warn("[" + thread + "] Was unable to close region " + hi
2961               + ".  Just continuing... ", ioe);
2962           } catch (InterruptedException e) {
2963             LOG.warn("[" + thread + "] Was unable to close region " + hi
2964               + ".  Just continuing... ", e);
2965           }
2966
2967           try {
2968             LOG.info("[" + thread + "] Offlining region: " + hi);
2969             offline(hi.getRegionName());
2970           } catch (IOException ioe) {
2971             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2972               + ".  Just continuing... ", ioe);
2973           }
2974         }
2975
2976         // create new empty container region.
2977         HTableDescriptor htd = getTableInfo().getHTD();
2978         // from start key to end Key
2979         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2980             range.getSecond());
2981         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2982         LOG.info("[" + thread + "] Created new empty container region: " +
2983             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2984         debugLsr(region.getRegionFileSystem().getRegionDir());
2985
2986         // all target regions are closed, should be able to safely cleanup.
2987         boolean didFix = false;
2988         Path target = region.getRegionFileSystem().getRegionDir();
2989         for (HbckInfo contained : overlap) {
2990           LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
2991           int merges = mergeRegionDirs(target, contained);
2992           if (merges > 0) {
2993             didFix = true;
2994           }
2995         }
2996         if (didFix) {
2997           fixes++;
2998         }
2999       }
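
      /*
       * Illustrative sketch (hypothetical helper that restates the min-start / max-end fold in
       * mergeOverlaps() above; it is not called anywhere): the merged container region spans the
       * minimum start key and the maximum end key of the overlap group.
       */
      private Pair<byte[], byte[]> coveringRange(Collection<HbckInfo> overlap) {
        Pair<byte[], byte[]> range = null;
        for (HbckInfo hi : overlap) {
          if (range == null) {
            range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
          } else {
            if (RegionSplitCalculator.BYTES_COMPARATOR
                .compare(hi.getStartKey(), range.getFirst()) < 0) {
              range.setFirst(hi.getStartKey());   // expand the left edge
            }
            if (RegionSplitCalculator.BYTES_COMPARATOR
                .compare(hi.getEndKey(), range.getSecond()) > 0) {
              range.setSecond(hi.getEndKey());    // expand the right edge
            }
          }
        }
        return range;
      }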
3000
3001       /**
3002        * Sideline some regions in a big overlap group so that the group
3003        * has fewer regions, making it easier to merge them later on.
3004        *
3005        * @param bigOverlap the overlap group containing more regions than maxMerge
3006        * @throws IOException
3007        */
3008       void sidelineBigOverlaps(
3009           Collection<HbckInfo> bigOverlap) throws IOException {
3010         int overlapsToSideline = bigOverlap.size() - maxMerge;
3011         if (overlapsToSideline > maxOverlapsToSideline) {
3012           overlapsToSideline = maxOverlapsToSideline;
3013         }
3014         List<HbckInfo> regionsToSideline =
3015           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
3016         FileSystem fs = FileSystem.get(conf);
3017         for (HbckInfo regionToSideline: regionsToSideline) {
3018           try {
3019             LOG.info("Closing region: " + regionToSideline);
3020             closeRegion(regionToSideline);
3021           } catch (IOException ioe) {
3022             LOG.warn("Was unable to close region " + regionToSideline
3023               + ".  Just continuing... ", ioe);
3024           } catch (InterruptedException e) {
3025             LOG.warn("Was unable to close region " + regionToSideline
3026               + ".  Just continuing... ", e);
3027           }
3028
3029           try {
3030             LOG.info("Offlining region: " + regionToSideline);
3031             offline(regionToSideline.getRegionName());
3032           } catch (IOException ioe) {
3033             LOG.warn("Unable to offline region from master: " + regionToSideline
3034               + ".  Just continuing... ", ioe);
3035           }
3036
3037           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
3038           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
3039           if (sidelineRegionDir != null) {
3040             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3041             LOG.info("After sidelined big overlapped region: "
3042               + regionToSideline.getRegionNameAsString()
3043               + " to " + sidelineRegionDir.toString());
3044             fixes++;
3045           }
3046         }
3047       }
3048     }
3049
3050     /**
3051      * Check the region chain (from META) of this table.  We are looking for
3052      * holes, overlaps, and cycles.
3053      * @return false if there are errors
3054      * @throws IOException
3055      */
3056     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
3057       // When the table is disabled there is no need to check the region chain. If some of its
3058       // regions are accidentally deployed, the code below might report issues such as a missing
3059       // start or end region, or a hole in the chain, and may try to fix them, which is unwanted.
3060       if (isTableDisabled(this.tableName)) {
3061         return true;
3062       }
3063       int originalErrorsCount = errors.getErrorList().size();
3064       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
3065       SortedSet<byte[]> splits = sc.getSplits();
3066
3067       byte[] prevKey = null;
3068       byte[] problemKey = null;
3069
3070       if (splits.size() == 0) {
3071         // no region for this table
3072         handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
3073       }
3074
3075       for (byte[] key : splits) {
3076         Collection<HbckInfo> ranges = regions.get(key);
3077         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
3078           for (HbckInfo rng : ranges) {
3079             handler.handleRegionStartKeyNotEmpty(rng);
3080           }
3081         }
3082
3083         // check for degenerate ranges
3084         for (HbckInfo rng : ranges) {
3085           // special endkey case converts '' to null
3086           byte[] endKey = rng.getEndKey();
3087           endKey = (endKey.length == 0) ? null : endKey;
3088           if (Bytes.equals(rng.getStartKey(),endKey)) {
3089             handler.handleDegenerateRegion(rng);
3090           }
3091         }
3092
3093         if (ranges.size() == 1) {
3094           // this split key is ok -- no overlap, not a hole.
3095           if (problemKey != null) {
3096             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3097           }
3098           problemKey = null; // fell through, no more problem.
3099         } else if (ranges.size() > 1) {
3100           // set the new problem key group name; if we already have a
3101           // problem key, just keep using it.
3102           if (problemKey == null) {
3103             // only for overlap regions.
3104             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
3105             problemKey = key;
3106           }
3107           overlapGroups.putAll(problemKey, ranges);
3108
3109           // record errors
3110           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
3111           // this is dumb and O(n^2), but it shouldn't happen often
3112           for (HbckInfo r1 : ranges) {
3113             if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3114             subRange.remove(r1);
3115             for (HbckInfo r2 : subRange) {
3116               if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3117               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3118                 handler.handleDuplicateStartKeys(r1,r2);
3119               } else {
3120                 // overlap
3121                 handler.handleOverlapInRegionChain(r1, r2);
3122               }
3123             }
3124           }
3125
3126         } else if (ranges.size() == 0) {
3127           if (problemKey != null) {
3128             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3129           }
3130           problemKey = null;
3131
3132           byte[] holeStopKey = sc.getSplits().higher(key);
3133           // if higher key is null we reached the top.
3134           if (holeStopKey != null) {
3135             // hole
3136             handler.handleHoleInRegionChain(key, holeStopKey);
3137           }
3138         }
3139         prevKey = key;
3140       }
3141
3142       // When the last region of a table is proper and has an empty end key,
3143       // 'prevKey' will be null.
3144       if (prevKey != null) {
3145         handler.handleRegionEndKeyNotEmpty(prevKey);
3146       }
3147
3148       // TODO fold this into the TableIntegrityHandler
3149       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3150         boolean ok = handleOverlapsParallel(handler, prevKey);
3151         if (!ok) {
3152           return false;
3153         }
3154       } else {
3155         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3156           handler.handleOverlapGroup(overlap);
3157         }
3158       }
3159
3160       if (details) {
3161         // do full region split map dump
3162         errors.print("---- Table '"  +  this.tableName
3163             + "': region split map");
3164         dump(splits, regions);
3165         errors.print("---- Table '"  +  this.tableName
3166             + "': overlap groups");
3167         dumpOverlapProblems(overlapGroups);
3168         errors.print("There are " + overlapGroups.keySet().size()
3169             + " overlap groups with " + overlapGroups.size()
3170             + " overlapping regions");
3171       }
3172       if (!sidelinedRegions.isEmpty()) {
3173         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3174         errors.print("---- Table '"  +  this.tableName
3175             + "': sidelined big overlapped regions");
3176         dumpSidelinedRegions(sidelinedRegions);
3177       }
3178       return errors.getErrorList().size() == originalErrorsCount;
3179     }
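
    /*
     * Illustrative sketch (hypothetical helper, not called above): how checkRegionChain()
     * interprets the number of ranges covering a split point -- exactly one covering range is
     * healthy, more than one marks an overlap group, and zero marks a hole that runs to the next
     * higher split point.
     */
    private String classifyCoverage(int coveringRanges) {
      if (coveringRanges == 1) {
        return "ok";          // no overlap, not a hole
      } else if (coveringRanges > 1) {
        return "overlap";     // members join the current problem group
      }
      return "hole";          // plugged by creating a new empty region
    }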
3180
3181     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3182         throws IOException {
3183       // we parallelize the overlap handler for the case where we have lots of groups to fix.
3184       // We can safely assume each group is independent.
3185       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
3186       List<Future<Void>> rets;
3187       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3188         //
3189         merges.add(new WorkItemOverlapMerge(overlap, handler));
3190       }
3191       try {
3192         rets = executor.invokeAll(merges);
3193       } catch (InterruptedException e) {
3194         LOG.error("Overlap merges were interrupted", e);
3195         return false;
3196       }
3197       for(int i=0; i<merges.size(); i++) {
3198         WorkItemOverlapMerge work = merges.get(i);
3199         Future<Void> f = rets.get(i);
3200         try {
3201           f.get();
3202         } catch(ExecutionException e) {
3203           LOG.warn("Failed to merge overlap group " + work, e.getCause());
3204         } catch (InterruptedException e) {
3205           LOG.error("Waiting for overlap merges was interrupted", e);
3206           return false;
3207         }
3208       }
3209       return true;
3210     }
3211
3212     /**
3213      * This dumps the split points and their covering regions in a readable way for debugging.
3214      *
3215      * @param splits the sorted set of split keys for the table
3216      * @param regions the ranges covering each split key
3217      */
3218     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3219       // we display this way because the last end key should be displayed as well.
3220       StringBuilder sb = new StringBuilder();
3221       for (byte[] k : splits) {
3222         sb.setLength(0); // clear out existing buffer, if any.
3223         sb.append(Bytes.toStringBinary(k) + ":\t");
3224         for (HbckInfo r : regions.get(k)) {
3225           sb.append("[ "+ r.toString() + ", "
3226               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3227         }
3228         errors.print(sb.toString());
3229       }
3230     }
3231   }
3232
3233   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3234     // we display this way because the last end key should be displayed as
3235     // well.
3236     for (byte[] k : regions.keySet()) {
3237       errors.print(Bytes.toStringBinary(k) + ":");
3238       for (HbckInfo r : regions.get(k)) {
3239         errors.print("[ " + r.toString() + ", "
3240             + Bytes.toStringBinary(r.getEndKey()) + "]");
3241       }
3242       errors.print("----");
3243     }
3244   }
3245
3246   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3247     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3248       TableName tableName = entry.getValue().getTableName();
3249       Path path = entry.getKey();
3250       errors.print("This sidelined region dir should be bulk loaded: "
3251         + path.toString());
3252       errors.print("Bulk load command looks like: "
3253         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3254         + path.toUri().getPath() + " "+ tableName);
3255     }
3256   }
3257
3258   public Multimap<byte[], HbckInfo> getOverlapGroups(
3259       TableName table) {
3260     TableInfo ti = tablesInfo.get(table);
3261     return ti.overlapGroups;
3262   }
3263
3264   /**
3265    * Return a list of user-space table names whose metadata have not been
3266    * modified in the last few milliseconds specified by timelag.
3267    * If none of REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3268    * SPLITA_QUALIFIER or SPLITB_QUALIFIER has changed in the last milliseconds
3269    * specified by timelag, then the table is a candidate to be returned.
3270    *
3271    * @return tables that have not been modified recently
3272    */
3273   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3274     List<TableName> tableNames = new ArrayList<TableName>();
3275     long now = EnvironmentEdgeManager.currentTime();
3276
3277     for (HbckInfo hbi : regionInfoMap.values()) {
3278       MetaEntry info = hbi.metaEntry;
3279
3280       // if the start key is zero, then we have found the first region of a table.
3281       // pick only those tables that were not modified in the last few milliseconds.
3282       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3283         if (info.modTime + timelag < now) {
3284           tableNames.add(info.getTable());
3285         } else {
3286           numSkipped.incrementAndGet(); // one more in-flux table
3287         }
3288       }
3289     }
3290     return getHTableDescriptors(tableNames);
3291   }
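
  /*
   * Illustrative sketch (hypothetical helper, not used above): the timelag filter applied in
   * getTables() -- a table is only reported when its most recent hbase:meta modification is
   * older than the configured lag window.
   */
  private boolean outsideLagWindow(long modTime, long now) {
    return modTime + timelag < now;
  }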
3292
3293   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3294     HTableDescriptor[] htd = new HTableDescriptor[0];
3295     LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3296     try (Connection conn = ConnectionFactory.createConnection(getConf());
3297         Admin admin = conn.getAdmin()) {
3298       htd = admin.getTableDescriptorsByTableName(tableNames);
3299     } catch (IOException e) {
3300       LOG.debug("Exception getting table descriptors", e);
3301     }
3302     return htd;
3303   }
3304
3305   /**
3306    * Gets the entry in regionInfo corresponding to the given encoded
3307    * region name. If the region has not been seen yet, a new entry is added
3308    * and returned.
3309    */
3310   private synchronized HbckInfo getOrCreateInfo(String name) {
3311     HbckInfo hbi = regionInfoMap.get(name);
3312     if (hbi == null) {
3313       hbi = new HbckInfo(null);
3314       regionInfoMap.put(name, hbi);
3315     }
3316     return hbi;
3317   }
3318
3319   private void checkAndFixTableLocks() throws IOException {
3320     TableLockChecker checker = new TableLockChecker(zkw, errors);
3321     checker.checkTableLocks();
3322
3323     if (this.fixTableLocks) {
3324       checker.fixExpiredTableLocks();
3325     }
3326   }
3327
3328   private void checkAndFixReplication() throws IOException {
3329     ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
3330     checker.checkUnDeletedQueues();
3331
3332     if (checker.hasUnDeletedQueues() && this.fixReplication) {
3333       checker.fixUnDeletedQueues();
3334       setShouldRerun();
3335     }
3336   }
3337
3338   /**
3339    * Check values in regionInfo for hbase:meta.
3340    * Check whether zero or more than one region claims to be hbase:meta.
3341    * If there are inconsistencies (i.e. zero regions or more than one region
3342    * pretends to hold hbase:meta), try to fix that and report an error.
3343    * @throws IOException from HBaseFsckRepair functions
3344    * @throws KeeperException
3345    * @throws InterruptedException
3346    */
3347   boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3348     Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
3349     for (HbckInfo value : regionInfoMap.values()) {
3350       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3351         metaRegions.put(value.getReplicaId(), value);
3352       }
3353     }
3354     int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3355         .getRegionReplication();
3356     boolean noProblem = true;
3357     // There will always be entries in regionInfoMap corresponding to hbase:meta and its replicas
3358     // Check the deployed servers. It should be exactly one server for each replica.
3359     for (int i = 0; i < metaReplication; i++) {
3360       HbckInfo metaHbckInfo = metaRegions.remove(i);
3361       List<ServerName> servers = new ArrayList<ServerName>();
3362       if (metaHbckInfo != null) {
3363         servers = metaHbckInfo.deployedOn;
3364       }
3365       if (servers.size() != 1) {
3366         noProblem = false;
3367         if (servers.size() == 0) {
3368           assignMetaReplica(i);
3369         } else if (servers.size() > 1) {
3370           errors
3371           .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
3372                        metaHbckInfo.getReplicaId() + " is found on more than one region server.");
3373           if (shouldFixAssignments()) {
3374             errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3375                          metaHbckInfo.getReplicaId() +"..");
3376             setShouldRerun();
3377             // try to fix it (treat it as a dupe assignment)
3378             HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3379           }
3380         }
3381       }
3382     }
3383     // unassign whatever is remaining in metaRegions. They are excess replicas.
3384     for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3385       noProblem = false;
3386       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3387           "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3388           ", deployed " + metaRegions.size());
3389       if (shouldFixAssignments()) {
3390         errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3391             " of hbase:meta..");
3392         setShouldRerun();
3393         unassignMetaReplica(entry.getValue());
3394       }
3395     }
3396     // if noProblem is false, rerun hbck with hopefully fixed META
3397     // if noProblem is true, no errors, so continue normally
3398     return noProblem;
3399   }
3400
3401   private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3402   KeeperException {
3403     undeployRegions(hi);
3404     ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
3405   }
3406
3407   private void assignMetaReplica(int replicaId)
3408       throws IOException, KeeperException, InterruptedException {
3409     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3410         replicaId + " is not found on any region server.");
3411     if (shouldFixAssignments()) {
3412       errors.print("Trying to fix a problem with hbase:meta..");
3413       setShouldRerun();
3414       // try to fix it (treat it as unassigned region)
3415       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3416           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3417       HBaseFsckRepair.fixUnassigned(admin, h);
3418       HBaseFsckRepair.waitUntilAssigned(admin, h);
3419     }
3420   }
3421
3422   /**
3423    * Scan hbase:meta, adding all regions found to the regionInfo map.
3424    * @throws IOException if an error is encountered
3425    */
3426   boolean loadMetaEntries() throws IOException {
3427     MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3428       int countRecord = 1;
3429
3430       // comparator to order Cells by timestamp, used to pick the latest modification
3431       final Comparator<Cell> comp = new Comparator<Cell>() {
3432         @Override
3433         public int compare(Cell k1, Cell k2) {
3434           return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3435         }
3436       };
3437
3438       @Override
3439       public boolean visit(Result result) throws IOException {
3440         try {
3441
3442           // record the latest modification of this META record
3443           long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3444           RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3445           if (rl == null) {
3446             emptyRegionInfoQualifiers.add(result);
3447             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3448               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3449             return true;
3450           }
3451           ServerName sn = null;
3452           if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
3453               rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3454             emptyRegionInfoQualifiers.add(result);
3455             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3456               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3457             return true;
3458           }
3459           HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3460           if (!(isTableIncluded(hri.getTable())
3461               || hri.isMetaRegion())) {
3462             return true;
3463           }
3464           PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
3465           for (HRegionLocation h : rl.getRegionLocations()) {
3466             if (h == null || h.getRegionInfo() == null) {
3467               continue;
3468             }
3469             sn = h.getServerName();
3470             hri = h.getRegionInfo();
3471
3472             MetaEntry m = null;
3473             if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
3474               m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3475             } else {
3476               m = new MetaEntry(hri, sn, ts, null, null);
3477             }
3478             HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3479             if (previous == null) {
3480               regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3481             } else if (previous.metaEntry == null) {
3482               previous.metaEntry = m;
3483             } else {
3484               throw new IOException("Two entries in hbase:meta are the same " + previous);
3485             }
3486           }
3487           PairOfSameType<HRegionInfo> mergeRegions = MetaTableAccessor.getMergeRegions(result);
3488           for (HRegionInfo mergeRegion : new HRegionInfo[] {
3489               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3490             if (mergeRegion != null) {
3491               // This region has already been merged
3492               HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3493               hbInfo.setMerged(true);
3494             }
3495           }
3496
3497           // show proof of progress to the user, once for every 100 records.
3498           if (countRecord % 100 == 0) {
3499             errors.progress();
3500           }
3501           countRecord++;
3502           return true;
3503         } catch (RuntimeException e) {
3504           LOG.error("Result=" + result);
3505           throw e;
3506         }
3507       }
3508     };
3509     if (!checkMetaOnly) {
3510       // Scan hbase:meta to pick up user regions
3511       MetaTableAccessor.fullScanRegions(connection, visitor);
3512     }
3513
3514     errors.print("");
3515     return true;
3516   }
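
  /*
   * Illustrative sketch (hypothetical helper, not used above): the visitor in loadMetaEntries()
   * records the latest modification of each meta row as the maximum cell timestamp, which is
   * what Collections.max with the timestamp comparator computes.
   */
  private static long latestCellTimestamp(Result result) {
    long latest = Long.MIN_VALUE;
    for (Cell cell : result.listCells()) {
      latest = Math.max(latest, cell.getTimestamp());
    }
    return latest;
  }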
3517
3518   /**
3519    * Stores the regioninfo entries scanned from META
3520    */
3521   static class MetaEntry extends HRegionInfo {
3522     ServerName regionServer;   // server hosting this region
3523     long modTime;          // timestamp of the most recent metadata modification
3524     HRegionInfo splitA, splitB; // split daughters
3525
3526     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3527       this(rinfo, regionServer, modTime, null, null);
3528     }
3529
3530     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3531         HRegionInfo splitA, HRegionInfo splitB) {
3532       super(rinfo);
3533       this.regionServer = regionServer;
3534       this.modTime = modTime;
3535       this.splitA = splitA;
3536       this.splitB = splitB;
3537     }
3538
3539     @Override
3540     public boolean equals(Object o) {
3541       boolean superEq = super.equals(o);
3542       if (!superEq) {
3543         return superEq;
3544       }
3545
3546       MetaEntry me = (MetaEntry) o;
3547       if (!regionServer.equals(me.regionServer)) {
3548         return false;
3549       }
3550       return (modTime == me.modTime);
3551     }
3552
3553     @Override
3554     public int hashCode() {
3555       int hash = Arrays.hashCode(getRegionName());
3556       hash ^= getRegionId();
3557       hash ^= Arrays.hashCode(getStartKey());
3558       hash ^= Arrays.hashCode(getEndKey());
3559       hash ^= Boolean.valueOf(isOffline()).hashCode();
3560       hash ^= getTable().hashCode();
3561       if (regionServer != null) {
3562         hash ^= regionServer.hashCode();
3563       }
3564       hash ^= modTime;
3565       return hash;
3566     }
3567   }
3568
3569   /**
3570    * Stores the regioninfo entries from HDFS
3571    */
3572   static class HdfsEntry {
3573     HRegionInfo hri;
3574     Path hdfsRegionDir = null;
3575     long hdfsRegionDirModTime  = 0;
3576     boolean hdfsRegioninfoFilePresent = false;
3577     boolean hdfsOnlyEdits = false;
3578   }
3579
3580   /**
3581    * Stores the regioninfo retrieved from Online region servers.
3582    */
3583   static class OnlineEntry {
3584     HRegionInfo hri;
3585     ServerName hsa;
3586
3587     @Override
3588     public String toString() {
3589       return hsa.toString() + ";" + hri.getRegionNameAsString();
3590     }
3591   }
3592
3593   /**
3594    * Maintain information about a particular region.  It gathers information
3595    * from three places -- HDFS, META, and region servers.
3596    */
3597   public static class HbckInfo implements KeyRange {
3598     private MetaEntry metaEntry = null; // info in META
3599     private HdfsEntry hdfsEntry = null; // info in HDFS
3600     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3601     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3602     private boolean skipChecks = false; // whether to skip further checks to this region info.
3603     private boolean isMerged = false; // whether this region has already been merged into another one
3604     private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
3605     private HRegionInfo primaryHRIForDeployedReplica = null;
3606
3607     HbckInfo(MetaEntry metaEntry) {
3608       this.metaEntry = metaEntry;
3609     }
3610
3611     public synchronized int getReplicaId() {
3612       return metaEntry != null? metaEntry.getReplicaId(): deployedReplicaId;
3613     }
3614
3615     public synchronized void addServer(HRegionInfo hri, ServerName server) {
3616       OnlineEntry rse = new OnlineEntry();
3617       rse.hri = hri;
3618       rse.hsa = server;
3619       this.deployedEntries.add(rse);
3620       this.deployedOn.add(server);
3621       // save the replicaId that we see deployed in the cluster
3622       this.deployedReplicaId = hri.getReplicaId();
3623       this.primaryHRIForDeployedReplica =
3624           RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3625     }
3626
3627     @Override
3628     public synchronized String toString() {
3629       StringBuilder sb = new StringBuilder();
3630       sb.append("{ meta => ");
3631       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3632       sb.append( ", hdfs => " + getHdfsRegionDir());
3633       sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3634       sb.append( ", replicaId => " + getReplicaId());
3635       sb.append(" }");
3636       return sb.toString();
3637     }
3638
3639     @Override
3640     public byte[] getStartKey() {
3641       if (this.metaEntry != null) {
3642         return this.metaEntry.getStartKey();
3643       } else if (this.hdfsEntry != null) {
3644         return this.hdfsEntry.hri.getStartKey();
3645       } else {
3646         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3647         return null;
3648       }
3649     }
3650
3651     @Override
3652     public byte[] getEndKey() {
3653       if (this.metaEntry != null) {
3654         return this.metaEntry.getEndKey();
3655       } else if (this.hdfsEntry != null) {
3656         return this.hdfsEntry.hri.getEndKey();
3657       } else {
3658         LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3659         return null;
3660       }
3661     }
3662
3663     public TableName getTableName() {
3664       if (this.metaEntry != null) {
3665         return this.metaEntry.getTable();
3666       } else if (this.hdfsEntry != null) {
3667         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3668         // so we get the name from the Path
3669         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3670         return FSUtils.getTableName(tableDir);
3671       } else {
3672         // return the info from the first online/deployed hri
3673         for (OnlineEntry e : deployedEntries) {
3674           return e.hri.getTable();
3675         }
3676         return null;
3677       }
3678     }
3679
3680     public String getRegionNameAsString() {
3681       if (metaEntry != null) {
3682         return metaEntry.getRegionNameAsString();
3683       } else if (hdfsEntry != null) {
3684         if (hdfsEntry.hri != null) {
3685           return hdfsEntry.hri.getRegionNameAsString();
3686         }
3687       } else {
3688         // return the info from the first online/deployed hri
3689         for (OnlineEntry e : deployedEntries) {
3690           return e.hri.getRegionNameAsString();
3691         }
3692       }
3693       return null;
3694     }
3695
3696     public byte[] getRegionName() {
3697       if (metaEntry != null) {
3698         return metaEntry.getRegionName();
3699       } else if (hdfsEntry != null) {
3700         return hdfsEntry.hri.getRegionName();
3701       } else {
3702         // return the info from the first online/deployed hri
3703         for (OnlineEntry e : deployedEntries) {
3704           return e.hri.getRegionName();
3705         }
3706         return null;
3707       }
3708     }
3709
3710     public HRegionInfo getPrimaryHRIForDeployedReplica() {
3711       return primaryHRIForDeployedReplica;
3712     }
3713
3714     Path getHdfsRegionDir() {
3715       if (hdfsEntry == null) {
3716         return null;
3717       }
3718       return hdfsEntry.hdfsRegionDir;
3719     }
3720
3721     boolean containsOnlyHdfsEdits() {
3722       if (hdfsEntry == null) {
3723         return false;
3724       }
3725       return hdfsEntry.hdfsOnlyEdits;
3726     }
3727
3728     boolean isHdfsRegioninfoPresent() {
3729       if (hdfsEntry == null) {
3730         return false;
3731       }
3732       return hdfsEntry.hdfsRegioninfoFilePresent;
3733     }
3734
3735     long getModTime() {
3736       if (hdfsEntry == null) {
3737         return 0;
3738       }
3739       return hdfsEntry.hdfsRegionDirModTime;
3740     }
3741
3742     HRegionInfo getHdfsHRI() {
3743       if (hdfsEntry == null) {
3744         return null;
3745       }
3746       return hdfsEntry.hri;
3747     }
3748
3749     public void setSkipChecks(boolean skipChecks) {
3750       this.skipChecks = skipChecks;
3751     }
3752
3753     public boolean isSkipChecks() {
3754       return skipChecks;
3755     }
3756
3757     public void setMerged(boolean isMerged) {
3758       this.isMerged = isMerged;
3759     }
3760
3761     public boolean isMerged() {
3762       return this.isMerged;
3763     }
3764   }
3765
3766   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3767     @Override
3768     public int compare(HbckInfo l, HbckInfo r) {
3769       if (l == r) {
3770         // same instance
3771         return 0;
3772       }
3773
3774       int tableCompare = l.getTableName().compareTo(r.getTableName());
3775       if (tableCompare != 0) {
3776         return tableCompare;
3777       }
3778
3779       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3780           l.getStartKey(), r.getStartKey());
3781       if (startComparison != 0) {
3782         return startComparison;
3783       }
3784
3785       // Special case for absolute endkey
3786       byte[] endKey = r.getEndKey();
3787       endKey = (endKey.length == 0) ? null : endKey;
3788       byte[] endKey2 = l.getEndKey();
3789       endKey2 = (endKey2.length == 0) ? null : endKey2;
3790       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3791           endKey2,  endKey);
3792
3793       if (endComparison != 0) {
3794         return endComparison;
3795       }
3796
3797       // use regionId as tiebreaker.
3798       // Null is considered after all possible values so make it bigger.
3799       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3800         return 0;
3801       }
3802       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3803         return 1;
3804       }
3805       // l.hdfsEntry must not be null
3806       if (r.hdfsEntry == null) {
3807         return -1;
3808       }
3809       // both l.hdfsEntry and r.hdfsEntry must not be null.
3810       return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
3811     }
3812   };
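
  /*
   * Illustrative sketch (hypothetical helper, not used by the comparator above): the null-last
   * convention the regionId tiebreaker relies on -- an HbckInfo without an HDFS entry sorts
   * after one that has an HDFS entry.
   */
  private static int compareNullLast(Long left, Long right) {
    if (left == null && right == null) return 0;
    if (left == null) return 1;    // null sorts after any value
    if (right == null) return -1;
    return Long.compare(left, right);
  }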
3813
3814   /**
3815    * Prints summary of all tables found on the system.
3816    */
3817   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3818     StringBuilder sb = new StringBuilder();
3819     int numOfSkippedRegions;
3820     errors.print("Summary:");
3821     for (TableInfo tInfo : tablesInfo.values()) {
3822       numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
3823           skippedRegions.get(tInfo.getName()).size() : 0;
3824
3825       if (errors.tableHasErrors(tInfo)) {
3826         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3827       } else if (numOfSkippedRegions > 0){
3828         errors.print("Table " + tInfo.getName() + " is okay (with "
3829           + numOfSkippedRegions + " skipped regions).");
3830       }
3831       else {
3832         errors.print("Table " + tInfo.getName() + " is okay.");
3833       }
3834       errors.print("    Number of regions: " + tInfo.getNumRegions());
3835       if (numOfSkippedRegions > 0) {
3836         Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
3837         System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
3838         System.out.println("      List of skipped regions:");
3839         for(String sr : skippedRegionStrings) {
3840           System.out.println("        " + sr);
3841         }
3842       }
3843       sb.setLength(0); // clear out existing buffer, if any.
3844       sb.append("    Deployed on: ");
3845       for (ServerName server : tInfo.deployedOn) {
3846         sb.append(" " + server.toString());
3847       }
3848       errors.print(sb.toString());
3849     }
3850   }
3851
3852   static ErrorReporter getErrorReporter(
3853       final Configuration conf) throws ClassNotFoundException {
3854     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3855     return ReflectionUtils.newInstance(reporter, conf);
3856   }
3857
3858   public interface ErrorReporter {
3859     enum ERROR_CODE {
3860       UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3861       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
3862       NOT_DEPLOYED,
3863       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3864       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3865       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3866       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3867       WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE,
3868       NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE
3869     }
3870     void clear();
3871     void report(String message);
3872     void reportError(String message);
3873     void reportError(ERROR_CODE errorCode, String message);
3874     void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3875     void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3876     void reportError(
3877       ERROR_CODE errorCode,
3878       String message,
3879       TableInfo table,
3880       HbckInfo info1,
3881       HbckInfo info2
3882     );
3883     int summarize();
3884     void detail(String details);
3885     ArrayList<ERROR_CODE> getErrorList();
3886     void progress();
3887     void print(String message);
3888     void resetErrors();
3889     boolean tableHasErrors(TableInfo table);
3890   }
3891
3892   static class PrintingErrorReporter implements ErrorReporter {
3893     public int errorCount = 0;
3894     private int showProgress;
3895     // How frequently calls to progress() will create output
3896     private static final int progressThreshold = 100;
3897
3898     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3899
3900     // for use by unit tests to verify which errors were discovered
3901     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3902
3903     @Override
3904     public void clear() {
3905       errorTables.clear();
3906       errorList.clear();
3907       errorCount = 0;
3908     }
3909
3910     @Override
3911     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3912       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3913         System.err.println(message);
3914         return;
3915       }
3916
3917       errorList.add(errorCode);
3918       if (!summary) {
3919         System.out.println("ERROR: " + message);
3920       }
3921       errorCount++;
3922       showProgress = 0;
3923     }
3924
3925     @Override
3926     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3927       errorTables.add(table);
3928       reportError(errorCode, message);
3929     }
3930
3931     @Override
3932     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3933                                          HbckInfo info) {
3934       errorTables.add(table);
3935       String reference = "(region " + info.getRegionNameAsString() + ")";
3936       reportError(errorCode, reference + " " + message);
3937     }
3938
3939     @Override
3940     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3941                                          HbckInfo info1, HbckInfo info2) {
3942       errorTables.add(table);
3943       String reference = "(regions " + info1.getRegionNameAsString()
3944           + " and " + info2.getRegionNameAsString() + ")";
3945       reportError(errorCode, reference + " " + message);
3946     }
3947
3948     @Override
3949     public synchronized void reportError(String message) {
3950       reportError(ERROR_CODE.UNKNOWN, message);
3951     }
3952
3953     /**
3954      * Report error information, but do not increment the error count.  Intended for cases
3955      * where the actual error would have been reported previously.
3956      * @param message the error detail to print
3957      */
3958     @Override
3959     public synchronized void report(String message) {
3960       if (!summary) {
3961         System.out.println("ERROR: " + message);
3962       }
3963       showProgress = 0;
3964     }
3965
3966     @Override
3967     public synchronized int summarize() {
3968       System.out.println(Integer.toString(errorCount) +
3969                          " inconsistencies detected.");
3970       if (errorCount == 0) {
3971         System.out.println("Status: OK");
3972         return 0;
3973       } else {
3974         System.out.println("Status: INCONSISTENT");
3975         return -1;
3976       }
3977     }
3978
3979     @Override
3980     public ArrayList<ERROR_CODE> getErrorList() {
3981       return errorList;
3982     }
3983
3984     @Override
3985     public synchronized void print(String message) {
3986       if (!summary) {
3987         System.out.println(message);
3988       }
3989     }
3990
3991     @Override
3992     public boolean tableHasErrors(TableInfo table) {
3993       return errorTables.contains(table);
3994     }
3995
3996     @Override
3997     public void resetErrors() {
3998       errorCount = 0;
3999     }
4000
4001     @Override
4002     public synchronized void detail(String message) {
4003       if (details) {
4004         System.out.println(message);
4005       }
4006       showProgress = 0;
4007     }
4008
4009     @Override
4010     public synchronized void progress() {
4011       if (showProgress++ == PROGRESS_THRESHOLD) {
4012         if (!summary) {
4013           System.out.print(".");
4014         }
4015         showProgress = 0;
4016       }
4017     }
4018   }
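
       // PrintingErrorReporter writes findings to stdout/stderr. If a different sink is
       // wanted, one option (an illustrative sketch only; LoggingErrorReporter is a
       // hypothetical name) is to extend it and also forward errors to the log, then
       // select that class via the "hbasefsck.errorreporter" key shown above:
       //
       //   static class LoggingErrorReporter extends PrintingErrorReporter {
       //     @Override
       //     public synchronized void reportError(ERROR_CODE errorCode, String message) {
       //       LOG.error(errorCode + ": " + message);  // extra sink, for illustration
       //       super.reportError(errorCode, message);
       //     }
       //   }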
4019
4020   /**
4021    * Contact a region server and collect information about all regions it is currently serving.
4022    */
4023   static class WorkItemRegion implements Callable<Void> {
4024     private final HBaseFsck hbck;
4025     private final ServerName rsinfo;
4026     private final ErrorReporter errors;
4027     private final ClusterConnection connection;
4028
4029     WorkItemRegion(HBaseFsck hbck, ServerName info,
4030                    ErrorReporter errors, ClusterConnection connection) {
4031       this.hbck = hbck;
4032       this.rsinfo = info;
4033       this.errors = errors;
4034       this.connection = connection;
4035     }
4036
4037     @Override
4038     public synchronized Void call() throws IOException {
4039       errors.progress();
4040       try {
4041         BlockingInterface server = connection.getAdmin(rsinfo);
4042
4043         // list all online regions from this region server
4044         List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
4045         regions = filterRegions(regions);
4046
4047         if (details) {
4048           errors.detail("RegionServer: " + rsinfo.getServerName() +
4049                            " number of regions: " + regions.size());
4050           for (HRegionInfo rinfo: regions) {
4051             errors.detail("  " + rinfo.getRegionNameAsString() +
4052                              " id: " + rinfo.getRegionId() +
4053                              " encoded_name: " + rinfo.getEncodedName() +
4054                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
4055                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
4056           }
4057         }
4058
4059         // check to see if the existence of this region matches the region in META
4060         for (HRegionInfo r:regions) {
4061           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4062           hbi.addServer(r, rsinfo);
4063         }
4064       } catch (IOException e) {          // unable to connect to the region server.
4065         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
4066           " Unable to fetch region information. " + e);
4067         throw e;
4068       }
4069       return null;
4070     }
4071
4072     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
4073       List<HRegionInfo> ret = Lists.newArrayList();
4074       for (HRegionInfo hri : regions) {
4075         if (hri.isMetaTable() || (!hbck.checkMetaOnly
4076             && hbck.isTableIncluded(hri.getTable()))) {
4077           ret.add(hri);
4078         }
4079       }
4080       return ret;
4081     }
4082   }
4083
4084   /**
4085    * Contact hdfs and gather all information about the specified table directory
4086    * into the regioninfo list.
4087    */
4088   class WorkItemHdfsDir implements Callable<Void> {
4089     private FileStatus tableDir;
4090     private ErrorReporter errors;
4091     private FileSystem fs;
4092
4093     WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4094                     FileStatus status) {
4095       this.fs = fs;
4096       this.tableDir = status;
4097       this.errors = errors;
4098     }
4099
4100     @Override
4101     public synchronized Void call() throws InterruptedException, ExecutionException {
4102       final Vector<Exception> exceptions = new Vector<Exception>();
4103
4104       try {
4105         final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4106         final List<Future<?>> futures = new ArrayList<Future<?>>(regionDirs.length);
4107
4108         for (final FileStatus regionDir : regionDirs) {
4109           errors.progress();
4110           final String encodedName = regionDir.getPath().getName();
4111           // ignore directories that aren't hexadecimal
4112           if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
4113             continue;
4114           }
4115
4116           if (!exceptions.isEmpty()) {
4117             break;
4118           }
4119
4120           futures.add(executor.submit(new Runnable() {
4121             @Override
4122             public void run() {
4123               try {
4124                 LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
4125
4126                 Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
4127                 boolean regioninfoFileExists = fs.exists(regioninfoFile);
4128
4129                 if (!regioninfoFileExists) {
4130                   // As tables become larger, it is increasingly likely that by the time we
4131                   // reach a given region dir it will already be gone due to region splits/merges.
4132                   if (!fs.exists(regionDir.getPath())) {
4133                     LOG.warn("By the time we tried to process this region dir it was already gone: "
4134                         + regionDir.getPath());
4135                     return;
4136                   }
4137                 }
4138
4139                 HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4140                 HdfsEntry he = new HdfsEntry();
4141                 synchronized (hbi) {
4142                   if (hbi.getHdfsRegionDir() != null) {
4143                     errors.print("Directory " + encodedName + " duplicate?? " +
4144                                  hbi.getHdfsRegionDir());
4145                   }
4146
4147                   he.hdfsRegionDir = regionDir.getPath();
4148                   he.hdfsRegionDirModTime = regionDir.getModificationTime();
4149                   he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4150                   // we add to orphan list when we attempt to read .regioninfo
4151
4152                   // Set a flag if this region contains only edits
4153                   // This is a special case for a region left behind after a split
4154                   he.hdfsOnlyEdits = true;
4155                   FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4156                   Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4157                   for (FileStatus subDir : subDirs) {
4158                     errors.progress();
4159                     String sdName = subDir.getPath().getName();
4160                     if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4161                       he.hdfsOnlyEdits = false;
4162                       break;
4163                     }
4164                   }
4165                   hbi.hdfsEntry = he;
4166                 }
4167               } catch (Exception e) {
4168                 LOG.error("Could not load region dir", e);
4169                 exceptions.add(e);
4170               }
4171             }
4172           }));
4173         }
4174
4175         // Ensure all pending tasks are complete (or that we run into an exception)
4176         for (Future<?> f : futures) {
4177           if (!exceptions.isEmpty()) {
4178             break;
4179           }
4180           try {
4181             f.get();
4182           } catch (ExecutionException e) {
4183             LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
4184             // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4185           }
4186         }
4187       } catch (IOException e) {
4188         LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4189         exceptions.add(e);
4190       } finally {
4191         if (!exceptions.isEmpty()) {
4192           errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4193               + tableDir.getPath().getName()
4194               + " Unable to fetch all HDFS region information. ");
4195           // Just throw the first exception as an indication something bad happened
4196           // Don't need to propagate all the exceptions, we already logged them all anyway
4197           throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement());
4198         }
4199       }
4200       return null;
4201     }
4202   }
4203
4204   /**
4205    * Contact hdfs and load the .regioninfo file for the specified region,
4206    * recording the result in the corresponding HbckInfo entry.
4207    */
4208   static class WorkItemHdfsRegionInfo implements Callable<Void> {
4209     private HbckInfo hbi;
4210     private HBaseFsck hbck;
4211     private ErrorReporter errors;
4212
4213     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4214       this.hbi = hbi;
4215       this.hbck = hbck;
4216       this.errors = errors;
4217     }
4218
4219     @Override
4220     public synchronized Void call() throws IOException {
4221       // only load entries that haven't been loaded yet.
4222       if (hbi.getHdfsHRI() == null) {
4223         try {
4224           errors.progress();
4225           hbck.loadHdfsRegioninfo(hbi);
4226         } catch (IOException ioe) {
4227           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4228               + hbi.getTableName() + " in hdfs dir "
4229               + hbi.getHdfsRegionDir()
4230               + "!  It may be an invalid format or version file.  Treating as "
4231               + "an orphaned regiondir.";
4232           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4233           try {
4234             hbck.debugLsr(hbi.getHdfsRegionDir());
4235           } catch (IOException ioe2) {
4236             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4237             throw ioe2;
4238           }
4239           hbck.orphanHdfsDirs.add(hbi);
4240           throw ioe;
4241         }
4242       }
4243       return null;
4244     }
4245   }
4246
4247   /**
4248    * Display the full report from fsck. This displays all live and dead region
4249    * servers, and all known regions.
4250    */
4251   public static void setDisplayFullReport() {
4252     details = true;
4253   }
4254
4255   /**
4256    * Set exclusive mode.
4257    */
4258   public static void setForceExclusive() {
4259     forceExclusive = true;
4260   }
4261
4262   /**
4263    * Only one instance of hbck can modify HBase at a time.
4264    */
4265   public boolean isExclusive() {
4266     return fixAny || forceExclusive;
4267   }
4268
4269   /**
4270    * Set summary mode.
4271    * Print only a summary of the tables and their status (OK or INCONSISTENT).
4272    */
4273   static void setSummary() {
4274     summary = true;
4275   }
4276
4277   /**
4278    * Set hbase:meta check mode.
4279    * Print only info about hbase:meta table deployment/state
4280    */
4281   void setCheckMetaOnly() {
4282     checkMetaOnly = true;
4283   }
4284
4285   /**
4286    * Set region boundaries check mode.
4287    */
4288   void setRegionBoundariesCheck() {
4289     checkRegionBoundaries = true;
4290   }
4291
4292   /**
4293    * Set table locks fix mode.
4294    * Delete table locks that have been held for a long time.
4295    */
4296   public void setFixTableLocks(boolean shouldFix) {
4297     fixTableLocks = shouldFix;
4298     fixAny |= shouldFix;
4299   }
4300
4301   /**
4302    * Set replication fix mode.
4303    */
4304   public void setFixReplication(boolean shouldFix) {
4305     fixReplication = shouldFix;
4306     fixAny |= shouldFix;
4307   }
4308
4309   /**
4310    * Mark that fsck should be rerun. This is set after we have attempted to fix
4311    * something, so the tool can run once more and verify that the repair worked
4312    * and did not introduce further inconsistencies.
4313    * {@link #shouldRerun()} reports whether a rerun has been requested.
4314    */
4315   void setShouldRerun() {
4316     rerun = true;
4317   }
4318
4319   boolean shouldRerun() {
4320     return rerun;
4321   }
4322
4323   /**
4324    * Fix assignment inconsistencies found by fsck. When enabled, the fsck utility
4325    * will try to repair any region assignment errors it finds.
4326    */
4327   public void setFixAssignments(boolean shouldFix) {
4328     fixAssignments = shouldFix;
4329     fixAny |= shouldFix;
4330   }
4331
4332   boolean shouldFixAssignments() {
4333     return fixAssignments;
4334   }
4335
4336   public void setFixMeta(boolean shouldFix) {
4337     fixMeta = shouldFix;
4338     fixAny |= shouldFix;
4339   }
4340
4341   boolean shouldFixMeta() {
4342     return fixMeta;
4343   }
4344
4345   public void setFixEmptyMetaCells(boolean shouldFix) {
4346     fixEmptyMetaCells = shouldFix;
4347     fixAny |= shouldFix;
4348   }
4349
4350   boolean shouldFixEmptyMetaCells() {
4351     return fixEmptyMetaCells;
4352   }
4353
4354   public void setCheckHdfs(boolean checking) {
4355     checkHdfs = checking;
4356   }
4357
4358   boolean shouldCheckHdfs() {
4359     return checkHdfs;
4360   }
4361
4362   public void setFixHdfsHoles(boolean shouldFix) {
4363     fixHdfsHoles = shouldFix;
4364     fixAny |= shouldFix;
4365   }
4366
4367   boolean shouldFixHdfsHoles() {
4368     return fixHdfsHoles;
4369   }
4370
4371   public void setFixTableOrphans(boolean shouldFix) {
4372     fixTableOrphans = shouldFix;
4373     fixAny |= shouldFix;
4374   }
4375
4376   boolean shouldFixTableOrphans() {
4377     return fixTableOrphans;
4378   }
4379
4380   public void setFixHdfsOverlaps(boolean shouldFix) {
4381     fixHdfsOverlaps = shouldFix;
4382     fixAny |= shouldFix;
4383   }
4384
4385   boolean shouldFixHdfsOverlaps() {
4386     return fixHdfsOverlaps;
4387   }
4388
4389   public void setFixHdfsOrphans(boolean shouldFix) {
4390     fixHdfsOrphans = shouldFix;
4391     fixAny |= shouldFix;
4392   }
4393
4394   boolean shouldFixHdfsOrphans() {
4395     return fixHdfsOrphans;
4396   }
4397
4398   public void setFixVersionFile(boolean shouldFix) {
4399     fixVersionFile = shouldFix;
4400     fixAny |= shouldFix;
4401   }
4402
4403   public boolean shouldFixVersionFile() {
4404     return fixVersionFile;
4405   }
4406
4407   public void setSidelineBigOverlaps(boolean sbo) {
4408     this.sidelineBigOverlaps = sbo;
4409   }
4410
4411   public boolean shouldSidelineBigOverlaps() {
4412     return sidelineBigOverlaps;
4413   }
4414
4415   public void setFixSplitParents(boolean shouldFix) {
4416     fixSplitParents = shouldFix;
4417     fixAny |= shouldFix;
4418   }
4419
4420   boolean shouldFixSplitParents() {
4421     return fixSplitParents;
4422   }
4423
4424   public void setFixReferenceFiles(boolean shouldFix) {
4425     fixReferenceFiles = shouldFix;
4426     fixAny |= shouldFix;
4427   }
4428
4429   boolean shouldFixReferenceFiles() {
4430     return fixReferenceFiles;
4431   }
4432
4433   public boolean shouldIgnorePreCheckPermission() {
4434     return !fixAny || ignorePreCheckPermission;
4435   }
4436
4437   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4438     this.ignorePreCheckPermission = ignorePreCheckPermission;
4439   }
4440
4441   /**
4442    * @param mm maximum number of regions to merge into a single region.
4443    */
4444   public void setMaxMerge(int mm) {
4445     this.maxMerge = mm;
4446   }
4447
4448   public int getMaxMerge() {
4449     return maxMerge;
4450   }
4451
4452   public void setMaxOverlapsToSideline(int mo) {
4453     this.maxOverlapsToSideline = mo;
4454   }
4455
4456   public int getMaxOverlapsToSideline() {
4457     return maxOverlapsToSideline;
4458   }
4459
4460   /**
4461    * Only check/fix tables specified by the list.
4462    * An empty list means all tables are included.
4463    */
4464   boolean isTableIncluded(TableName table) {
4465     return tablesIncluded.isEmpty() || tablesIncluded.contains(table);
4466   }
4467
4468   public void includeTable(TableName table) {
4469     tablesIncluded.add(table);
4470   }
4471
4472   Set<TableName> getIncludedTables() {
4473     return new HashSet<TableName>(tablesIncluded);
4474   }
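
       // Table filtering sketch: hbck examines every table unless specific tables are
       // named. exec() below calls includeTable() for each bare command-line argument,
       // so, assuming tables "t1" and "t2" exist (the names are placeholders):
       //
       //   hbck.includeTable(TableName.valueOf("t1"));
       //   hbck.includeTable(TableName.valueOf("t2"));
       //   // isTableIncluded(TableName.valueOf("t3")) now returns false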
4475
4476   /**
4477    * We are interested in only those tables that have not changed their state in
4478    * hbase:meta during the last number of seconds specified by hbase.admin.fsck.timelag.
4479    * @param seconds - the time lag in seconds
4480    */
4481   public void setTimeLag(long seconds) {
4482     timelag = seconds * 1000; // convert to milliseconds
4483   }
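
       // Example: setTimeLag(60), the "-timelag 60" command-line option, stores
       // 60 * 1000 ms, so only regions whose hbase:meta entries have not been modified
       // in the last 60 seconds are examined.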
4484
4485   /**
4486    * Set the HDFS directory under which data is sidelined (backed up) during repairs.
4487    * @param sidelineDir - HDFS path to sideline data
4488    */
4489   public void setSidelineDir(String sidelineDir) {
4490     this.sidelineDir = new Path(sidelineDir);
4491   }
4492
4493   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4494     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4495   }
4496
4497   public HFileCorruptionChecker getHFilecorruptionChecker() {
4498     return hfcc;
4499   }
4500
4501   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4502     this.hfcc = hfcc;
4503   }
4504
4505   public void setRetCode(int code) {
4506     this.retcode = code;
4507   }
4508
4509   public int getRetCode() {
4510     return retcode;
4511   }
4512
4513   protected HBaseFsck printUsageAndExit() {
4514     StringWriter sw = new StringWriter(2048);
4515     PrintWriter out = new PrintWriter(sw);
4516     out.println("Usage: fsck [opts] {only tables}");
4517     out.println(" where [opts] are:");
4518     out.println("   -help Display help options (this)");
4519     out.println("   -details Display full report of all regions.");
4520     out.println("   -timelag <timeInSeconds>  Process only regions that have not" +
4521                        " experienced any metadata updates in the last" +
4522                        " <timeInSeconds> seconds.");
4523     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4524         " before checking if the fix worked if run with -fix");
4525     out.println("   -summary Print only summary of the tables and status.");
4526     out.println("   -metaonly Only check the state of the hbase:meta table.");
4527     out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4528     out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
4529     out.println("   -exclusive Abort if another hbck is exclusive or fixing.");
4530
4531     out.println("");
4532     out.println("  Metadata Repair options: (expert features, use with caution!)");
4533     out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
4534     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4535     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4536     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4537         + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4538     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4539     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4540     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4541     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4542     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4543     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4544     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow sidelining big overlaps");
4545     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4546     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4547     out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
4548     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4549     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4550         + " (empty REGIONINFO_QUALIFIER rows)");
4551
4552     out.println("");
4553     out.println("  Datafile Repair options: (expert features, use with caution!)");
4554     out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
4555     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
4556
4557     out.println("");
4558     out.println("  Metadata Repair shortcuts");
4559     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4560         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
4561     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4562
4563     out.println("");
4564     out.println("  Table lock options");
4565     out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4566
4567     out.println("");
4568     out.println(" Replication options");
4569     out.println("   -fixReplication   Deletes replication queues for removed peers");
4570
4571     out.flush();
4572     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4573
4574     setRetCode(-2);
4575     return this;
4576   }
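
       // Typical invocations, as illustrative examples only (hbck is normally launched
       // through the "hbase hbck" shell command, which ends up in main() below;
       // "TableFoo" is a placeholder table name):
       //
       //   hbase hbck                      # report-only check of the whole cluster
       //   hbase hbck -details             # full report, listing every region
       //   hbase hbck -summary TableFoo    # summary status for a single table
       //   hbase hbck -repair TableFoo     # apply the repair shortcut to that table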
4577
4578   /**
4579    * Main program
4580    *
4581    * @param args command-line arguments passed to hbck
4582    * @throws Exception if the tool fails to run
4583    */
4584   public static void main(String[] args) throws Exception {
4585     // create a fsck object
4586     Configuration conf = HBaseConfiguration.create();
4587     Path hbasedir = FSUtils.getRootDir(conf);
4588     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4589     FSUtils.setFsDefault(conf, new Path(defaultFs));
4590     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4591     System.exit(ret);
4592   }
4593
4594   /**
4595    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4596    */
4597   static class HBaseFsckTool extends Configured implements Tool {
4598     HBaseFsckTool(Configuration conf) { super(conf); }
4599     @Override
4600     public int run(String[] args) throws Exception {
4601       HBaseFsck hbck = new HBaseFsck(getConf());
4602       hbck.exec(hbck.executor, args);
4603       hbck.close();
4604       return hbck.getRetCode();
4605     }
4606   }
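
       // Because HBaseFsckTool runs through ToolRunner, generic Hadoop options such as
       // -D key=value are stripped and applied to the Configuration before run() is
       // invoked, so settings can be overridden per invocation, e.g. (illustrative):
       //
       //   hbase hbck -Dhbase.zookeeper.quorum=zkhost.example.com -details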
4607
4608
4609   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4610     ServiceException, InterruptedException {
4611     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4612
4613     boolean checkCorruptHFiles = false;
4614     boolean sidelineCorruptHFiles = false;
4615
4616     // Process command-line args.
4617     for (int i = 0; i < args.length; i++) {
4618       String cmd = args[i];
4619       if (cmd.equals("-help") || cmd.equals("-h")) {
4620         return printUsageAndExit();
4621       } else if (cmd.equals("-details")) {
4622         setDisplayFullReport();
4623       } else if (cmd.equals("-exclusive")) {
4624         setForceExclusive();
4625       } else if (cmd.equals("-timelag")) {
4626         if (i == args.length - 1) {
4627           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4628           return printUsageAndExit();
4629         }
4630         try {
4631           long timelag = Long.parseLong(args[i+1]);
4632           setTimeLag(timelag);
4633         } catch (NumberFormatException e) {
4634           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4635           return printUsageAndExit();
4636         }
4637         i++;
4638       } else if (cmd.equals("-sleepBeforeRerun")) {
4639         if (i == args.length - 1) {
4640           errors.reportError(ERROR_CODE.WRONG_USAGE,
4641             "HBaseFsck: -sleepBeforeRerun needs a value.");
4642           return printUsageAndExit();
4643         }
4644         try {
4645           sleepBeforeRerun = Long.parseLong(args[i+1]);
4646         } catch (NumberFormatException e) {
4647           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4648           return printUsageAndExit();
4649         }
4650         i++;
4651       } else if (cmd.equals("-sidelineDir")) {
4652         if (i == args.length - 1) {
4653           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4654           return printUsageAndExit();
4655         }
4656         i++;
4657         setSidelineDir(args[i]);
4658       } else if (cmd.equals("-fix")) {
4659         errors.reportError(ERROR_CODE.WRONG_USAGE,
4660           "This option is deprecated, please use -fixAssignments instead.");
4661         setFixAssignments(true);
4662       } else if (cmd.equals("-fixAssignments")) {
4663         setFixAssignments(true);
4664       } else if (cmd.equals("-fixMeta")) {
4665         setFixMeta(true);
4666       } else if (cmd.equals("-noHdfsChecking")) {
4667         setCheckHdfs(false);
4668       } else if (cmd.equals("-fixHdfsHoles")) {
4669         setFixHdfsHoles(true);
4670       } else if (cmd.equals("-fixHdfsOrphans")) {
4671         setFixHdfsOrphans(true);
4672       } else if (cmd.equals("-fixTableOrphans")) {
4673         setFixTableOrphans(true);
4674       } else if (cmd.equals("-fixHdfsOverlaps")) {
4675         setFixHdfsOverlaps(true);
4676       } else if (cmd.equals("-fixVersionFile")) {
4677         setFixVersionFile(true);
4678       } else if (cmd.equals("-sidelineBigOverlaps")) {
4679         setSidelineBigOverlaps(true);
4680       } else if (cmd.equals("-fixSplitParents")) {
4681         setFixSplitParents(true);
4682       } else if (cmd.equals("-ignorePreCheckPermission")) {
4683         setIgnorePreCheckPermission(true);
4684       } else if (cmd.equals("-checkCorruptHFiles")) {
4685         checkCorruptHFiles = true;
4686       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4687         sidelineCorruptHFiles = true;
4688       } else if (cmd.equals("-fixReferenceFiles")) {
4689         setFixReferenceFiles(true);
4690       } else if (cmd.equals("-fixEmptyMetaCells")) {
4691         setFixEmptyMetaCells(true);
4692       } else if (cmd.equals("-repair")) {
4693         // this attempts to merge overlapping hdfs regions, needs testing
4694         // under load
4695         setFixHdfsHoles(true);
4696         setFixHdfsOrphans(true);
4697         setFixMeta(true);
4698         setFixAssignments(true);
4699         setFixHdfsOverlaps(true);
4700         setFixVersionFile(true);
4701         setSidelineBigOverlaps(true);
4702         setFixSplitParents(false);
4703         setCheckHdfs(true);
4704         setFixReferenceFiles(true);
4705         setFixTableLocks(true);
4706       } else if (cmd.equals("-repairHoles")) {
4707         // this will make all missing hdfs regions available but may lose data
4708         setFixHdfsHoles(true);
4709         setFixHdfsOrphans(false);
4710         setFixMeta(true);
4711         setFixAssignments(true);
4712         setFixHdfsOverlaps(false);
4713         setSidelineBigOverlaps(false);
4714         setFixSplitParents(false);
4715         setCheckHdfs(true);
4716       } else if (cmd.equals("-maxOverlapsToSideline")) {
4717         if (i == args.length - 1) {
4718           errors.reportError(ERROR_CODE.WRONG_USAGE,
4719             "-maxOverlapsToSideline needs a numeric value argument.");
4720           return printUsageAndExit();
4721         }
4722         try {
4723           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4724           setMaxOverlapsToSideline(maxOverlapsToSideline);
4725         } catch (NumberFormatException e) {
4726           errors.reportError(ERROR_CODE.WRONG_USAGE,
4727             "-maxOverlapsToSideline needs a numeric value argument.");
4728           return printUsageAndExit();
4729         }
4730         i++;
4731       } else if (cmd.equals("-maxMerge")) {
4732         if (i == args.length - 1) {
4733           errors.reportError(ERROR_CODE.WRONG_USAGE,
4734             "-maxMerge needs a numeric value argument.");
4735           return printUsageAndExit();
4736         }
4737         try {
4738           int maxMerge = Integer.parseInt(args[i+1]);
4739           setMaxMerge(maxMerge);
4740         } catch (NumberFormatException e) {
4741           errors.reportError(ERROR_CODE.WRONG_USAGE,
4742             "-maxMerge needs a numeric value argument.");
4743           return printUsageAndExit();
4744         }
4745         i++;
4746       } else if (cmd.equals("-summary")) {
4747         setSummary();
4748       } else if (cmd.equals("-metaonly")) {
4749         setCheckMetaOnly();
4750       } else if (cmd.equals("-boundaries")) {
4751         setRegionBoundariesCheck();
4752       } else if (cmd.equals("-fixTableLocks")) {
4753         setFixTableLocks(true);
4754       } else if (cmd.equals("-fixReplication")) {
4755         setFixReplication(true);
4756       } else if (cmd.startsWith("-")) {
4757         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4758         return printUsageAndExit();
4759       } else {
4760         includeTable(TableName.valueOf(cmd));
4761         errors.print("Allow checking/fixes for table: " + cmd);
4762       }
4763     }
4764
4765     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4766
4767     // pre-check current user has FS write permission or not
4768     try {
4769       preCheckPermission();
4770     } catch (AccessDeniedException ace) {
4771       Runtime.getRuntime().exit(-1);
4772     } catch (IOException ioe) {
4773       Runtime.getRuntime().exit(-1);
4774     }
4775
4776     // do the real work of hbck
4777     connect();
4778
4779     try {
4780       // if corrupt file mode is on, first fix them since they may be opened later
4781       if (checkCorruptHFiles || sidelineCorruptHFiles) {
4782         LOG.info("Checking all hfiles for corruption");
4783         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4784         setHFileCorruptionChecker(hfcc); // so we can get result
4785         Collection<TableName> tables = getIncludedTables();
4786         Collection<Path> tableDirs = new ArrayList<Path>();
4787         Path rootdir = FSUtils.getRootDir(getConf());
4788         if (tables.size() > 0) {
4789           for (TableName t : tables) {
4790             tableDirs.add(FSUtils.getTableDir(rootdir, t));
4791           }
4792         } else {
4793           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4794         }
4795         hfcc.checkTables(tableDirs);
4796         hfcc.report(errors);
4797       }
4798
4799       // check and fix table integrity, region consistency.
4800       int code = onlineHbck();
4801       setRetCode(code);
4802       // If we have changed the HBase state it is better to run hbck again
4803       // to see if we haven't broken something else in the process.
4804       // We run it only once more because otherwise we can easily fall into
4805       // an infinite loop.
4806       if (shouldRerun()) {
4807         try {
4808           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4809           Thread.sleep(sleepBeforeRerun);
4810         } catch (InterruptedException ie) {
4811           LOG.warn("Interrupted while sleeping");
4812           return this;
4813         }
4814         // Just report
4815         setFixAssignments(false);
4816         setFixMeta(false);
4817         setFixHdfsHoles(false);
4818         setFixHdfsOverlaps(false);
4819         setFixVersionFile(false);
4820         setFixTableOrphans(false);
4821         errors.resetErrors();
4822         code = onlineHbck();
4823         setRetCode(code);
4824       }
4825     } finally {
4826       IOUtils.closeQuietly(this);
4827     }
4828     return this;
4829   }
4830
4831   /**
4832    * ls -r for debugging purposes
4833    */
4834   void debugLsr(Path p) throws IOException {
4835     debugLsr(getConf(), p, errors);
4836   }
4837
4838   /**
4839    * ls -r for debugging purposes
4840    */
4841   public static void debugLsr(Configuration conf,
4842       Path p) throws IOException {
4843     debugLsr(conf, p, new PrintingErrorReporter());
4844   }
4845
4846   /**
4847    * ls -r for debugging purposes
4848    */
4849   public static void debugLsr(Configuration conf,
4850       Path p, ErrorReporter errors) throws IOException {
4851     if (!LOG.isDebugEnabled() || p == null) {
4852       return;
4853     }
4854     FileSystem fs = p.getFileSystem(conf);
4855
4856     if (!fs.exists(p)) {
4857       // nothing
4858       return;
4859     }
4860     errors.print(p.toString());
4861
4862     if (fs.isFile(p)) {
4863       return;
4864     }
4865
4866     if (fs.getFileStatus(p).isDirectory()) {
4867       FileStatus[] fss = fs.listStatus(p);
4868       for (FileStatus status : fss) {
4869         debugLsr(conf, status.getPath(), errors);
4870       }
4871     }
4872   }
4873 }