/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.google.protobuf.ServiceException;

import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.KeeperException;

/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase.
 * <p>
 * Region consistency checks verify that hbase:meta, region deployment on region
 * servers, and the state of data in HDFS (.regioninfo files) are all in
 * agreement.
 * <p>
 * Table integrity checks verify that all possible row keys resolve to exactly
 * one region of a table.  This means there are no individual degenerate
 * or backwards regions; no holes between regions; and that there are no
 * overlapping regions.
 * <p>
 * The general repair strategy works in two phases:
 * <ol>
 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
 * <li> Repair Region Consistency with hbase:meta and assignments
 * </ol>
 * <p>
 * For table integrity repairs, the tables' region directories are scanned
 * for .regioninfo files.  Each table's integrity is then verified.  If there
 * are any orphan regions (regions with no .regioninfo files) or holes, new
 * regions are fabricated.  Backwards regions are sidelined, as are empty
 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
 * a new region is created and all data is merged into the new region.
 * <p>
 * Table integrity repairs deal solely with HDFS and could potentially be done
 * offline -- the hbase region servers or master do not need to be running.
 * This phase can eventually be used to completely reconstruct the hbase:meta
 * table in an offline fashion.
 * <p>
 * Region consistency requires three conditions -- 1) a valid .regioninfo file
 * present in the HDFS region dir, 2) a valid row with .regioninfo data in META,
 * and 3) the region deployed only at the regionserver to which it was
 * assigned, with proper state in the master.
 * <p>
 * Region consistency repairs require hbase to be online so that hbck can
 * contact the HBase master and region servers.  The hbck#connect() method must
 * first be called successfully.  Much of the region consistency information
 * is transient and less risky to repair.
 * <p>
 * If hbck is run from the command line, there are a handful of arguments that
 * can be used to limit the kinds of repairs hbck will do.  See the code in
 * {@link #printUsageAndExit()} for more details.
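 * <p>
 * A minimal programmatic sketch of that flow (exception handling elided;
 * which fix options to enable depends on the problem at hand):
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * HBaseFsck fsck = new HBaseFsck(conf);
 * fsck.connect();       // required before any online consistency repair
 * try {
 *   fsck.onlineHbck();  // checks (and, with fix options set, repairs)
 * } finally {
 *   fsck.close();       // also releases the hbck lock file
 * }
 * </pre>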
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
@InterfaceStability.Evolving
public class HBaseFsck extends Configured implements Closeable {
  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";
  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200;

  /**********************
   * Internal resources
   **********************/
  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private ClusterConnection connection;
  private Admin admin;
  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
  protected ExecutorService executor;
  private long startMillis = System.currentTimeMillis();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  private Path HBCK_LOCK_PATH;
  private FSDataOutputStream hbckOutFd;
  // This flag prevents cleaning up the hbck lock file twice, once from the
  // ShutdownHook and once from the main code. We clean up only if connect()
  // was successful.
  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  /***********
   * Options
   ***********/
  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // tables modified more recently than this lag are in flux
  private boolean fixAssignments = false; // fix assignment errors?
  private boolean fixMeta = false; // fix meta errors?
  private boolean checkHdfs = true; // load and check fs consistency?
  private boolean fixHdfsHoles = false; // fix fs holes?
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix orphan tables (missing .tableinfo)
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
  private boolean fixSplitParents = false; // fix lingering split parents
  private boolean fixReferenceFiles = false; // fix lingering reference store files
  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
  private boolean fixTableLocks = false; // fix table locks which are expired
  private boolean fixAny = false; // set to true if any fix option is enabled

  // limit checking/fixes to listed tables; if empty, attempt to check/fix all
  // (hbase:meta is always checked)
  private Set<TableName> tablesIncluded = new HashSet<TableName>();
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
  // maximum number of overlapping regions to sideline
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
  private Path sidelineDir = null;

  private boolean rerun = false; // if we tried to fix something, rerun hbck
  private static boolean summary = false; // if we want to print less output
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if true, ignore permission pre-check failures

  /*********
   * State
   *********/
  final private ErrorReporter errors;
  int fixes = 0;

  /**
   * This map contains the state of all hbck items.  It maps from encoded region
   * name to HbckInfo structure.  The information contained in HbckInfo is used
   * to detect and correct consistency (hdfs/meta/deployment) problems.
   */
  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  // Empty regioninfo qualifiers in hbase:meta
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  /**
   * This map from Tablename -> TableInfo contains the structures necessary to
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
   * to prevent dupes.
   *
   * If tablesIncluded is empty, this map contains all tables.
   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case it contains only
   * the meta table.
   */
  private SortedMap<TableName, TableInfo> tablesInfo =
      new ConcurrentSkipListMap<TableName, TableInfo>();

  /**
   * When initially looking at HDFS, we attempt to find any orphaned data.
   */
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<TableName, Set<String>> orphanTableDirs =
      new HashMap<TableName, Set<String>>();
  private Map<TableName, TableState> tableStates =
      new HashMap<TableName, TableState>();
  private final RetryCounterFactory lockFileRetryCounterFactory;

  /**
   * Constructor
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    // make a copy, just to be sure we're not overriding someone else's config
    setConf(HBaseConfiguration.create(getConf()));
    // disable blockcache for tool invocation, see HBASE-10500
    getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
    // Disable usage of meta replicas in hbck
    getConf().setBoolean(HConstants.USE_META_REPLICAS, false);
    errors = getErrorReporter(conf);

    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    executor = new ScheduledThreadPoolExecutor(numThreads,
        Threads.newDaemonThreadFactory("hbasefsck"));
    lockFileRetryCounterFactory = new RetryCounterFactory(
        getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
        getConf().getInt("hbase.hbck.lockfile.attempt.sleep.interval",
            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL));
  }

  /**
   * Constructor
   *
   * @param conf
   *          Configuration object
   * @throws MasterNotRunningException
   *           if the master is not running
   * @throws ZooKeeperConnectionException
   *           if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
    lockFileRetryCounterFactory = new RetryCounterFactory(
        getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
        getConf().getInt("hbase.hbck.lockfile.attempt.sleep.interval",
            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL));
  }

  private class FileLockCallable implements Callable<FSDataOutputStream> {
    RetryCounter retryCounter;

    public FileLockCallable(RetryCounter retryCounter) {
      this.retryCounter = retryCounter;
    }

    @Override
    public FSDataOutputStream call() throws IOException {
      try {
        FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
            HConstants.DATA_FILE_UMASK_KEY);
        Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
        fs.mkdirs(tmpDir);
        HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
        final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
        out.writeBytes(InetAddress.getLocalHost().toString());
        out.flush();
        return out;
      } catch (RemoteException e) {
        if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
          return null;
        } else {
          throw e;
        }
      }
    }

    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
        final Path hbckLockFilePath, final FsPermission defaultPerms)
        throws IOException {

      IOException exception = null;
      do {
        try {
          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
        } catch (IOException ioe) {
          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
              ioe);
          try {
            exception = ioe;
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            throw (InterruptedIOException) new InterruptedIOException(
                "Can't create lock file " + hbckLockFilePath.getName())
                .initCause(ie);
          }
        }
      } while (retryCounter.shouldRetry());

      throw exception;
    }
  }

  /**
   * This method maintains a lock using a file. If the creation fails, we return null.
   *
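   * <p>The lock file is created at
   * {@code ${hbase.rootdir}/<temp dir>/hbase-hbck.lock}, where the temp dir is
   * {@link HConstants#HBASE_TEMP_DIRECTORY} (typically {@code .tmp}); see
   * {@link FileLockCallable#call()} above for how the path is built.
   *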
   * @return FSDataOutputStream object corresponding to the newly opened lock file
   * @throws IOException
   */
  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
    RetryCounter retryCounter = lockFileRetryCounterFactory.create();
    FileLockCallable callable = new FileLockCallable(retryCounter);
    ExecutorService executor = Executors.newFixedThreadPool(1);
    FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
    executor.execute(futureTask);
    final int timeoutInSeconds = 30;
    FSDataOutputStream stream = null;
    try {
      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
    } catch (ExecutionException ee) {
      LOG.warn("Encountered exception when opening lock file", ee);
    } catch (InterruptedException ie) {
      LOG.warn("Interrupted when opening lock file", ie);
      Thread.currentThread().interrupt();
    } catch (TimeoutException exception) {
      // took too long to obtain lock
      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
      futureTask.cancel(true);
    } finally {
      executor.shutdownNow();
    }
    return stream;
  }

  private void unlockHbck() {
    if (hbckLockCleanup.compareAndSet(true, false)) {
      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
      do {
        try {
          IOUtils.closeStream(hbckOutFd);
          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
              HBCK_LOCK_PATH, true);
          return;
        } catch (IOException ioe) {
          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
              + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
          try {
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while deleting lock file " +
                HBCK_LOCK_PATH);
            return;
          }
        }
      } while (retryCounter.shouldRetry());
    }
  }

  /**
   * To repair region consistency, one must call connect() in order to repair
   * online state.
   */
  public void connect() throws IOException {

    // Check if another instance of hbck is running
    hbckOutFd = checkAndMarkRunningHbck();
    if (hbckOutFd == null) {
      setRetCode(-1);
      LOG.error("Another instance of hbck is running, exiting this instance. [If you are sure" +
          " no other instance is running, delete the lock file " +
          HBCK_LOCK_PATH + " and rerun the tool]");
      throw new IOException("Duplicate hbck - Abort");
    }

    // Make sure to clean up the lock
    hbckLockCleanup.set(true);

    // Add a shutdown hook to this thread, in case the user tries to
    // kill hbck with a ctrl-c; we want to clean up the lock so that
    // it is available for further calls
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        IOUtils.closeStream(HBaseFsck.this);
        unlockHbck();
      }
    });
    LOG.debug("Launching hbck");

    connection = (ClusterConnection) ConnectionFactory.createConnection(getConf());
    admin = connection.getAdmin();
    meta = connection.getTable(TableName.META_TABLE_NAME);
    status = admin.getClusterStatus();
  }

  /**
   * Get deployed regions according to the region servers.
   */
  private void loadDeployedRegions() throws IOException, InterruptedException {
    // From the master, get a list of all known live region servers
    Collection<ServerName> regionServers = status.getServers();
    errors.print("Number of live region servers: " + regionServers.size());
    if (details) {
      for (ServerName rsinfo: regionServers) {
        errors.print("  " + rsinfo.getServerName());
      }
    }

    // From the master, get a list of all dead region servers
    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    if (details) {
      for (ServerName name: deadRegionServers) {
        errors.print("  " + name);
      }
    }

    // Print the current master name and state
    errors.print("Master: " + status.getMaster());

    // Print the list of all backup masters
    Collection<ServerName> backupMasters = status.getBackupMasters();
    errors.print("Number of backup masters: " + backupMasters.size());
    if (details) {
      for (ServerName name: backupMasters) {
        errors.print("  " + name);
      }
    }

    errors.print("Average load: " + status.getAverageLoad());
    errors.print("Number of requests: " + status.getRequestsCount());
    errors.print("Number of regions: " + status.getRegionsCount());

    Map<String, RegionState> rits = status.getRegionsInTransition();
    errors.print("Number of regions in transition: " + rits.size());
    if (details) {
      for (RegionState state: rits.values()) {
        errors.print("  " + state.toDescriptiveString());
      }
    }

    // Determine what's deployed
    processRegionServers(regionServers);
  }

  /**
   * Clear the current state of hbck.
   */
  private void clearState() {
    // Make sure regionInfo is empty before starting
    fixes = 0;
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    tableStates.clear();
    errors.clear();
    tablesInfo.clear();
    orphanHdfsDirs.clear();
  }

  /**
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
   * the table integrity rules.  HBase doesn't need to be online for this
   * operation to work.
   */
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    // Initial pass to fix orphans.
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
      // if nothing is happening this should always complete in two iterations.
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState(); // clears hbck state and resets fixes to 0.
        // repair what's on HDFS
        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations.
      } while (fixes > 0 && curIter <= maxIterations);

      // Repairs should be done in the first iteration and verification in the second.
      // If there are more than 2 passes, something funny has happened.
      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }

  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters.  It makes each region's state in HDFS, in
   * hbase:meta, and deployments consistent.
   *
   * @return if &gt; 0, the number of errors detected; if &lt; 0, an unrecoverable
   * error occurred.  If 0, we have a clean hbase.
   */
  public int onlineConsistencyRepair() throws IOException, KeeperException,
    InterruptedException {
    clearState();

    // get regions according to what is online on each RegionServer
    loadDeployedRegions();
    // check whether hbase:meta is deployed and online
    recordMetaRegion();
    // Check if hbase:meta is found only once and in the right place
    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }
    // Do not proceed with further consistency checks for tables while hbase:meta
    // itself is not consistent.
    LOG.info("Loading regioninfos from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Empty cells in hbase:meta?
    reportEmptyMetaCells();

    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // get a list of all tables that have not changed recently.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // Get disabled tables states
    loadTableStates();

    // load regiondirs and regioninfos from HDFS
    if (shouldCheckHdfs()) {
      loadHdfsRegionDirs();
      loadHdfsRegionInfos();
    }

    // fix the orphan tables
    fixOrphanTables();

    // Check and fix consistency
    checkAndFixConsistency();

    // Check integrity (does not fix)
    checkIntegrity();
    return errors.getErrorList().size();
  }

  /**
   * Contacts the master and prints out cluster-wide information
   * @return 0 on success, non-zero on failure
   */
  public int onlineHbck() throws IOException, KeeperException, InterruptedException,
      ServiceException {
    // print hbase server version
    errors.print("Version: " + status.getHBaseVersion());
    offlineHdfsIntegrityRepair();

    // turn the balancer off
    boolean oldBalancer = admin.setBalancerRunning(false, true);
    try {
      onlineConsistencyRepair();
    } finally {
      admin.setBalancerRunning(oldBalancer, false);
    }

    if (checkRegionBoundaries) {
      checkRegionBoundaries();
    }

    offlineReferenceFileRepair();

    checkAndFixTableLocks();

    // Remove the hbck lock
    unlockHbck();

    // Print table summary
    printTableSummary(tablesInfo);
    return errors.summarize();
  }

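  /**
   * Return only the row portion of a serialized KeyValue key: the first two
   * bytes are interpreted as the row length and that many following bytes as
   * the row itself (see {@link KeyValue}).  Returns null if given null.
   */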
  public static byte[] keyOnly(byte[] b) {
    if (b == null)
      return b;
    int rowlength = Bytes.toShort(b, 0);
    byte[] result = new byte[rowlength];
    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
    return result;
  }

  @Override
  public void close() throws IOException {
    try {
      unlockHbck();
    } catch (Exception io) {
      LOG.warn(io);
    }
    IOUtils.cleanup(null, admin, meta, connection);
  }

  private static class RegionBoundariesInformation {
    public byte[] regionName;
    public byte[] metaFirstKey;
    public byte[] metaLastKey;
    public byte[] storesFirstKey;
    public byte[] storesLastKey;
    @Override
    public String toString() {
      return "regionName=" + Bytes.toStringBinary(regionName) +
             "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
             "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
             "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
             "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
    }
  }

  public void checkRegionBoundaries() {
    try {
      ByteArrayComparator comparator = new ByteArrayComparator();
      List<HRegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
      final RegionBoundariesInformation currentRegionBoundariesInformation =
          new RegionBoundariesInformation();
      Path hbaseRoot = FSUtils.getRootDir(getConf());
      for (HRegionInfo regionInfo : regions) {
        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
        // For each region, get the start and stop key from the META and compare them to the
        // same information from the Stores.
        Path path = new Path(tableDir, regionInfo.getEncodedName());
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] files = fs.listStatus(path);
        // For all the column families in this region...
        byte[] storeFirstKey = null;
        byte[] storeLastKey = null;
        for (FileStatus file : files) {
          String fileName = file.getPath().toString();
          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
            FileStatus[] storeFiles = fs.listStatus(file.getPath());
            // For all the stores in this column family.
            for (FileStatus storeFile : storeFiles) {
              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
                  getConf()), getConf());
              if ((reader.getFirstKey() != null)
                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                      reader.getFirstKey()) > 0))) {
                storeFirstKey = reader.getFirstKey();
              }
              if ((reader.getLastKey() != null)
                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                      reader.getLastKey())) < 0)) {
                storeLastKey = reader.getLastKey();
              }
              reader.close();
            }
          }
        }
        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
          currentRegionBoundariesInformation.metaFirstKey = null;
        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
          currentRegionBoundariesInformation.metaLastKey = null;

        // For a region to be correct, we need the META start key to be smaller or equal to the
        // smallest start key from all the stores, and the start key from the next META entry to
        // be bigger than the last key from all the current stores. First region start key is null;
        // last region end key is null; some regions can be empty and not have any store.
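        // For example (hypothetical keys): a region [b, f) is consistent when every
        // store key k satisfies b <= k < f; a store key such as "a" or "f" would be
        // flagged as a boundary error below.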

        boolean valid = true;
        // Checking start key.
        if ((currentRegionBoundariesInformation.storesFirstKey != null)
            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
                currentRegionBoundariesInformation.metaFirstKey) >= 0;
        }
        // Checking stop key.
        if ((currentRegionBoundariesInformation.storesLastKey != null)
            && (currentRegionBoundariesInformation.metaLastKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
                currentRegionBoundariesInformation.metaLastKey) < 0;
        }
        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
            tablesInfo.get(regionInfo.getTable()));
          LOG.warn("Region's boundaries not aligned between stores and META for:");
          LOG.warn(currentRegionBoundariesInformation);
        }
      }
    } catch (IOException e) {
      LOG.error(e);
    }
  }

  /**
   * Iterates through the list of all orphan/invalid regiondirs.
   */
  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }

  /**
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
   * these orphans by creating a new region, and moving the column families,
   * recovered edits, and WALs into the new region dir.  We determine the region
   * startkey and endkeys by looking at all of the hfiles inside the column
   * families to identify the min and max keys.  The resulting region will
   * likely violate table integrity, but this will be dealt with by merging
   * overlapping regions.
   */
  @SuppressWarnings("deprecation")
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files are present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();

    // find min and max key values
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      // TODO Figure out what the special dirs are
      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          CacheConfig cacheConf = new CacheConfig(getConf());
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
          hf.loadFileInfo();
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
          start = startKv.getRow();
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
          end = endKv.getRow();
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException ioe) {
          LOG.warn("Orphan file " + hfile + " is possibly a corrupted HFile, skipping");
          continue;
        } finally {
          if (hf != null) {
            hf.close();
          }
        }

        // expand the range to include the range of all hfiles
        if (orphanRegionRange == null) {
          // first range
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
        } else {
          // TODO add test

          // expand range only if the hfile is wider.
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      fixes++;
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min max keys are: [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    // create new region on hdfs.  move data into place.
    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
        orphanRegionRange.getSecond());
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    // rename all the data to new region
    mergeRegionDirs(target, hi);
    fixes++;
  }

  /**
   * This method determines if there are table integrity errors in HDFS.  If
   * there are errors and the appropriate "fix" options are enabled, the method
   * will first correct orphan regions, making them into legitimate regiondirs,
   * and then reload to merge potentially overlapping regions.
   *
   * @return number of table integrity errors found
   */
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    // First time just get suggestions.
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
      // TODO optimize by incrementally adding instead of reloading.
    }

    // Make sure there are no holes now.
    if (shouldFixHdfsHoles()) {
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    // Now we fix overlaps
    if (shouldFixHdfsOverlaps()) {
      // second pass we fix overlaps.
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }

  /**
   * Scan all the store file names to find any lingering reference files,
   * which refer to files that no longer exist.  If the "fix" option is
   * enabled, any lingering reference file found is sidelined.
   * <p>
   * A lingering reference file prevents a region from opening.  It has to
   * be fixed before a cluster can start properly.
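   * <p>
   * As a hypothetical illustration: a reference file named
   * {@code <referred hfile>.<parent region name>} in a daughter region's family
   * dir is lingering once the parent hfile it points to has been removed.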
   */
  private void offlineReferenceFileRepair() throws IOException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
    for (Path path: allFiles.values()) {
      boolean isReference = false;
      try {
        isReference = StoreFileInfo.isReference(path);
      } catch (Throwable t) {
        // Ignore. Some files may not be store files at all.
        // For example, files under the .oldlogs folder in hbase:meta.
        // A warning message is already logged by StoreFile#isReference.
      }
      if (!isReference) continue;

      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue;  // good, expected

      // Found a lingering reference file
      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
        "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      // Now, trying to fix it since requested
      boolean success = false;
      String pathStr = path.toString();

      // A reference file path should be like
      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
      // Go up 5 directories to get to the root folder;
      // the file will be sidelined to a similar folder structure.
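      // e.g. (hypothetical) data/default/t1/<region>/f1/<file>.<parent> would be
      // moved to <sideline dir>/data/default/t1/<region>/f1/<file>.<parent>.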
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index + 1));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
          + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }

  /**
   * TODO -- need to add tests for this.
   */
  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
      emptyRegionInfoQualifiers.size());
    if (details) {
      for (Result r: emptyRegionInfoQualifiers) {
        errors.print("  " + r);
      }
    }
  }

  /**
   * TODO -- need to add tests for this.
   */
  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    HTableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (details) {
      if (numSkipped.get() > 0) {
        errors.detail("Number of Tables in flux: " + numSkipped.get());
      }
      for (HTableDescriptor td : allTables) {
        errors.detail("  Table: " + td.getTableName() + "\t" +
                      (td.isReadOnly() ? "ro" : "rw") + "\t" +
                      (td.isMetaRegion() ? "META" : "    ") + "\t" +
                      " families: " + td.getFamilies().size());
      }
    }
  }

  public ErrorReporter getErrors() {
    return errors;
  }

  /**
   * Read the .regioninfo file from the file system.  If there is no
   * .regioninfo, add it to the orphan hdfs region list.
   */
  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      // already loaded data
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("HRegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }

  /**
   * Exception thrown when an integrity repair operation fails in an
   * unresolvable way.
   */
  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;
    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }

  /**
   * Populate hbi's from regionInfos loaded from file system.
   */
  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear(); // regenerating the data
    // generate region split structure
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    // Parallelized read of .regioninfo files.
    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    // Submit and wait for completion
    hbiFutures = executor.invokeAll(hbis);

    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
              work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    Path hbaseRoot = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseRoot.getFileSystem(getConf());
    // serialized table info gathering.
    for (HbckInfo hbi: hbckInfos) {

      if (hbi.getHdfsHRI() == null) {
        // was an orphan
        continue;
      }

      // get table name from hdfs, populate various HBaseFsck tables.
      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        // There was an entry in hbase:meta that was not in HDFS?
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        // only executed once per table.
        modTInfo = new TableInfo(tableName);
        tablesInfo.put(tableName, modTInfo);
        try {
          TableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd.getHTableDescriptor());
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            // should only report once for each table
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<String>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();

    return tablesInfo;
  }

  /**
   * Get the column family list from the column family dirs.
   * @param columns set to fill with the column family names
   * @param hbi region to inspect
   * @return a set of column families
   * @throws IOException
   */
  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }

  /**
   * Fabricate a .tableinfo file with the following contents:<br>
   * 1. the correct tablename<br>
   * 2. the correct colfamily list<br>
   * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
   * @throws IOException
   */
  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnfamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnfamily));
    }
    fstd.createTableDescriptor(new TableDescriptor(htd), true);
    return true;
  }

  /**
   * Fix empty REGIONINFO_QUALIFIER rows in hbase:meta by deleting them.
   * @throws IOException
   */
  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }

  /**
   * Fix orphan tables by creating a .tableinfo file under the table dir:<br>
   * 1. if TableInfo is cached, recover the .tableinfo accordingly<br>
   * 2. else create a default .tableinfo file with the following items:<br>
   * &nbsp;2.1 the correct tablename<br>
   * &nbsp;2.2 the correct colfamily list<br>
   * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
   * @throws IOException
   */
  public void fixOrphanTables() throws IOException {
    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {

      List<TableName> tmpList = new ArrayList<TableName>();
      tmpList.addAll(orphanTableDirs.keySet());
      HTableDescriptor[] htds = getHTableDescriptors(tmpList);
      Iterator<Entry<TableName, Set<String>>> iter =
          orphanTableDirs.entrySet().iterator();
      int j = 0;
      int numFailedCase = 0;
      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
      while (iter.hasNext()) {
        Entry<TableName, Set<String>> entry = iter.next();
        TableName tableName = entry.getKey();
        LOG.info("Trying to fix orphan table error: " + tableName);
        if (j < htds.length) {
          if (tableName.equals(htds[j].getTableName())) {
            HTableDescriptor htd = htds[j];
            LOG.info("fixing orphan table: " + tableName + " from cache");
            fstd.createTableDescriptor(new TableDescriptor(htd), true);
            j++;
            iter.remove();
          }
        } else {
          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: "
                + tableName);
            iter.remove();
          } else {
            LOG.error("Unable to create default .tableinfo for " + tableName
                + " while missing column family information");
            numFailedCase++;
          }
        }
        fixes++;
      }

      if (orphanTableDirs.isEmpty()) {
        // all orphanTableDirs were successfully recovered
        // re-run doFsck after recovering the .tableinfo file
        setShouldRerun();
        LOG.warn("Strongly recommend re-running hbck manually after all orphanTableDirs have been fixed");
      } else if (numFailedCase > 0) {
        LOG.error("Failed to fix " + numFailedCase
            + " OrphanTables with default .tableinfo files");
      }
    }
    // cleanup the list
    orphanTableDirs.clear();
  }
1285 
1286   /**
1287    * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates it's own WAL, so be
1288    * sure to close it as well as the region when you're finished.
1289    *
1290    * @return an open hbase:meta HRegion
1291    */
1292   private HRegion createNewMeta() throws IOException {
1293     Path rootdir = FSUtils.getRootDir(getConf());
1294     Configuration c = getConf();
1295     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1296     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1297     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1298     // The WAL subsystem will use the default rootDir rather than the passed in rootDir
1299     // unless I pass it along via the conf.
1300     Configuration confForWAL = new Configuration(c);
1301     confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
1302     WAL wal = (new WALFactory(confForWAL,
1303         Collections.<WALActionsListener>singletonList(new MetricsWAL()),
1304         "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8))).
1305         getWAL(metaHRI.getEncodedNameAsBytes());
1306     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
1307     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1308     return meta;
1309   }
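       // Note: per the javadoc above, callers own both the region and its WAL.
       // rebuildMeta() below shows the expected cleanup order:
       //
       //   meta.close();                  // close the region first
       //   if (meta.getWAL() != null) {
       //     meta.getWAL().close();       // then the WAL it was writing to
       //   }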
1310 
1311   /**
1312    * Generate set of puts to add to new meta.  This expects the tables to be
1313    * clean with no overlaps or holes.  If there are any problems it returns null.
1314    *
1315    * @return An array list of puts to do in bulk, null if tables have problems
1316    */
1317   private ArrayList<Put> generatePuts(
1318       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1319     ArrayList<Put> puts = new ArrayList<Put>();
1320     boolean hasProblems = false;
1321     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1322       TableName name = e.getKey();
1323 
1324       // skip "hbase:meta"
1325       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1326         continue;
1327       }
1328 
1329       TableInfo ti = e.getValue();
1330       puts.add(MetaTableAccessor
1331           .makePutFromTableState(new TableState(ti.tableName, TableState.State.ENABLED)));
1332       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1333           .entrySet()) {
1334         Collection<HbckInfo> his = spl.getValue();
1335         int sz = his.size();
1336         if (sz != 1) {
1337           // problem
1338           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1339               + " had " +  sz + " regions instead of exactly 1." );
1340           hasProblems = true;
1341           continue;
1342         }
1343 
1344         // add the row directly to meta.
1345         HbckInfo hi = his.iterator().next();
1346         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1347         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1348         puts.add(p);
1349       }
1350     }
1351     return hasProblems ? null : puts;
1352   }
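       // Illustration (made-up names): for a clean table "t1" with regions r1 and r2,
       // the returned list is roughly
       //   [ put(tableState(t1, ENABLED)), put(regioninfo(r1)), put(regioninfo(r2)) ]
       // i.e. one table-state put per table followed by one put per region, mirroring
       // the makePutFromTableState/makePutFromRegionInfo calls above.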
1353 
1354   /**
1355    * Suggest fixes for each table
1356    */
1357   private void suggestFixes(
1358       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1359     for (TableInfo tInfo : tablesInfo.values()) {
1360       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1361       tInfo.checkRegionChain(handler);
1362     }
1363   }
1364 
1365   /**
1366    * Rebuilds meta from information in hdfs/fs.  Depends on configuration settings passed into
1367    * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE.
1368    *
1369    * @param fix flag that determines if method should attempt to fix holes
1370    * @return true if successful, false if attempt failed.
1371    */
1372   public boolean rebuildMeta(boolean fix) throws IOException,
1373       InterruptedException {
1374 
1375     // TODO check to make sure hbase is offline. (or at least the table
1376     // currently being worked on is offline)
1377 
1378     // Determine what's on HDFS
1379     LOG.info("Loading HBase regioninfo from HDFS...");
1380     loadHdfsRegionDirs(); // populating regioninfo table.
1381 
1382     int errs = errors.getErrorList().size();
1383     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1384     checkHdfsIntegrity(false, false);
1385 
1386     // make sure ok.
1387     if (errors.getErrorList().size() != errs) {
1388       // While in error state, iterate until no more fixes possible
1389       while(true) {
1390         fixes = 0;
1391         suggestFixes(tablesInfo);
1392         errors.clear();
1393         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1394         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1395 
1396         int errCount = errors.getErrorList().size();
1397 
1398         if (fixes == 0) {
1399           if (errCount > 0) {
1400             return false; // failed to fix problems.
1401           } else {
1402             break; // no fixes and no problems? drop out and fix stuff!
1403           }
1404         }
1405       }
1406     }
1407 
1408     // we can rebuild, move old meta out of the way and start
1409     LOG.info("HDFS regioninfos seem good.  Sidelining old hbase:meta");
1410     Path backupDir = sidelineOldMeta();
1411 
1412     LOG.info("Creating new hbase:meta");
1413     HRegion meta = createNewMeta();
1414 
1415     // populate meta
1416     List<Put> puts = generatePuts(tablesInfo);
1417     if (puts == null) {
1418       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1419         "You may need to restore the previously sidelined hbase:meta");
1420       return false;
1421     }
1422     meta.batchMutate(puts.toArray(new Put[puts.size()]));
1423     meta.close();
1424     if (meta.getWAL() != null) {
1425       meta.getWAL().close();
1426     }
1427     LOG.info("Success! hbase:meta table rebuilt.");
1428     LOG.info("Old hbase:meta is moved into " + backupDir);
1429     return true;
1430   }
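       // Usage sketch (hypothetical; assumes the cluster is offline as required above).
       // rebuildMeta is normally driven by the OfflineMetaRepair tool in
       // org.apache.hadoop.hbase.util.hbck rather than called directly:
       //
       //   HBaseFsck fsck = ...;                // configured against the target rootdir
       //   boolean ok = fsck.rebuildMeta(true); // 'fix': attempt to plug holes as well
       //   if (!ok) { /* inspect errors; the sidelined meta may need restoring */ }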
1431 
1432   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1433       boolean fixOverlaps) throws IOException {
1434     LOG.info("Checking HBase region split map from HDFS data...");
1435     for (TableInfo tInfo : tablesInfo.values()) {
1436       TableIntegrityErrorHandler handler;
1437       if (fixHoles || fixOverlaps) {
1438         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1439           fixHoles, fixOverlaps);
1440       } else {
1441         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1442       }
1443       if (!tInfo.checkRegionChain(handler)) {
1444         // should dump info as well.
1445         errors.report("Found inconsistency in table " + tInfo.getName());
1446       }
1447     }
1448     return tablesInfo;
1449   }
1450 
1451   private Path getSidelineDir() throws IOException {
1452     if (sidelineDir == null) {
1453       Path hbaseDir = FSUtils.getRootDir(getConf());
1454       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1455       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1456           + startMillis);
1457     }
1458     return sidelineDir;
1459   }
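       // Illustration (made-up path): with HConstants.HBCK_SIDELINEDIR_NAME (".hbck")
       // and a root dir of hdfs://ns/hbase, the sideline dir resolves to something like
       //   hdfs://ns/hbase/.hbck/hbase-<startMillis>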
1460 
1461   /**
1462    * Sideline a region dir (instead of deleting it)
1463    */
1464   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1465     return sidelineRegionDir(fs, null, hi);
1466   }
1467 
1468   /**
1469    * Sideline a region dir (instead of deleting it)
1470    *
1471    * @param parentDir if specified, the region will be sidelined to a
1472    * folder like .../parentDir/<table name>/<region name>. The purpose
1473    * is to group together regions sidelined for a similar reason, for
1474    * example, regions that should be bulk loaded back later on. If null, it is ignored.
1475    */
1476   Path sidelineRegionDir(FileSystem fs,
1477       String parentDir, HbckInfo hi) throws IOException {
1478     TableName tableName = hi.getTableName();
1479     Path regionDir = hi.getHdfsRegionDir();
1480 
1481     if (!fs.exists(regionDir)) {
1482       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1483       return null;
1484     }
1485 
1486     Path rootDir = getSidelineDir();
1487     if (parentDir != null) {
1488       rootDir = new Path(rootDir, parentDir);
1489     }
1490     Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName);
1491     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1492     fs.mkdirs(sidelineRegionDir);
1493     boolean success = false;
1494     FileStatus[] cfs =  fs.listStatus(regionDir);
1495     if (cfs == null) {
1496       LOG.info("Region dir is empty: " + regionDir);
1497     } else {
1498       for (FileStatus cf : cfs) {
1499         Path src = cf.getPath();
1500         Path dst =  new Path(sidelineRegionDir, src.getName());
1501         if (fs.isFile(src)) {
1502           // simple file
1503           success = fs.rename(src, dst);
1504           if (!success) {
1505             String msg = "Unable to rename file " + src +  " to " + dst;
1506             LOG.error(msg);
1507             throw new IOException(msg);
1508           }
1509           continue;
1510         }
1511 
1512         // is a directory.
1513         fs.mkdirs(dst);
1514 
1515         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1516         // FileSystem.rename is inconsistent with directories -- if the
1517         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1518         // it moves the src into the dst dir resulting in (foo/a/b).  If
1519         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1520         FileStatus[] hfiles = fs.listStatus(src);
1521         if (hfiles != null && hfiles.length > 0) {
1522           for (FileStatus hfile : hfiles) {
1523             success = fs.rename(hfile.getPath(), dst);
1524             if (!success) {
1525               String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1526               LOG.error(msg);
1527               throw new IOException(msg);
1528             }
1529           }
1530         }
1531         LOG.debug("Sideline directory contents:");
1532         debugLsr(sidelineRegionDir);
1533       }
1534     }
1535 
1536     LOG.info("Removing old region dir: " + regionDir);
1537     success = fs.delete(regionDir, true);
1538     if (!success) {
1539       String msg = "Unable to delete dir " + regionDir;
1540       LOG.error(msg);
1541       throw new IOException(msg);
1542     }
1543     return sidelineRegionDir;
1544   }
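       // Illustration (made-up names): sidelining region "1588230740" of table "t1"
       // with parentDir "overlaps-group-0" moves its files under
       //   <sideline dir>/overlaps-group-0/t1/1588230740
       // matching the .../parentDir/<table name>/<region name> layout described above.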
1545 
1546   /**
1547    * Sideline an entire table.
1548    */
1549   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1550       Path backupHbaseDir) throws IOException {
1551     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1552     if (fs.exists(tableDir)) {
1553       Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1554       fs.mkdirs(backupTableDir.getParent());
1555       boolean success = fs.rename(tableDir, backupTableDir);
1556       if (!success) {
1557         throw new IOException("Failed to move " + tableName + " from "
1558             +  tableDir + " to " + backupTableDir);
1559       }
1560     } else {
1561       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1562     }
1563   }
1564 
1565   /**
1566    * @return Path to backup of original directory
1567    */
1568   Path sidelineOldMeta() throws IOException {
1569     // put current hbase:meta aside.
1570     Path hbaseDir = FSUtils.getRootDir(getConf());
1571     FileSystem fs = hbaseDir.getFileSystem(getConf());
1572     Path backupDir = getSidelineDir();
1573     fs.mkdirs(backupDir);
1574 
1575     try {
1576       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1577     } catch (IOException e) {
1578       LOG.fatal("... failed to sideline meta. Currently in inconsistent state.  To restore "
1579           + "try to rename hbase:meta in " + backupDir.getName() + " to "
1580           + hbaseDir.getName() + ".", e);
1581       throw e; // throw original exception
1582     }
1583     return backupDir;
1584   }
1585 
1586   /**
1587    * Load the table states from hbase:meta into the local
1588    * tableStates map.
1589    * @throws IOException
1590    */
1591   private void loadTableStates()
1592   throws IOException {
1593     tableStates = MetaTableAccessor.getTableStates(connection);
1594   }
1595 
1596   /**
1597    * Check if the specified table is disabled or in the process of disabling.
1598    * @param tableName table to check the status of
1599    */
1600   private boolean isTableDisabled(TableName tableName) {
1601     return tableStates.containsKey(tableName)
1602         && tableStates.get(tableName)
1603         .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1604   }
1605 
1606   /**
1607    * Scan HDFS for all regions, recording their information into
1608    * regionInfoMap
1609    */
1610   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1611     Path rootDir = FSUtils.getRootDir(getConf());
1612     FileSystem fs = rootDir.getFileSystem(getConf());
1613 
1614     // list all tables from HDFS
1615     List<FileStatus> tableDirs = Lists.newArrayList();
1616 
1617     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1618 
1619     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1620     for (Path path : paths) {
1621       TableName tableName = FSUtils.getTableName(path);
1622       if ((!checkMetaOnly &&
1623           isTableIncluded(tableName)) ||
1624           tableName.equals(TableName.META_TABLE_NAME)) {
1625         tableDirs.add(fs.getFileStatus(path));
1626       }
1627     }
1628 
1629     // verify that version file exists
1630     if (!foundVersionFile) {
1631       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1632           "Version file does not exist in root dir " + rootDir);
1633       if (shouldFixVersionFile()) {
1634         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1635             + " file.");
1636         setShouldRerun();
1637         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1638             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1639             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1640             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1641       }
1642     }
1643 
1644     // level 1:  <HBASE_DIR>/*
1645     List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
1646     List<Future<Void>> dirsFutures;
1647 
1648     for (FileStatus tableDir : tableDirs) {
1649       LOG.debug("Loading region dirs from " + tableDir.getPath());
1650       dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
1651     }
1652 
1653     // Invoke and wait for Callables to complete
1654     dirsFutures = executor.invokeAll(dirs);
1655 
1656     for(Future<Void> f: dirsFutures) {
1657       try {
1658         f.get();
1659       } catch(ExecutionException e) {
1660         LOG.warn("Could not load region dir", e.getCause());
1661       }
1662     }
1663   }
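       // Note: each WorkItemHdfsDir walks one table dir and records the region dirs
       // one level below it, e.g. (illustrative layout)
       //   <rootDir>/data/<namespace>/<table>/<encoded region name>
       // Failures of individual work items are logged and skipped above, so one bad
       // table dir does not abort the whole HDFS scan.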
1664 
1665   /**
1666    * Record the location of the hbase:meta region as found in ZooKeeper.
1667    */
1668   private boolean recordMetaRegion() throws IOException {
1669     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1670         HConstants.EMPTY_START_ROW, false, false);
1671     if (rl == null) {
1672       errors.reportError(ERROR_CODE.NULL_META_REGION,
1673           "META region was not found in Zookeeper");
1674       return false;
1675     }
1676     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1677       // Check if the Meta region is valid and exists
1678       if (metaLocation == null ) {
1679         errors.reportError(ERROR_CODE.NULL_META_REGION,
1680             "META region location is null");
1681         return false;
1682       }
1683       if (metaLocation.getRegionInfo() == null) {
1684         errors.reportError(ERROR_CODE.NULL_META_REGION,
1685             "META location regionInfo is null");
1686         return false;
1687       }
1688       if (metaLocation.getHostname() == null) {
1689         errors.reportError(ERROR_CODE.NULL_META_REGION,
1690             "META location hostName is null");
1691         return false;
1692       }
1693       ServerName sn = metaLocation.getServerName();
1694       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
1695       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1696       if (hbckInfo == null) {
1697         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1698       } else {
1699         hbckInfo.metaEntry = m;
1700       }
1701     }
1702     return true;
1703   }
1704 
1705   private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1706     return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1707       @Override
1708       public void abort(String why, Throwable e) {
1709         LOG.error(why, e);
1710         System.exit(1);
1711       }
1712 
1713       @Override
1714       public boolean isAborted() {
1715         return false;
1716       }
1717 
1718     });
1719   }
1720 
1721   private ServerName getMetaRegionServerName(int replicaId)
1722   throws IOException, KeeperException {
1723     ZooKeeperWatcher zkw = createZooKeeperWatcher();
1724     ServerName sn = null;
1725     try {
1726       sn = new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
1727     } finally {
1728       zkw.close();
1729     }
1730     return sn;
1731   }
1732 
1733   /**
1734    * Contacts each regionserver and fetches metadata about regions.
1735    * @param regionServerList - the list of region servers to connect to
1736    * @throws IOException if a remote or network exception occurs
1737    */
1738   void processRegionServers(Collection<ServerName> regionServerList)
1739     throws IOException, InterruptedException {
1740 
1741     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1742     List<Future<Void>> workFutures;
1743 
1744     // loop to contact each region server in parallel
1745     for (ServerName rsinfo: regionServerList) {
1746       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1747     }
1748 
1749     workFutures = executor.invokeAll(workItems);
1750 
1751     for(int i=0; i<workFutures.size(); i++) {
1752       WorkItemRegion item = workItems.get(i);
1753       Future<Void> f = workFutures.get(i);
1754       try {
1755         f.get();
1756       } catch(ExecutionException e) {
1757         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1758             e.getCause());
1759       }
1760     }
1761   }
1762 
1763   /**
1764    * Check consistency of all regions that have been found in previous phases.
1765    */
1766   private void checkAndFixConsistency()
1767   throws IOException, KeeperException, InterruptedException {
1768     // Divide the checks into two phases. One for default/primary replicas and another
1769     // for the non-primary ones. Keeps code cleaner this way.
1770     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1771       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1772         checkRegionConsistency(e.getKey(), e.getValue());
1773       }
1774     }
1775     boolean prevHdfsCheck = shouldCheckHdfs();
1776     setCheckHdfs(false); //replicas don't have any hdfs data
1777     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1778     // deployed/undeployed replicas.
1779     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1780       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1781         checkRegionConsistency(e.getKey(), e.getValue());
1782       }
1783     }
1784     setCheckHdfs(prevHdfsCheck);
1785 
1786     if (shouldCheckHdfs()) {
1787       checkAndFixTableStates();
1788     }
1789   }
1790 
1791   /**
1792    * Check and fix table states; assumes full info is available:
1793    * - tableInfos
1794    * - empty tables loaded
1795    */
1796   private void checkAndFixTableStates() throws IOException {
1797     // first check dangling states
1798     for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1799       TableName tableName = entry.getKey();
1800       TableState tableState = entry.getValue();
1801       TableInfo tableInfo = tablesInfo.get(tableName);
1802       if (isTableIncluded(tableName)
1803           && !tableName.isSystemTable()
1804           && tableInfo == null) {
1805         if (fixMeta) {
1806           MetaTableAccessor.deleteTableState(connection, tableName);
1807           TableState state = MetaTableAccessor.getTableState(connection, tableName);
1808           if (state != null) {
1809             errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1810                 "Unable to delete dangling table state " + tableState + " for table " + tableName);
1811           }
1812         } else {
1813           errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1814               tableName + " has dangling table state " + tableState);
1815         }
1816       }
1817     }
1818     // check that all tables have states
1819     for (TableName tableName : tablesInfo.keySet()) {
1820       if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1821         if (fixMeta) {
1822           MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1823           TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1824           if (newState == null) {
1825             errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1826                 "Unable to change state for table " + tableName + " in meta ");
1827           }
1828         } else {
1829           errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1830               tableName + " has no state in meta ");
1831         }
1832       }
1833     }
1834   }
1835 
1836   private void preCheckPermission() throws IOException, AccessDeniedException {
1837     if (shouldIgnorePreCheckPermission()) {
1838       return;
1839     }
1840 
1841     Path hbaseDir = FSUtils.getRootDir(getConf());
1842     FileSystem fs = hbaseDir.getFileSystem(getConf());
1843     UserProvider userProvider = UserProvider.instantiate(getConf());
1844     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1845     FileStatus[] files = fs.listStatus(hbaseDir);
1846     for (FileStatus file : files) {
1847       try {
1848         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1849       } catch (AccessDeniedException ace) {
1850         LOG.warn("Got AccessDeniedException during preCheckPermission", ace);
1851         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1852           + " does not have write perms to " + file.getPath()
1853           + ". Please rerun hbck as hdfs user " + file.getOwner());
1854         throw ace;
1855       }
1856     }
1857   }
1858 
1859   /**
1860    * Deletes region from meta table
1861    */
1862   private void deleteMetaRegion(HbckInfo hi) throws IOException {
1863     deleteMetaRegion(hi.metaEntry.getRegionName());
1864   }
1865 
1866   /**
1867    * Deletes region from meta table
1868    */
1869   private void deleteMetaRegion(byte[] metaKey) throws IOException {
1870     Delete d = new Delete(metaKey);
1871     meta.delete(d);
1872     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1873   }
1874 
1875   /**
1876    * Reset the split parent region info in meta table
1877    */
1878   private void resetSplitParent(HbckInfo hi) throws IOException {
1879     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1880     Delete d = new Delete(hi.metaEntry.getRegionName());
1881     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1882     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1883     mutations.add(d);
1884 
1885     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1886     hri.setOffline(false);
1887     hri.setSplit(false);
1888     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1889     mutations.add(p);
1890 
1891     meta.mutateRow(mutations);
1892     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1893   }
1894 
1895   /**
1896    * This is a backwards-compatibility wrapper for permanently offlining a region
1897    * that should not be alive.  If the master does not support the
1898    * "offline" method, it will use the closest unassign method instead.  This
1899    * will basically work until one attempts to disable or delete the affected
1900    * table.  The problem has to do with in-memory-only master state, so
1901    * restarting the HMaster or failing over to another should fix this.
1902    */
1903   private void offline(byte[] regionName) throws IOException {
1904     String regionString = Bytes.toStringBinary(regionName);
1905     if (!rsSupportsOffline) {
1906       LOG.warn("Using unassign region " + regionString
1907           + " instead of using offline method, you should"
1908           + " restart HMaster after these repairs");
1909       admin.unassign(regionName, true);
1910       return;
1911     }
1912 
1913     // the first time through, we assume #offline is supported.
1914     try {
1915       LOG.info("Offlining region " + regionString);
1916       admin.offline(regionName);
1917     } catch (IOException ioe) {
1918       String notFoundMsg = "java.lang.NoSuchMethodException: " +
1919         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1920       if (ioe.getMessage().contains(notFoundMsg)) {
1921         LOG.warn("Using unassign region " + regionString
1922             + " instead of using offline method, you should"
1923             + " restart HMaster after these repairs");
1924         rsSupportsOffline = false; // in the future just use unassign
1925         admin.unassign(regionName, true);
1926         return;
1927       }
1928       throw ioe;
1929     }
1930   }
1931 
1932   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1933     undeployRegionsForHbi(hi);
1934     // undeploy replicas of the region (but only if the method is invoked for the primary)
1935     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1936       return;
1937     }
1938     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
1939     for (int i = 1; i < numReplicas; i++) {
1940       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
1941       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
1942           hi.getPrimaryHRIForDeployedReplica(), i);
1943       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
1944       if (h != null) {
1945         undeployRegionsForHbi(h);
1946         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
1947         //in consistency checks
1948         h.setSkipChecks(true);
1949       }
1950     }
1951   }
1952 
1953   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
1954     for (OnlineEntry rse : hi.deployedEntries) {
1955       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
1956       try {
1957         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
1958         offline(rse.hri.getRegionName());
1959       } catch (IOException ioe) {
1960         LOG.warn("Got exception when attempting to offline region "
1961             + Bytes.toString(rse.hri.getRegionName()), ioe);
1962       }
1963     }
1964   }
1965 
1966   /**
1967    * Attempts to undeploy a region from a region server based on information in
1968    * META.  Any operations that modify the file system should make sure that
1969    * its corresponding region is not deployed to prevent data races.
1970    *
1971    * A separate call is required to update the master in-memory region state
1972    * kept in the AssignmentManager.  Because disable uses this state instead of
1973    * that found in META, we can't seem to cleanly disable/delete tables that
1974    * have been hbck fixed.  When used on a version of HBase that does not have
1975    * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
1976    * restart or failover may be required.
1977    */
1978   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1979     if (hi.metaEntry == null && hi.hdfsEntry == null) {
1980       undeployRegions(hi);
1981       return;
1982     }
1983 
1984     // get assignment info and hregioninfo from meta.
1985     Get get = new Get(hi.getRegionName());
1986     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1987     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1988     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1989     // also get the locations of the replicas to close if the primary region is being closed
1990     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1991       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
1992       for (int i = 0; i < numReplicas; i++) {
1993         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
1994         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
1995       }
1996     }
1997     Result r = meta.get(get);
1998     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
1999     if (rl == null) {
2000       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2001           " since meta does not have a handle to reach it");
2002       return;
2003     }
2004     for (HRegionLocation h : rl.getRegionLocations()) {
2005       ServerName serverName = h.getServerName();
2006       if (serverName == null) {
2007         errors.reportError("Unable to close region "
2008             + hi.getRegionNameAsString() + " because meta does not "
2009             + "have a handle to reach it.");
2010         continue;
2011       }
2012       HRegionInfo hri = h.getRegionInfo();
2013       if (hri == null) {
2014         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2015             + " because hbase:meta had invalid or missing "
2016             + HConstants.CATALOG_FAMILY_STR + ":"
2017             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2018             + " qualifier value.");
2019         continue;
2020       }
2021       // close the region -- close files and remove assignment
2022       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2023     }
2024   }
2025 
2026   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2027     KeeperException, InterruptedException {
2028     // If we are trying to fix the errors
2029     if (shouldFixAssignments()) {
2030       errors.print(msg);
2031       undeployRegions(hbi);
2032       setShouldRerun();
2033       HRegionInfo hri = hbi.getHdfsHRI();
2034       if (hri == null) {
2035         hri = hbi.metaEntry;
2036       }
2037       HBaseFsckRepair.fixUnassigned(admin, hri);
2038       HBaseFsckRepair.waitUntilAssigned(admin, hri);
2039 
2040       // also assign replicas if needed (do it only when this call operates on a primary replica)
2041       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
2042       int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2043       for (int i = 1; i < replicationCount; i++) {
2044         hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2045         HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2046         if (h != null) {
2047           undeployRegions(h);
2048           //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2049           //in consistency checks
2050           h.setSkipChecks(true);
2051         }
2052         HBaseFsckRepair.fixUnassigned(admin, hri);
2053         HBaseFsckRepair.waitUntilAssigned(admin, hri);
2054       }
2055 
2056     }
2057   }
2058 
2059   /**
2060    * Check a single region for consistency and correct deployment.
2061    */
2062   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2063   throws IOException, KeeperException, InterruptedException {
2064 
2065     if (hbi.isSkipChecks()) return;
2066     String descriptiveName = hbi.toString();
2067     boolean inMeta = hbi.metaEntry != null;
2068     // If we are not checking HDFS, assume the region is on HDFS
2069     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2070     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2071     boolean isDeployed = !hbi.deployedOn.isEmpty();
2072     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2073     boolean deploymentMatchesMeta =
2074       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2075       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2076     boolean splitParent =
2077         inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2078     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2079     boolean recentlyModified = inHdfs &&
2080       hbi.getModTime() + timelag > System.currentTimeMillis();
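         // For example, a healthy, enabled primary region evaluates to
         //   inMeta=true, inHdfs=true, isDeployed=true, isMultiplyDeployed=false,
         //   deploymentMatchesMeta=true, shouldBeDeployed=true
         // and is handled by the first healthy-case branch below; other combinations
         // fall into one of the error cases.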
2081 
2082     // ========== First the healthy cases =============
2083     if (hbi.containsOnlyHdfsEdits()) {
2084       return;
2085     }
2086     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2087       return;
2088     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2089       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2090         "table that is not deployed");
2091       return;
2092     } else if (recentlyModified) {
2093       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2094       return;
2095     }
2096     // ========== Cases where the region is not in hbase:meta =============
2097     else if (!inMeta && !inHdfs && !isDeployed) {
2098       // We shouldn't have record of this region at all then!
2099       assert false : "Entry for region with no data";
2100     } else if (!inMeta && !inHdfs && isDeployed) {
2101       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2102           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2103           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2104       if (shouldFixAssignments()) {
2105         undeployRegions(hbi);
2106       }
2107 
2108     } else if (!inMeta && inHdfs && !isDeployed) {
2109       if (hbi.isMerged()) {
2110         // This region has already been merged, the remaining hdfs file will be
2111         // cleaned by CatalogJanitor later
2112         hbi.setSkipChecks(true);
2113         LOG.info("Region " + descriptiveName
2114             + " was merged recently; its file(s) will be cleaned by the CatalogJanitor later");
2115         return;
2116       }
2117       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2118           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2119           "or deployed on any region server");
2120       // restore region consistency of an adopted orphan
2121       if (shouldFixMeta()) {
2122         if (!hbi.isHdfsRegioninfoPresent()) {
2123           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2124               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2125               " used.");
2126           return;
2127         }
2128 
2129         HRegionInfo hri = hbi.getHdfsHRI();
2130         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2131         if (tableInfo.regionsFromMeta.isEmpty()) {
2132           for (HbckInfo h : regionInfoMap.values()) {
2133             if (hri.getTable().equals(h.getTableName())) {
2134               if (h.metaEntry != null) tableInfo.regionsFromMeta
2135                   .add((HRegionInfo) h.metaEntry);
2136             }
2137           }
2138           Collections.sort(tableInfo.regionsFromMeta);
2139         }
2140         for (HRegionInfo region : tableInfo.regionsFromMeta) {
2141           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2142               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2143                 hri.getEndKey()) >= 0)
2144               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2145             if(region.isSplit() || region.isOffline()) continue;
2146             Path regionDir = hbi.getHdfsRegionDir();
2147             FileSystem fs = regionDir.getFileSystem(getConf());
2148             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2149             for (Path familyDir : familyDirs) {
2150               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2151               for (Path referenceFilePath : referenceFilePaths) {
2152                 Path parentRegionDir =
2153                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2154                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2155                   LOG.warn(hri + " start and stop keys are in the range of " + region
2156                       + ". The region might not have been cleaned up from hdfs when the split of region "
2157                       + region + " failed. Hence deleting it from hdfs.");
2158                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2159                     regionDir.getParent(), hri);
2160                   return;
2161                 }
2162               }
2163             }
2164           }
2165         }
2166         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2167         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2168         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2169             admin.getClusterStatus().getServers(), numReplicas);
2170 
2171         tryAssignmentRepair(hbi, "Trying to reassign region...");
2172       }
2173 
2174     } else if (!inMeta && inHdfs && isDeployed) {
2175       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2176           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2177       debugLsr(hbi.getHdfsRegionDir());
2178       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2179         // for replicas, this means that we should undeploy the region (we would have
2180         // gone over the primaries and fixed meta holes in first phase under
2181         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2182         // this stage unless it is an unwanted replica)
2183         if (shouldFixAssignments()) {
2184           undeployRegionsForHbi(hbi);
2185         }
2186       }
2187       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2188         if (!hbi.isHdfsRegioninfoPresent()) {
2189           LOG.error("This should have been repaired in table integrity repair phase");
2190           return;
2191         }
2192 
2193         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2194         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2195         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2196             admin.getClusterStatus().getServers(), numReplicas);
2197         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2198       }
2199 
2200     // ========== Cases where the region is in hbase:meta =============
2201     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2202       // check whether this is an actual error, or just transient state where parent
2203       // is not cleaned
2204       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2205         // check that split daughters are there
2206         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2207         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2208         if (infoA != null && infoB != null) {
2209           // we already processed or will process daughters. Move on, nothing to see here.
2210           hbi.setSkipChecks(true);
2211           return;
2212         }
2213       }
2214       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2215           + descriptiveName + " is a split parent in META, in HDFS, "
2216           + "and not deployed on any region server. This could be transient.");
2217       if (shouldFixSplitParents()) {
2218         setShouldRerun();
2219         resetSplitParent(hbi);
2220       }
2221     } else if (inMeta && !inHdfs && !isDeployed) {
2222       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2223           + descriptiveName + " found in META, but not in HDFS "
2224           + "or deployed on any region server.");
2225       if (shouldFixMeta()) {
2226         deleteMetaRegion(hbi);
2227       }
2228     } else if (inMeta && !inHdfs && isDeployed) {
2229       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2230           + " found in META, but not in HDFS, " +
2231           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2232       // We treat HDFS as ground truth.  Any information in meta is transient
2233       // and equivalent data can be regenerated.  So, let's unassign and remove
2234       // these problems from META.
2235       if (shouldFixAssignments()) {
2236         errors.print("Trying to fix unassigned region...");
2237         undeployRegions(hbi);
2238       }
2239       if (shouldFixMeta()) {
2240         // wait for it to complete
2241         deleteMetaRegion(hbi);
2242       }
2243     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2244       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2245           + " not deployed on any region server.");
2246       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2247     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2248       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2249           "Region " + descriptiveName + " should not be deployed according " +
2250           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2251       if (shouldFixAssignments()) {
2252         errors.print("Trying to close the region " + descriptiveName);
2253         setShouldRerun();
2254         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2255       }
2256     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2257       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2258           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2259           + " but is multiply assigned to region servers " +
2260           Joiner.on(", ").join(hbi.deployedOn));
2261       // If we are trying to fix the errors
2262       if (shouldFixAssignments()) {
2263         errors.print("Trying to fix assignment error...");
2264         setShouldRerun();
2265         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2266       }
2267     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2268       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2269           + descriptiveName + " listed in hbase:meta on region server " +
2270           hbi.metaEntry.regionServer + " but found on region server " +
2271           hbi.deployedOn.get(0));
2272       // If we are trying to fix the errors
2273       if (shouldFixAssignments()) {
2274         errors.print("Trying to fix assignment error...");
2275         setShouldRerun();
2276         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2277         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2278       }
2279     } else {
2280       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2281           " is in an unforeseen state:" +
2282           " inMeta=" + inMeta +
2283           " inHdfs=" + inHdfs +
2284           " isDeployed=" + isDeployed +
2285           " isMultiplyDeployed=" + isMultiplyDeployed +
2286           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2287           " shouldBeDeployed=" + shouldBeDeployed);
2288     }
2289   }
2290 
2291   /**
2292    * Checks tables integrity. Goes over all regions and scans the tables.
2293    * Collects all the pieces for each table and checks if there are missing,
2294    * repeated or overlapping ones.
2295    * @throws IOException
2296    */
2297   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2298     tablesInfo = new TreeMap<TableName,TableInfo> ();
2299     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2300     for (HbckInfo hbi : regionInfoMap.values()) {
2301       // Check only valid, working regions
2302       if (hbi.metaEntry == null) {
2303         // this assumes that consistency check has run loadMetaEntry
2304         Path p = hbi.getHdfsRegionDir();
2305         if (p == null) {
2306           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2307         }
2308 
2309         // TODO test.
2310         continue;
2311       }
2312       if (hbi.metaEntry.regionServer == null) {
2313         errors.detail("Skipping region because no region server: " + hbi);
2314         continue;
2315       }
2316       if (hbi.metaEntry.isOffline()) {
2317         errors.detail("Skipping region because it is offline: " + hbi);
2318         continue;
2319       }
2320       if (hbi.containsOnlyHdfsEdits()) {
2321         errors.detail("Skipping region because it only contains edits: " + hbi);
2322         continue;
2323       }
2324 
2325       // Missing regionDir or over-deployment is checked elsewhere. Include
2326       // these cases in modTInfo, so we can evaluate those regions as part of
2327       // the region chain in META
2328       //if (hbi.foundRegionDir == null) continue;
2329       //if (hbi.deployedOn.size() != 1) continue;
2330       if (hbi.deployedOn.size() == 0) continue;
2331 
2332       // We should be safe here
2333       TableName tableName = hbi.metaEntry.getTable();
2334       TableInfo modTInfo = tablesInfo.get(tableName);
2335       if (modTInfo == null) {
2336         modTInfo = new TableInfo(tableName);
2337       }
2338       for (ServerName server : hbi.deployedOn) {
2339         modTInfo.addServer(server);
2340       }
2341 
2342       if (!hbi.isSkipChecks()) {
2343         modTInfo.addRegionInfo(hbi);
2344       }
2345 
2346       tablesInfo.put(tableName, modTInfo);
2347     }
2348 
2349     loadTableInfosForTablesWithNoRegion();
2350 
2351     for (TableInfo tInfo : tablesInfo.values()) {
2352       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2353       if (!tInfo.checkRegionChain(handler)) {
2354         errors.report("Found inconsistency in table " + tInfo.getName());
2355       }
2356     }
2357     return tablesInfo;
2358   }
2359 
2360   /** Loads table infos for tables that may not have been included because no
2361    * regions were reported for them, although the table dir exists in hdfs
2362    */
2363   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2364     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2365     for (HTableDescriptor htd : allTables.values()) {
2366       if (checkMetaOnly && !htd.isMetaTable()) {
2367         continue;
2368       }
2369 
2370       TableName tableName = htd.getTableName();
2371       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2372         TableInfo tableInfo = new TableInfo(tableName);
2373         tableInfo.htds.add(htd);
2374         tablesInfo.put(htd.getTableName(), tableInfo);
2375       }
2376     }
2377   }
2378 
2379   /**
2380    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2381    * @return number of file move fixes done to merge regions.
2382    */
2383   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2384     int fileMoves = 0;
2385     String thread = Thread.currentThread().getName();
2386     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2387     debugLsr(contained.getHdfsRegionDir());
2388 
2389     // rename the contained into the container.
2390     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2391     FileStatus[] dirs = null;
2392     try {
2393       dirs = fs.listStatus(contained.getHdfsRegionDir());
2394     } catch (FileNotFoundException fnfe) {
2395       // region we are attempting to merge in is not present!  Since this is a merge, there is
2396       // no harm skipping this region if it does not exist.
2397       if (!fs.exists(contained.getHdfsRegionDir())) {
2398         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2399             + " is missing. Assuming already sidelined or moved.");
2400       } else {
2401         sidelineRegionDir(fs, contained);
2402       }
2403       return fileMoves;
2404     }
2405 
2406     if (dirs == null) {
2407       if (!fs.exists(contained.getHdfsRegionDir())) {
2408         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2409             + " already sidelined.");
2410       } else {
2411         sidelineRegionDir(fs, contained);
2412       }
2413       return fileMoves;
2414     }
2415 
2416     for (FileStatus cf : dirs) {
2417       Path src = cf.getPath();
2418       Path dst =  new Path(targetRegionDir, src.getName());
2419 
2420       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2421         // do not copy the old .regioninfo file.
2422         continue;
2423       }
2424 
2425       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2426         // do not copy the .oldlogs files
2427         continue;
2428       }
2429 
2430       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2431       // FileSystem.rename is inconsistent with directories -- if the
2432       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2433       // it moves the src into the dst dir resulting in (foo/a/b).  If
2434       // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2435       for (FileStatus hfile : fs.listStatus(src)) {
2436         boolean success = fs.rename(hfile.getPath(), dst);
2437         if (success) {
2438           fileMoves++;
2439         }
2440       }
2441       LOG.debug("[" + thread + "] Sideline directory contents:");
2442       debugLsr(targetRegionDir);
2443     }
2444 
2445     // if all renames succeeded, sideline the now-empty contained region dir.
2446     sidelineRegionDir(fs, contained);
2447     LOG.info("[" + thread + "] Sidelined region dir " + contained.getHdfsRegionDir() + " into " +
2448         getSidelineDir());
2449     debugLsr(contained.getHdfsRegionDir());
2450 
2451     return fileMoves;
2452   }
2453 
2454 
2455   static class WorkItemOverlapMerge implements Callable<Void> {
2456     private TableIntegrityErrorHandler handler;
2457     Collection<HbckInfo> overlapgroup;
2458 
2459     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2460       this.handler = handler;
2461       this.overlapgroup = overlapgroup;
2462     }
2463 
2464     @Override
2465     public Void call() throws Exception {
2466       handler.handleOverlapGroup(overlapgroup);
2467       return null;
2468     }
2469   };
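       // Usage sketch (hypothetical submission code, assuming the class-level
       // 'executor' ExecutorService used elsewhere in this file):
       //
       //   List<WorkItemOverlapMerge> merges = ...; // one work item per overlap group
       //   for (Future<Void> f : executor.invokeAll(merges)) {
       //     f.get(); // surfaces any exception thrown by handleOverlapGroup()
       //   }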
2470 
2471 
2472   /**
2473    * Maintain information about a particular table.
2474    */
2475   public class TableInfo {
2476     TableName tableName;
2477     TreeSet <ServerName> deployedOn;
2478 
2479     // backwards regions
2480     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2481 
2482     // sidelined big overlapped regions
2483     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2484 
2485     // region split calculator
2486     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2487 
2488     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2489     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2490 
2491     // key = start split, values = set of splits in problem group
2492     final Multimap<byte[], HbckInfo> overlapGroups =
2493       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2494 
2495     // list of regions derived from meta entries.
2496     final List<HRegionInfo> regionsFromMeta = new ArrayList<HRegionInfo>();
2497 
2498     TableInfo(TableName name) {
2499       this.tableName = name;
2500       deployedOn = new TreeSet <ServerName>();
2501     }
2502 
2503     /**
2504      * @return descriptor common to all regions.  null if there are none or multiple!
2505      */
2506     private HTableDescriptor getHTD() {
2507       if (htds.size() == 1) {
2508         return (HTableDescriptor)htds.toArray()[0];
2509       } else {
2510         LOG.error("None/Multiple table descriptors found for table '"
2511           + tableName + "' regions: " + htds);
2512       }
2513       return null;
2514     }
2515 
2516     public void addRegionInfo(HbckInfo hir) {
2517       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2518         // end key is absolute end key, just add it.
2519         // ignore replicas other than primary for these checks
2520         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2521         return;
2522       }
2523 
2524       // if not the absolute end key, check for cycle
2525       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2526         errors.reportError(
2527             ERROR_CODE.REGION_CYCLE,
2528             String.format("The endkey for this region comes before the "
2529                 + "startkey, startkey=%s, endkey=%s",
2530                 Bytes.toStringBinary(hir.getStartKey()),
2531                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2532         backwards.add(hir);
2533         return;
2534       }
2535 
2536       // main case, add to split calculator
2537       // ignore replicas other than primary for these checks
2538       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2539     }
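         // Examples (keys shown as strings):
         //   [a, b) : normal region, added to the split calculator
         //   [a, "") : empty end key, i.e. the last region in the chain, added directly
         //   [b, a) : end key sorts before start key, reported as REGION_CYCLE
         // Replicas with a non-default replica id are ignored by all of these checks.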
2540 
2541     public void addServer(ServerName server) {
2542       this.deployedOn.add(server);
2543     }
2544 
2545     public TableName getName() {
2546       return tableName;
2547     }
2548 
2549     public int getNumRegions() {
2550       return sc.getStarts().size() + backwards.size();
2551     }
2552 
2553     private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2554       ErrorReporter errors;
2555 
2556       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2557         this.errors = errors;
2558         setTableInfo(ti);
2559       }
2560 
2561       @Override
2562       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2563         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2564             "First region should start with an empty key.  You need to "
2565             + "create a new region and regioninfo in HDFS to plug the hole.",
2566             getTableInfo(), hi);
2567       }
2568 
2569       @Override
2570       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2571         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2572             "Last region should end with an empty key. You need to "
2573                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2574       }
2575 
2576       @Override
2577       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2578         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2579             "Region has the same start and end key.", getTableInfo(), hi);
2580       }
2581 
2582       @Override
2583       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2584         byte[] key = r1.getStartKey();
2585         // dup start key
2586         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2587             "Multiple regions have the same startkey: "
2588             + Bytes.toStringBinary(key), getTableInfo(), r1);
2589         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2590             "Multiple regions have the same startkey: "
2591             + Bytes.toStringBinary(key), getTableInfo(), r2);
2592       }
2593 
2594       @Override
2595       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2596         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2597             "There is an overlap in the region chain.",
2598             getTableInfo(), hi1, hi2);
2599       }
2600 
2601       @Override
2602       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2603         errors.reportError(
2604             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2605             "There is a hole in the region chain between "
2606                 + Bytes.toStringBinary(holeStart) + " and "
2607                 + Bytes.toStringBinary(holeStop)
2608                 + ".  You need to create a new .regioninfo and region "
2609                 + "dir in hdfs to plug the hole.");
2610       }
2611     }
2612 
2613     /**
2614      * This handler fixes integrity errors from hdfs information.  There are
2615      * basically three classes of integrity problems 1) holes, 2) overlaps, and
2616      * 3) invalid regions.
2617      *
2618      * This class overrides methods that fix holes and the overlap group case.
2619      * Individual cases of particular overlaps are handled by the general
2620      * overlap group merge repair case.
2621      *
2622      * If hbase is online, this forces regions offline before doing merge
2623      * operations.
2624      */
2625     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2626       Configuration conf;
2627 
2628       boolean fixOverlaps = true;
2629 
2630       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2631           boolean fixHoles, boolean fixOverlaps) {
2632         super(ti, errors);
2633         this.conf = conf;
2634         this.fixOverlaps = fixOverlaps;
2635         // TODO properly use fixHoles
2636       }
2637 
2638       /**
2639        * This is a special case hole -- when the first region of a table is
2640        * missing from META, HBase doesn't acknowledge the existence of the
2641        * table.
2642        */
2643       @Override
2644       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2645         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2646             "First region should start with an empty key.  Creating a new " +
2647             "region and regioninfo in HDFS to plug the hole.",
2648             getTableInfo(), next);
2649         HTableDescriptor htd = getTableInfo().getHTD();
2650         // from special EMPTY_START_ROW to next region's startKey
2651         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2652             HConstants.EMPTY_START_ROW, next.getStartKey());
2653 
2654         // TODO test
2655         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2656         LOG.info("Table region start key was not empty.  Created new empty region: "
2657             + newRegion + " " + region);
2658         fixes++;
2659       }
2660 
2661       @Override
2662       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2663         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2664             "Last region should end with an empty key.  Creating a new "
2665                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2666         HTableDescriptor htd = getTableInfo().getHTD();
2667         // from curEndKey to EMPTY_START_ROW
2668         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2669             HConstants.EMPTY_START_ROW);
2670 
2671         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2672         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2673             + " " + region);
2674         fixes++;
2675       }
2676 
2677       /**
2678        * There is a hole in the hdfs regions that violates the table integrity
2679        * rules.  Create a new empty region that patches the hole.
2680        */
2681       @Override
2682       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2683         errors.reportError(
2684             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2685             "There is a hole in the region chain between "
2686                 + Bytes.toStringBinary(holeStartKey) + " and "
2687                 + Bytes.toStringBinary(holeStopKey)
2688                 + ".  Creating a new regioninfo and region "
2689                 + "dir in hdfs to plug the hole.");
2690         HTableDescriptor htd = getTableInfo().getHTD();
2691         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2692         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2693         LOG.info("Plugged hole by creating new empty region: " + newRegion + " " + region);
2694         fixes++;
2695       }
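           // A minimal sketch of the hole-plugging step above in isolation, assuming
           // a table descriptor 'htd' is at hand; the key literals are hypothetical:
           //
           //   HRegionInfo plug = new HRegionInfo(htd.getTableName(),
           //       Bytes.toBytes("row-d"),   // end key of the region before the hole
           //       Bytes.toBytes("row-f"));  // start key of the region after the hole
           //   HBaseFsckRepair.createHDFSRegionDir(conf, plug, htd);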
2696 
2697       /**
2698        * This takes set of overlapping regions and merges them into a single
2699        * region.  This covers cases like degenerate regions, shared start key,
2700        * general overlaps, duplicate ranges, and partial overlapping regions.
2701        *
2702        * Cases:
2703        * - Clean regions that overlap
2704        * - Only .oldlogs regions (can't find the start/stop range, or figure it out)
2705        *
2706        * This is basically threadsafe, except for the 'fixes' counter increment in mergeOverlaps.
2707        */
2708       @Override
2709       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2710           throws IOException {
2711         Preconditions.checkNotNull(overlap);
2712         Preconditions.checkArgument(overlap.size() > 0);
2713 
2714         if (!this.fixOverlaps) {
2715           LOG.warn("Not attempting to repair overlaps.");
2716           return;
2717         }
2718 
2719         if (overlap.size() > maxMerge) {
2720           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2721             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2722           if (sidelineBigOverlaps) {
2723           // we only sideline big overlapped groups that exceed the max number of regions to merge
2724             sidelineBigOverlaps(overlap);
2725           }
2726           return;
2727         }
2728 
2729         mergeOverlaps(overlap);
2730       }
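           // Decision flow of handleOverlapGroup above, summarized as a sketch:
           //   fixOverlaps == false        -> warn and return; nothing is modified
           //   overlap.size() <= maxMerge  -> mergeOverlaps(overlap)
           //   overlap.size() >  maxMerge  -> sidelineBigOverlaps(overlap) when the
           //                                  sidelineBigOverlaps option is enabled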
2731 
2732       void mergeOverlaps(Collection<HbckInfo> overlap)
2733           throws IOException {
2734         String thread = Thread.currentThread().getName();
2735         LOG.info("== [" + thread + "] Merging regions into one region: "
2736           + Joiner.on(",").join(overlap));
2737         // get the min / max range and close all concerned regions
2738         Pair<byte[], byte[]> range = null;
2739         for (HbckInfo hi : overlap) {
2740           if (range == null) {
2741             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2742           } else {
2743             if (RegionSplitCalculator.BYTES_COMPARATOR
2744                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2745               range.setFirst(hi.getStartKey());
2746             }
2747             if (RegionSplitCalculator.BYTES_COMPARATOR
2748                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2749               range.setSecond(hi.getEndKey());
2750             }
2751           }
2752           // need to close files so delete can happen.
2753           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2754           LOG.debug("[" + thread + "] Contained region dir before close");
2755           debugLsr(hi.getHdfsRegionDir());
2756           try {
2757             LOG.info("[" + thread + "] Closing region: " + hi);
2758             closeRegion(hi);
2759           } catch (IOException ioe) {
2760             LOG.warn("[" + thread + "] Was unable to close region " + hi
2761               + ".  Just continuing... ", ioe);
2762           } catch (InterruptedException e) {
2763             LOG.warn("[" + thread + "] Was unable to close region " + hi
2764               + ".  Just continuing... ", e);
2765           }
2766 
2767           try {
2768             LOG.info("[" + thread + "] Offlining region: " + hi);
2769             offline(hi.getRegionName());
2770           } catch (IOException ioe) {
2771             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2772               + ".  Just continuing... ", ioe);
2773           }
2774         }
2775 
2776         // create new empty container region.
2777         HTableDescriptor htd = getTableInfo().getHTD();
2778         // from start key to end Key
2779         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2780             range.getSecond());
2781         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2782         LOG.info("[" + thread + "] Created new empty container region: " +
2783             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2784         debugLsr(region.getRegionFileSystem().getRegionDir());
2785 
2786         // all target regions are closed, should be able to safely cleanup.
2787         boolean didFix = false;
2788         Path target = region.getRegionFileSystem().getRegionDir();
2789         for (HbckInfo contained : overlap) {
2790           LOG.info("[" + thread + "] Merging " + contained + " into " + target);
2791           int merges = mergeRegionDirs(target, contained);
2792           if (merges > 0) {
2793             didFix = true;
2794           }
2795         }
2796         if (didFix) {
2797           fixes++;
2798         }
2799       }
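           // Sketch of the range expansion done above: the new container region
           // spans the minimum start key and the maximum end key over the whole
           // group (key ranges hypothetical):
           //
           //   overlaps: [b,d), [c,f), [e,g)   =>   container region: [b,g)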
2800 
2801       /**
2802        * Sideline some regions in a big overlap group so that it
2803        * will have fewer regions, and it is easier to merge them later on.
2804        *
2805        * @param bigOverlap the overlapped group with regions more than maxMerge
2806        * @throws IOException
2807        */
2808       void sidelineBigOverlaps(
2809           Collection<HbckInfo> bigOverlap) throws IOException {
2810         int overlapsToSideline = bigOverlap.size() - maxMerge;
2811         if (overlapsToSideline > maxOverlapsToSideline) {
2812           overlapsToSideline = maxOverlapsToSideline;
2813         }
2814         List<HbckInfo> regionsToSideline =
2815           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2816         FileSystem fs = FileSystem.get(conf);
2817         for (HbckInfo regionToSideline: regionsToSideline) {
2818           try {
2819             LOG.info("Closing region: " + regionToSideline);
2820             closeRegion(regionToSideline);
2821           } catch (IOException ioe) {
2822             LOG.warn("Was unable to close region " + regionToSideline
2823               + ".  Just continuing... ", ioe);
2824           } catch (InterruptedException e) {
2825             LOG.warn("Was unable to close region " + regionToSideline
2826               + ".  Just continuing... ", e);
2827           }
2828 
2829           try {
2830             LOG.info("Offlining region: " + regionToSideline);
2831             offline(regionToSideline.getRegionName());
2832           } catch (IOException ioe) {
2833             LOG.warn("Unable to offline region from master: " + regionToSideline
2834               + ".  Just continuing... ", ioe);
2835           }
2836 
2837           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2838           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2839           if (sidelineRegionDir != null) {
2840             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2841             LOG.info("After sidelined big overlapped region: "
2842               + regionToSideline.getRegionNameAsString()
2843               + " to " + sidelineRegionDir.toString());
2844             fixes++;
2845           }
2846         }
2847       }
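           // Worked example of the sideline arithmetic above (numbers hypothetical):
           // with bigOverlap.size() = 12, maxMerge = 5, maxOverlapsToSideline = 3:
           //   overlapsToSideline = min(12 - 5, 3) = 3
           // so the three biggest ranges are sidelined, leaving 9 regions. That is
           // still above maxMerge, so a further hbck run may be needed to finish.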
2848     }
2849 
2850     /**
2851      * Check the region chain (from META) of this table.  We are looking for
2852      * holes, overlaps, and cycles.
2853      * @return false if there are errors
2854      * @throws IOException
2855      */
2856     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2857       // When a table is disabled there is no need to check its region chain. If some of its
2858       // regions happen to be deployed, the code below might report issues such as a missing
2859       // first or last region or a hole in the chain, and may try to fix them, which is unwanted.
2860       if (isTableDisabled(this.tableName)) {
2861         return true;
2862       }
2863       int originalErrorsCount = errors.getErrorList().size();
2864       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2865       SortedSet<byte[]> splits = sc.getSplits();
2866 
2867       byte[] prevKey = null;
2868       byte[] problemKey = null;
2869 
2870       if (splits.size() == 0) {
2871         // no region for this table
2872         handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2873       }
2874 
2875       for (byte[] key : splits) {
2876         Collection<HbckInfo> ranges = regions.get(key);
2877         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2878           for (HbckInfo rng : ranges) {
2879             handler.handleRegionStartKeyNotEmpty(rng);
2880           }
2881         }
2882 
2883         // check for degenerate ranges
2884         for (HbckInfo rng : ranges) {
2885           // special endkey case converts '' to null
2886           byte[] endKey = rng.getEndKey();
2887           endKey = (endKey.length == 0) ? null : endKey;
2888           if (Bytes.equals(rng.getStartKey(),endKey)) {
2889             handler.handleDegenerateRegion(rng);
2890           }
2891         }
2892 
2893         if (ranges.size() == 1) {
2894           // this split key is ok -- no overlap, not a hole.
2895           if (problemKey != null) {
2896             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2897           }
2898           problemKey = null; // fell through, no more problem.
2899         } else if (ranges.size() > 1) {
2900           // set the new problem key group name; if we already have a problem key,
2901           // just keep using it.
2902           if (problemKey == null) {
2903             // only for overlap regions.
2904             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2905             problemKey = key;
2906           }
2907           overlapGroups.putAll(problemKey, ranges);
2908 
2909           // record errors
2910           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2911           // this is dumb and O(n^2), but it shouldn't happen often
2912           for (HbckInfo r1 : ranges) {
2913             if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
2914             subRange.remove(r1);
2915             for (HbckInfo r2 : subRange) {
2916               if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
2917               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2918                 handler.handleDuplicateStartKeys(r1,r2);
2919               } else {
2920                 // overlap
2921                 handler.handleOverlapInRegionChain(r1, r2);
2922               }
2923             }
2924           }
2925 
2926         } else if (ranges.size() == 0) {
2927           if (problemKey != null) {
2928             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2929           }
2930           problemKey = null;
2931 
2932           byte[] holeStopKey = sc.getSplits().higher(key);
2933           // if higher key is null we reached the top.
2934           if (holeStopKey != null) {
2935             // hole
2936             handler.handleHoleInRegionChain(key, holeStopKey);
2937           }
2938         }
2939         prevKey = key;
2940       }
2941 
2942       // When the last region of a table is proper and has an empty end key,
2943       // 'prevKey' will be null.
2944       if (prevKey != null) {
2945         handler.handleRegionEndKeyNotEmpty(prevKey);
2946       }
2947 
2948       // TODO fold this into the TableIntegrityHandler
2949       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2950         LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
2951             " false to run serially.");
2952         boolean ok = handleOverlapsParallel(handler, prevKey);
2953         if (!ok) {
2954           return false;
2955         }
2956       } else {
2957         LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
2958             " true to run in parallel.");
2959         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2960           handler.handleOverlapGroup(overlap);
2961         }
2962       }
2963 
2964       if (details) {
2965         // do full region split map dump
2966         errors.print("---- Table '"  +  this.tableName
2967             + "': region split map");
2968         dump(splits, regions);
2969         errors.print("---- Table '"  +  this.tableName
2970             + "': overlap groups");
2971         dumpOverlapProblems(overlapGroups);
2972         errors.print("There are " + overlapGroups.keySet().size()
2973             + " overlap groups with " + overlapGroups.size()
2974             + " overlapping regions");
2975       }
2976       if (!sidelinedRegions.isEmpty()) {
2977         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2978         errors.print("---- Table '"  +  this.tableName
2979             + "': sidelined big overlapped regions");
2980         dumpSidelinedRegions(sidelinedRegions);
2981       }
2982       return errors.getErrorList().size() == originalErrorsCount;
2983     }
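         // How the coverage walk in checkRegionChain classifies each split key,
         // as a sketch:
         //   ranges.size() == 1  -> covered exactly once: healthy
         //   ranges.size() >  1  -> overlap: the group is recorded in overlapGroups
         //                          and each pair of primary replicas is reported as
         //                          DUPE_STARTKEYS or OVERLAP_IN_REGION_CHAIN
         //   ranges.size() == 0  -> hole: reported up to the next higher split key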
2984 
2985     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
2986         throws IOException {
2987       // we parallelize overlap handler for the case we have lots of groups to fix.  We can
2988       // safely assume each group is independent.
2989       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
2990       List<Future<Void>> rets;
2991       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2993         merges.add(new WorkItemOverlapMerge(overlap, handler));
2994       }
2995       try {
2996         rets = executor.invokeAll(merges);
2997       } catch (InterruptedException e) {
2998         LOG.error("Overlap merges were interrupted", e);
2999         return false;
3000       }
3001     for (int i = 0; i < merges.size(); i++) {
3002         WorkItemOverlapMerge work = merges.get(i);
3003         Future<Void> f = rets.get(i);
3004         try {
3005           f.get();
3006         } catch (ExecutionException e) {
3007           LOG.warn("Failed to merge overlap group " + work, e.getCause());
3008         } catch (InterruptedException e) {
3009           LOG.error("Waiting for overlap merges was interrupted", e);
3010           return false;
3011         }
3012       }
3013       return true;
3014     }
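         // The fork/join idiom used above, in isolation (a minimal sketch; 'executor'
         // is the class-level ExecutorService):
         //
         //   List<Future<Void>> rets = executor.invokeAll(merges); // fork all groups
         //   for (Future<Void> f : rets) {
         //     f.get(); // join; an ExecutionException wraps a per-group failure
         //   }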
3015 
3016     /**
3017      * Dumps the region coverage data in a readable form for visual debugging.
3018      *
3019      * @param splits the split points of the table
3020      * @param regions coverage map from split point to the regions covering it
3021      */
3022     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3023       // we display this way because the last end key should be displayed as well.
3024       StringBuilder sb = new StringBuilder();
3025       for (byte[] k : splits) {
3026         sb.setLength(0); // clear out existing buffer, if any.
3027         sb.append(Bytes.toStringBinary(k) + ":\t");
3028         for (HbckInfo r : regions.get(k)) {
3029           sb.append("[ " + r.toString() + ", "
3030               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3031         }
3032         errors.print(sb.toString());
3033       }
3034     }
3035   }
3036 
3037   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3038     // we display this way because the last end key should be displayed as
3039     // well.
3040     for (byte[] k : regions.keySet()) {
3041       errors.print(Bytes.toStringBinary(k) + ":");
3042       for (HbckInfo r : regions.get(k)) {
3043         errors.print("[ " + r.toString() + ", "
3044             + Bytes.toStringBinary(r.getEndKey()) + "]");
3045       }
3046       errors.print("----");
3047     }
3048   }
3049 
3050   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3051     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3052       TableName tableName = entry.getValue().getTableName();
3053       Path path = entry.getKey();
3054       errors.print("This sidelined region dir should be bulk loaded: "
3055         + path.toString());
3056       errors.print("Bulk load command looks like: "
3057         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3058         + path.toUri().getPath() + " "+ tableName);
3059     }
3060   }
3061 
3062   public Multimap<byte[], HbckInfo> getOverlapGroups(
3063       TableName table) {
3064     TableInfo ti = tablesInfo.get(table);
3065     return ti.overlapGroups;
3066   }
3067 
3068   /**
3069    * Return a list of user-space table names whose metadata has not been
3070    * modified in the last few milliseconds specified by timelag.
3071    * If none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3072    * SPLITA_QUALIFIER, or SPLITB_QUALIFIER columns has changed in the last
3073    * milliseconds specified by timelag, then the table is a candidate to be
3074    * returned.
3075    * @return tables that have not been modified recently
3076    */
3077   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3078     List<TableName> tableNames = new ArrayList<TableName>();
3079     long now = System.currentTimeMillis();
3080 
3081     for (HbckInfo hbi : regionInfoMap.values()) {
3082       MetaEntry info = hbi.metaEntry;
3083 
3084       // if the start key is zero, then we have found the first region of a table.
3085       // pick only those tables that were not modified in the last few milliseconds.
3086       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3087         if (info.modTime + timelag < now) {
3088           tableNames.add(info.getTable());
3089         } else {
3090           numSkipped.incrementAndGet(); // one more in-flux table
3091         }
3092       }
3093     }
3094     return getHTableDescriptors(tableNames);
3095   }
3096 
3097   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3098     HTableDescriptor[] htd = new HTableDescriptor[0];
3099     Admin admin = null;
3100     try {
3101       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3102       admin = new HBaseAdmin(getConf());
3103       htd = admin.getTableDescriptorsByTableName(tableNames);
3104     } catch (IOException e) {
3105       LOG.debug("Exception getting table descriptors", e);
3106     } finally {
3107       if (admin != null) {
3108         try {
3109           admin.close();
3110         } catch (IOException e) {
3111           LOG.debug("Exception closing HBaseAdmin", e);
3112         }
3113       }
3114     }
3115     return htd;
3116   }
3117 
3118   /**
3119    * Gets the entry in regionInfo corresponding to the given encoded
3120    * region name. If the region has not been seen yet, a new entry is added
3121    * and returned.
3122    */
3123   private synchronized HbckInfo getOrCreateInfo(String name) {
3124     HbckInfo hbi = regionInfoMap.get(name);
3125     if (hbi == null) {
3126       hbi = new HbckInfo(null);
3127       regionInfoMap.put(name, hbi);
3128     }
3129     return hbi;
3130   }
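       // The synchronized get-or-create above could equivalently be written with
       // computeIfAbsent if regionInfoMap is a ConcurrentMap (a sketch, assuming
       // Java 8+ and a concurrent map implementation):
       //
       //   return regionInfoMap.computeIfAbsent(name, n -> new HbckInfo(null));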
3131 
3132   private void checkAndFixTableLocks() throws IOException {
3133     TableLockChecker checker = new TableLockChecker(createZooKeeperWatcher(), errors);
3134     checker.checkTableLocks();
3135 
3136     if (this.fixTableLocks) {
3137       checker.fixExpiredTableLocks();
3138     }
3139   }
3140 
3141   /**
3142    * Check values in regionInfo for hbase:meta.
3143    * Check if zero or more than one region claiming hbase:meta is found.
3144    * If there are inconsistencies (i.e. zero regions or more than one region
3145    * pretends to be holding hbase:meta), try to fix that and report an error.
3146    * @throws IOException from HBaseFsckRepair functions
3147    * @throws KeeperException
3148    * @throws InterruptedException
3149    */
3150   boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3151     Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
3152     for (HbckInfo value : regionInfoMap.values()) {
3153       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3154         metaRegions.put(value.getReplicaId(), value);
3155       }
3156     }
3157     int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3158         .getRegionReplication();
3159     boolean noProblem = true;
3160     // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
3161     // Check the deployed servers. It should be exactly one server for each replica.
3162     for (int i = 0; i < metaReplication; i++) {
3163       HbckInfo metaHbckInfo = metaRegions.remove(i);
3164       List<ServerName> servers = new ArrayList<ServerName>();
3165       if (metaHbckInfo != null) {
3166         servers = metaHbckInfo.deployedOn;
3167       }
3168       if (servers.size() != 1) {
3169         noProblem = false;
3170         if (servers.size() == 0) {
3171           assignMetaReplica(i);
3172         } else if (servers.size() > 1) {
3173           errors.reportError(ERROR_CODE.MULTI_META_REGION,
3174               "hbase:meta, replicaId " + metaHbckInfo.getReplicaId() +
3175               " is found on more than one region server.");
3176           if (shouldFixAssignments()) {
3177             errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3178                          metaHbckInfo.getReplicaId() +"..");
3179             setShouldRerun();
3180             // try to fix it (treat it as a dupe assignment)
3181             HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3182           }
3183         }
3184       }
3185     }
3186     // unassign whatever is remaining in metaRegions. They are excess replicas.
3187     for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3188       noProblem = false;
3189       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3190           "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3191           ", deployed " + metaRegions.size());
3192       if (shouldFixAssignments()) {
3193         errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3194             " of hbase:meta..");
3195         setShouldRerun();
3196         unassignMetaReplica(entry.getValue());
3197       }
3198     }
3199     // if noProblem is false, rerun hbck with hopefully fixed META
3200     // if noProblem is true, no errors, so continue normally
3201     return noProblem;
3202   }
3203 
3204   private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3205   KeeperException {
3206     undeployRegions(hi);
3207     ZooKeeperWatcher zkw = createZooKeeperWatcher();
3208     ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
3209   }
3210 
3211   private void assignMetaReplica(int replicaId)
3212       throws IOException, KeeperException, InterruptedException {
3213     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3214         replicaId + " is not found on any region.");
3215     if (shouldFixAssignments()) {
3216       errors.print("Trying to fix a problem with hbase:meta..");
3217       setShouldRerun();
3218       // try to fix it (treat it as unassigned region)
3219       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3220           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3221       HBaseFsckRepair.fixUnassigned(admin, h);
3222       HBaseFsckRepair.waitUntilAssigned(admin, h);
3223     }
3224   }
3225 
3226   /**
3227    * Scan hbase:meta, adding all regions found to the regionInfo map.
3228    * @throws IOException if an error is encountered
3229    */
3230   boolean loadMetaEntries() throws IOException {
3231     MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3232       int countRecord = 1;
3233 
3234       // comparator to find the cell with the latest timestamp
3235       final Comparator<Cell> comp = new Comparator<Cell>() {
3236         @Override
3237         public int compare(Cell k1, Cell k2) {
3238           return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3239         }
3240       };
3241 
3242       @Override
3243       public boolean visit(Result result) throws IOException {
3244         try {
3245 
3246           // record the latest modification of this META record
3247           long ts = Collections.max(result.listCells(), comp).getTimestamp();
3248           RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3249           if (rl == null) {
3250             emptyRegionInfoQualifiers.add(result);
3251             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3252               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3253             return true;
3254           }
3255           ServerName sn = null;
3256           if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
3257               rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3258             emptyRegionInfoQualifiers.add(result);
3259             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3260               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3261             return true;
3262           }
3263           HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3264           if (!(isTableIncluded(hri.getTable())
3265               || hri.isMetaRegion())) {
3266             return true;
3267           }
3268           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3269           for (HRegionLocation h : rl.getRegionLocations()) {
3270             if (h == null || h.getRegionInfo() == null) {
3271               continue;
3272             }
3273             sn = h.getServerName();
3274             hri = h.getRegionInfo();
3275 
3276             MetaEntry m = null;
3277             if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
3278               m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3279             } else {
3280               m = new MetaEntry(hri, sn, ts, null, null);
3281             }
3282             HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3283             if (previous == null) {
3284               regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3285             } else if (previous.metaEntry == null) {
3286               previous.metaEntry = m;
3287             } else {
3288               throw new IOException("Two entries in hbase:meta are the same: " + previous);
3289             }
3290           }
3291           PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3292           for (HRegionInfo mergeRegion : new HRegionInfo[] {
3293               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3294             if (mergeRegion != null) {
3295               // This region has already been merged
3296               HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3297               hbInfo.setMerged(true);
3298             }
3299           }
3300 
3301           // show proof of progress to the user, once for every 100 records.
3302           if (countRecord % 100 == 0) {
3303             errors.progress();
3304           }
3305           countRecord++;
3306           return true;
3307         } catch (RuntimeException e) {
3308           LOG.error("Result=" + result);
3309           throw e;
3310         }
3311       }
3312     };
3313     if (!checkMetaOnly) {
3314       // Scan hbase:meta to pick up user regions
3315       MetaTableAccessor.fullScanRegions(connection, visitor);
3316     }
3317 
3318     errors.print("");
3319     return true;
3320   }
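       // Why Long.compare is used in the visitor's timestamp comparator above: the
       // raw "(int) (t1 - t2)" idiom can overflow and invert the sign. Hypothetical
       // values:
       //
       //   long t1 = 0L, t2 = 3_000_000_000L;
       //   (int) (t1 - t2)       // == 1294967296 > 0  (wrong: t1 is older)
       //   Long.compare(t1, t2)  // == -1              (right)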
3321 
3322   /**
3323    * Stores the regioninfo entries scanned from META
3324    */
3325   static class MetaEntry extends HRegionInfo {
3326     ServerName regionServer;   // server hosting this region
3327       long modTime;          // timestamp of most recent metadata modification
3328     HRegionInfo splitA, splitB; //split daughters
3329 
3330     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3331       this(rinfo, regionServer, modTime, null, null);
3332     }
3333 
3334     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3335         HRegionInfo splitA, HRegionInfo splitB) {
3336       super(rinfo);
3337       this.regionServer = regionServer;
3338       this.modTime = modTime;
3339       this.splitA = splitA;
3340       this.splitB = splitB;
3341     }
3342 
3343     @Override
3344     public boolean equals(Object o) {
3345       boolean superEq = super.equals(o);
3346       if (!superEq) {
3347         return superEq;
3348       }
3349 
3350       MetaEntry me = (MetaEntry) o;
3351       if (!regionServer.equals(me.regionServer)) {
3352         return false;
3353       }
3354       return (modTime == me.modTime);
3355     }
3356 
3357     @Override
3358     public int hashCode() {
3359       int hash = Arrays.hashCode(getRegionName());
3360       hash ^= getRegionId();
3361       hash ^= Arrays.hashCode(getStartKey());
3362       hash ^= Arrays.hashCode(getEndKey());
3363       hash ^= Boolean.valueOf(isOffline()).hashCode();
3364       hash ^= getTable().hashCode();
3365       if (regionServer != null) {
3366         hash ^= regionServer.hashCode();
3367       }
3368       hash ^= modTime;
3369       return hash;
3370     }
3371   }
3372 
3373   /**
3374    * Stores the regioninfo entries from HDFS
3375    */
3376   static class HdfsEntry {
3377     HRegionInfo hri;
3378     Path hdfsRegionDir = null;
3379     long hdfsRegionDirModTime  = 0;
3380     boolean hdfsRegioninfoFilePresent = false;
3381     boolean hdfsOnlyEdits = false;
3382   }
3383 
3384   /**
3385    * Stores the regioninfo retrieved from Online region servers.
3386    */
3387   static class OnlineEntry {
3388     HRegionInfo hri;
3389     ServerName hsa;
3390 
3391     @Override
3392     public String toString() {
3393       return hsa.toString() + ";" + hri.getRegionNameAsString();
3394     }
3395   }
3396 
3397   /**
3398    * Maintain information about a particular region.  It gathers information
3399    * from three places -- HDFS, META, and region servers.
3400    */
3401   public static class HbckInfo implements KeyRange {
3402     private MetaEntry metaEntry = null; // info in META
3403     private HdfsEntry hdfsEntry = null; // info in HDFS
3404     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3405     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3406     private boolean skipChecks = false; // whether to skip further checks to this region info.
3407     private boolean isMerged = false;// whether this region has already been merged into another one
3408     private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
3409     private HRegionInfo primaryHRIForDeployedReplica = null;
3410 
3411     HbckInfo(MetaEntry metaEntry) {
3412       this.metaEntry = metaEntry;
3413     }
3414 
3415     public int getReplicaId() {
3416       if (metaEntry != null) return metaEntry.getReplicaId();
3417       return deployedReplicaId;
3418     }
3419 
3420     public synchronized void addServer(HRegionInfo hri, ServerName server) {
3421       OnlineEntry rse = new OnlineEntry();
3422       rse.hri = hri;
3423       rse.hsa = server;
3424       this.deployedEntries.add(rse);
3425       this.deployedOn.add(server);
3426       // save the replicaId that we see deployed in the cluster
3427       this.deployedReplicaId = hri.getReplicaId();
3428       this.primaryHRIForDeployedReplica =
3429           RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3430     }
3431 
3432     @Override
3433     public synchronized String toString() {
3434       StringBuilder sb = new StringBuilder();
3435       sb.append("{ meta => ");
3436       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3437       sb.append(", hdfs => " + getHdfsRegionDir());
3438       sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries));
3439       sb.append(", replicaId => " + getReplicaId());
3440       sb.append(" }");
3441       return sb.toString();
3442     }
3443 
3444     @Override
3445     public byte[] getStartKey() {
3446       if (this.metaEntry != null) {
3447         return this.metaEntry.getStartKey();
3448       } else if (this.hdfsEntry != null) {
3449         return this.hdfsEntry.hri.getStartKey();
3450       } else {
3451         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3452         return null;
3453       }
3454     }
3455 
3456     @Override
3457     public byte[] getEndKey() {
3458       if (this.metaEntry != null) {
3459         return this.metaEntry.getEndKey();
3460       } else if (this.hdfsEntry != null) {
3461         return this.hdfsEntry.hri.getEndKey();
3462       } else {
3463         LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3464         return null;
3465       }
3466     }
3467 
3468     public TableName getTableName() {
3469       if (this.metaEntry != null) {
3470         return this.metaEntry.getTable();
3471       } else if (this.hdfsEntry != null) {
3472         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3473         // so we get the name from the Path
3474         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3475         return FSUtils.getTableName(tableDir);
3476       } else {
3477         // return the info from the first online/deployed hri
3478         for (OnlineEntry e : deployedEntries) {
3479           return e.hri.getTable();
3480         }
3481         return null;
3482       }
3483     }
3484 
3485     public String getRegionNameAsString() {
3486       if (metaEntry != null) {
3487         return metaEntry.getRegionNameAsString();
3488       } else if (hdfsEntry != null) {
3489         if (hdfsEntry.hri != null) {
3490           return hdfsEntry.hri.getRegionNameAsString();
3491         }
3492       } else {
3493         // return the info from the first online/deployed hri
3494         for (OnlineEntry e : deployedEntries) {
3495           return e.hri.getRegionNameAsString();
3496         }
3497       }
3498       return null;
3499     }
3500 
3501     public byte[] getRegionName() {
3502       if (metaEntry != null) {
3503         return metaEntry.getRegionName();
3504       } else if (hdfsEntry != null) {
3505         return hdfsEntry.hri.getRegionName();
3506       } else {
3507         // return the info from the first online/deployed hri
3508         for (OnlineEntry e : deployedEntries) {
3509           return e.hri.getRegionName();
3510         }
3511         return null;
3512       }
3513     }
3514 
3515     public HRegionInfo getPrimaryHRIForDeployedReplica() {
3516       return primaryHRIForDeployedReplica;
3517     }
3518 
3519     Path getHdfsRegionDir() {
3520       if (hdfsEntry == null) {
3521         return null;
3522       }
3523       return hdfsEntry.hdfsRegionDir;
3524     }
3525 
3526     boolean containsOnlyHdfsEdits() {
3527       if (hdfsEntry == null) {
3528         return false;
3529       }
3530       return hdfsEntry.hdfsOnlyEdits;
3531     }
3532 
3533     boolean isHdfsRegioninfoPresent() {
3534       if (hdfsEntry == null) {
3535         return false;
3536       }
3537       return hdfsEntry.hdfsRegioninfoFilePresent;
3538     }
3539 
3540     long getModTime() {
3541       if (hdfsEntry == null) {
3542         return 0;
3543       }
3544       return hdfsEntry.hdfsRegionDirModTime;
3545     }
3546 
3547     HRegionInfo getHdfsHRI() {
3548       if (hdfsEntry == null) {
3549         return null;
3550       }
3551       return hdfsEntry.hri;
3552     }
3553 
3554     public void setSkipChecks(boolean skipChecks) {
3555       this.skipChecks = skipChecks;
3556     }
3557 
3558     public boolean isSkipChecks() {
3559       return skipChecks;
3560     }
3561 
3562     public void setMerged(boolean isMerged) {
3563       this.isMerged = isMerged;
3564     }
3565 
3566     public boolean isMerged() {
3567       return this.isMerged;
3568     }
3569   }
3570 
3571   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3572     @Override
3573     public int compare(HbckInfo l, HbckInfo r) {
3574       if (l == r) {
3575         // same instance
3576         return 0;
3577       }
3578 
3579       int tableCompare = l.getTableName().compareTo(r.getTableName());
3580       if (tableCompare != 0) {
3581         return tableCompare;
3582       }
3583 
3584       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3585           l.getStartKey(), r.getStartKey());
3586       if (startComparison != 0) {
3587         return startComparison;
3588       }
3589 
3590       // Special case for absolute endkey
3591       byte[] endKey = r.getEndKey();
3592       endKey = (endKey.length == 0) ? null : endKey;
3593       byte[] endKey2 = l.getEndKey();
3594       endKey2 = (endKey2.length == 0) ? null : endKey2;
3595       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3596           endKey2,  endKey);
3597 
3598       if (endComparison != 0) {
3599         return endComparison;
3600       }
3601 
3602       // use regionId as tiebreaker.
3603       // Null is considered after all possible values so make it bigger.
3604       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3605         return 0;
3606       }
3607       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3608         return 1;
3609       }
3610       // l.hdfsEntry must not be null
3611       if (r.hdfsEntry == null) {
3612         return -1;
3613       }
3614       // both l.hdfsEntry and r.hdfsEntry must not be null.
3615       return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
3616     }
3617   };
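       // Ordering guarantees of 'cmp' above, as a sketch: table name first, then
       // start key, then end key (the empty, absolute end key is converted to null,
       // which BYTES_COMPARATOR orders after all other values), with entries lacking
       // an hdfsEntry ordered last. Typical use (hbckInfoList is a hypothetical
       // List<HbckInfo>):
       //
       //   Collections.sort(hbckInfoList, cmp); // groups by table, then key range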
3618 
3619   /**
3620    * Prints summary of all tables found on the system.
3621    */
3622   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3623     StringBuilder sb = new StringBuilder();
3624     errors.print("Summary:");
3625     for (TableInfo tInfo : tablesInfo.values()) {
3626       if (errors.tableHasErrors(tInfo)) {
3627         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3628       } else {
3629         errors.print("  " + tInfo.getName() + " is okay.");
3630       }
3631       errors.print("    Number of regions: " + tInfo.getNumRegions());
3632       sb.setLength(0); // clear out existing buffer, if any.
3633       sb.append("    Deployed on: ");
3634       for (ServerName server : tInfo.deployedOn) {
3635         sb.append(" " + server.toString());
3636       }
3637       errors.print(sb.toString());
3638     }
3639   }
3640 
3641   static ErrorReporter getErrorReporter(
3642       final Configuration conf) throws ClassNotFoundException {
3643     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3644     return ReflectionUtils.newInstance(reporter, conf);
3645   }
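       // A custom reporter can be plugged in through configuration; a sketch, where
       // MyQuietReporter is a hypothetical ErrorReporter implementation:
       //
       //   Configuration conf = HBaseConfiguration.create();
       //   conf.setClass("hbasefsck.errorreporter",
       //       MyQuietReporter.class, ErrorReporter.class);
       //   ErrorReporter reporter = getErrorReporter(conf); // -> MyQuietReporter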
3646 
3647   public interface ErrorReporter {
3648     enum ERROR_CODE {
3649       UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3650       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
3651       NOT_DEPLOYED,
3652       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3653       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3654       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3655       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3656       WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE,
3657       NO_TABLE_STATE
3658     }
3659     void clear();
3660     void report(String message);
3661     void reportError(String message);
3662     void reportError(ERROR_CODE errorCode, String message);
3663     void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3664     void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3665     void reportError(
3666       ERROR_CODE errorCode,
3667       String message,
3668       TableInfo table,
3669       HbckInfo info1,
3670       HbckInfo info2
3671     );
3672     int summarize();
3673     void detail(String details);
3674     ArrayList<ERROR_CODE> getErrorList();
3675     void progress();
3676     void print(String message);
3677     void resetErrors();
3678     boolean tableHasErrors(TableInfo table);
3679   }
3680 
3681   static class PrintingErrorReporter implements ErrorReporter {
3682     public int errorCount = 0;
3683     private int showProgress;
3684 
3685     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3686 
3687     // for use by unit tests to verify which errors were discovered
3688     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3689 
3690     @Override
3691     public void clear() {
3692       errorTables.clear();
3693       errorList.clear();
3694       errorCount = 0;
3695     }
3696 
3697     @Override
3698     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3699       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3700         System.err.println(message);
3701         return;
3702       }
3703 
3704       errorList.add(errorCode);
3705       if (!summary) {
3706         System.out.println("ERROR: " + message);
3707       }
3708       errorCount++;
3709       showProgress = 0;
3710     }
3711 
3712     @Override
3713     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3714       errorTables.add(table);
3715       reportError(errorCode, message);
3716     }
3717 
3718     @Override
3719     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3720                                          HbckInfo info) {
3721       errorTables.add(table);
3722       String reference = "(region " + info.getRegionNameAsString() + ")";
3723       reportError(errorCode, reference + " " + message);
3724     }
3725 
3726     @Override
3727     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3728                                          HbckInfo info1, HbckInfo info2) {
3729       errorTables.add(table);
3730       String reference = "(regions " + info1.getRegionNameAsString()
3731           + " and " + info2.getRegionNameAsString() + ")";
3732       reportError(errorCode, reference + " " + message);
3733     }
3734 
3735     @Override
3736     public synchronized void reportError(String message) {
3737       reportError(ERROR_CODE.UNKNOWN, message);
3738     }
3739 
3740     /**
3741      * Report error information, but do not increment the error count.  Intended for cases
3742      * where the actual error would have been reported previously.
3743      * @param message
3744      */
3745     @Override
3746     public synchronized void report(String message) {
3747       if (! summary) {
3748         System.out.println("ERROR: " + message);
3749       }
3750       showProgress = 0;
3751     }
3752 
3753     @Override
3754     public synchronized int summarize() {
3755       System.out.println(Integer.toString(errorCount) +
3756                          " inconsistencies detected.");
3757       if (errorCount == 0) {
3758         System.out.println("Status: OK");
3759         return 0;
3760       } else {
3761         System.out.println("Status: INCONSISTENT");
3762         return -1;
3763       }
3764     }
3765 
3766     @Override
3767     public ArrayList<ERROR_CODE> getErrorList() {
3768       return errorList;
3769     }
3770 
3771     @Override
3772     public synchronized void print(String message) {
3773       if (!summary) {
3774         System.out.println(message);
3775       }
3776     }
3777 
3778     @Override
3779     public boolean tableHasErrors(TableInfo table) {
3780       return errorTables.contains(table);
3781     }
3782 
3783     @Override
3784     public void resetErrors() {
3785       errorCount = 0;
3786     }
3787 
3788     @Override
3789     public synchronized void detail(String message) {
3790       if (details) {
3791         System.out.println(message);
3792       }
3793       showProgress = 0;
3794     }
3795 
3796     @Override
3797     public synchronized void progress() {
3798       if (showProgress++ == 10) {
3799         if (!summary) {
3800           System.out.print(".");
3801         }
3802         showProgress = 0;
3803       }
3804     }
3805   }
3806 
3807   /**
3808    * Contact a region server and get all information from it
3809    */
3810   static class WorkItemRegion implements Callable<Void> {
3811     private HBaseFsck hbck;
3812     private ServerName rsinfo;
3813     private ErrorReporter errors;
3814     private HConnection connection;
3815 
3816     WorkItemRegion(HBaseFsck hbck, ServerName info,
3817                    ErrorReporter errors, HConnection connection) {
3818       this.hbck = hbck;
3819       this.rsinfo = info;
3820       this.errors = errors;
3821       this.connection = connection;
3822     }
3823 
3824     @Override
3825     public synchronized Void call() throws IOException {
3826       errors.progress();
3827       try {
3828         BlockingInterface server = connection.getAdmin(rsinfo);
3829 
3830         // list all online regions from this region server
3831         List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3832         regions = filterRegions(regions);
3833 
3834         if (details) {
3835           errors.detail("RegionServer: " + rsinfo.getServerName() +
3836                            " number of regions: " + regions.size());
3837           for (HRegionInfo rinfo: regions) {
3838             errors.detail("  " + rinfo.getRegionNameAsString() +
3839                              " id: " + rinfo.getRegionId() +
3840                              " encoded_name: " + rinfo.getEncodedName() +
3841                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3842                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3843           }
3844         }
3845 
3846         // check to see if the existence of this region matches the region in META
3847         for (HRegionInfo r:regions) {
3848           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3849           hbi.addServer(r, rsinfo);
3850         }
3851       } catch (IOException e) {          // unable to connect to the region server.
3852         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3853           " Unable to fetch region information. " + e);
3854         throw e;
3855       }
3856       return null;
3857     }
3858 
3859     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3860       List<HRegionInfo> ret = Lists.newArrayList();
3861       for (HRegionInfo hri : regions) {
3862         if (hri.isMetaTable() || (!hbck.checkMetaOnly
3863             && hbck.isTableIncluded(hri.getTable()))) {
3864           ret.add(hri);
3865         }
3866       }
3867       return ret;
3868     }
3869   }
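       // WorkItemRegion instances are meant to be run through the class-level
       // executor; a sketch of the scatter/gather idiom (one item per live region
       // server, list construction elided):
       //
       //   List<WorkItemRegion> items = new ArrayList<WorkItemRegion>();
       //   // ... add one WorkItemRegion per region server ...
       //   for (Future<Void> f : executor.invokeAll(items)) {
       //     f.get(); // a failed server surfaces as an ExecutionException
       //   }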
3870 
3871   /**
3872    * Contact hdfs and get all information about specified table directory into
3873    * regioninfo list.
3874    */
3875   static class WorkItemHdfsDir implements Callable<Void> {
3876     private HBaseFsck hbck;
3877     private FileStatus tableDir;
3878     private ErrorReporter errors;
3879     private FileSystem fs;
3880 
3881     WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3882                     FileStatus status) {
3883       this.hbck = hbck;
3884       this.fs = fs;
3885       this.tableDir = status;
3886       this.errors = errors;
3887     }
3888 
3889     @Override
3890     public synchronized Void call() throws IOException {
3891       try {
3892         // level 2: <HBASE_DIR>/<table>/*
3893         FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3894         for (FileStatus regionDir : regionDirs) {
3895           String encodedName = regionDir.getPath().getName();
3896           // ignore directories that aren't hexadecimal
3897           if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
3898             continue;
3899           }
3900 
3901           LOG.debug("Loading region info from hdfs:" + regionDir.getPath());
3902           HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
3903           HdfsEntry he = new HdfsEntry();
3904           synchronized (hbi) {
3905             if (hbi.getHdfsRegionDir() != null) {
3906               errors.print("Directory " + encodedName + " duplicate?? " +
3907                            hbi.getHdfsRegionDir());
3908             }
3909 
3910             he.hdfsRegionDir = regionDir.getPath();
3911             he.hdfsRegionDirModTime = regionDir.getModificationTime();
3912             Path regioninfoFile = new Path(he.hdfsRegionDir, HRegionFileSystem.REGION_INFO_FILE);
3913             he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile);
3914             // we add to orphan list when we attempt to read .regioninfo
3915 
3916             // Set a flag if this region contains only edits
3917             // This is special case if a region is left after split
3918             he.hdfsOnlyEdits = true;
3919             FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3920             Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
3921             for (FileStatus subDir : subDirs) {
3922               String sdName = subDir.getPath().getName();
3923               if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3924                 he.hdfsOnlyEdits = false;
3925                 break;
3926               }
3927             }
3928             hbi.hdfsEntry = he;
3929           }
3930         }
3931       } catch (IOException e) {
3932         // unable to connect to the region server.
3933         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3934             + tableDir.getPath().getName()
3935             + " Unable to fetch region information. " + e);
3936         throw e;
3937       }
3938       return null;
3939     }
3940   }
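       // Sketch of the hdfsOnlyEdits rule applied above: a region dir counts as
       // "edits only" when every child is either hidden (name starts with '.') or
       // the recovered-edits directory. Hypothetical listings:
       //
       //   region/.regioninfo, region/recovered.edits/  -> hdfsOnlyEdits = true
       //   region/.regioninfo, region/f1/               -> hdfsOnlyEdits = false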
3941 
3942   /**
3943    * Contact hdfs and load the .regioninfo file for the specified region,
3944    * recording the region as an orphan if the file cannot be read.
3945    */
3946   static class WorkItemHdfsRegionInfo implements Callable<Void> {
3947     private HbckInfo hbi;
3948     private HBaseFsck hbck;
3949     private ErrorReporter errors;
3950 
3951     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3952       this.hbi = hbi;
3953       this.hbck = hbck;
3954       this.errors = errors;
3955     }
3956 
3957     @Override
3958     public synchronized Void call() throws IOException {
3959       // only load entries that haven't been loaded yet.
3960       if (hbi.getHdfsHRI() == null) {
3961         try {
3962           hbck.loadHdfsRegioninfo(hbi);
3963         } catch (IOException ioe) {
3964           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3965               + hbi.getTableName() + " in hdfs dir "
3966               + hbi.getHdfsRegionDir()
3967               + "!  The .regioninfo file may be missing, or have an invalid format or version.  Treating as "
3968               + "an orphaned regiondir.";
3969           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3970           try {
3971             hbck.debugLsr(hbi.getHdfsRegionDir());
3972           } catch (IOException ioe2) {
3973             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3974             throw ioe2;
3975           }
3976           hbck.orphanHdfsDirs.add(hbi);
3977           throw ioe;
3978         }
3979       }
3980       return null;
3981     }
3982   }
3983 
3984   /**
3985    * Display the full report from fsck. This displays all live and dead region
3986    * servers, and all known regions.
3987    */
3988   public void setDisplayFullReport() {
3989     details = true;
3990   }
3991 
3992   /**
3993    * Set summary mode.
3994    * Print only summary of the tables and status (OK or INCONSISTENT)
3995    */
3996   void setSummary() {
3997     summary = true;
3998   }
3999 
4000   /**
4001    * Set hbase:meta check mode.
4002    * Print only info about hbase:meta table deployment/state
4003    */
4004   void setCheckMetaOnly() {
4005     checkMetaOnly = true;
4006   }
4007 
4008   /**
4009    * Set region boundaries check mode.
4010    */
4011   void setRegionBoundariesCheck() {
4012     checkRegionBoundaries = true;
4013   }
4014 
4015   /**
4016    * Set table locks fix mode.
4017    * Delete table locks held for a long time
4018    */
4019   public void setFixTableLocks(boolean shouldFix) {
4020     fixTableLocks = shouldFix;
4021     fixAny |= shouldFix;
4022   }
4023 
4024   /**
4025    * Mark that fsck should be rerun. This is set after we have attempted to
4026    * fix something, so the tool can be run once more to verify the repair.
4027    */
4030   void setShouldRerun() {
4031     rerun = true;
4032   }
4033 
4034   boolean shouldRerun() {
4035     return rerun;
4036   }
4037 
4038   /**
4039    * Fix assignment inconsistencies found by fsck, e.g. regions that are
4040    * not deployed, deployed on multiple servers, or deployed on the wrong server.
4041    */
4042   public void setFixAssignments(boolean shouldFix) {
4043     fixAssignments = shouldFix;
4044     fixAny |= shouldFix;
4045   }
4046 
4047   boolean shouldFixAssignments() {
4048     return fixAssignments;
4049   }
4050 
4051   public void setFixMeta(boolean shouldFix) {
4052     fixMeta = shouldFix;
4053     fixAny |= shouldFix;
4054   }
4055 
4056   boolean shouldFixMeta() {
4057     return fixMeta;
4058   }
4059 
4060   public void setFixEmptyMetaCells(boolean shouldFix) {
4061     fixEmptyMetaCells = shouldFix;
4062     fixAny |= shouldFix;
4063   }
4064 
4065   boolean shouldFixEmptyMetaCells() {
4066     return fixEmptyMetaCells;
4067   }
4068 
4069   public void setCheckHdfs(boolean checking) {
4070     checkHdfs = checking;
4071   }
4072 
4073   boolean shouldCheckHdfs() {
4074     return checkHdfs;
4075   }
4076 
4077   public void setFixHdfsHoles(boolean shouldFix) {
4078     fixHdfsHoles = shouldFix;
4079     fixAny |= shouldFix;
4080   }
4081 
4082   boolean shouldFixHdfsHoles() {
4083     return fixHdfsHoles;
4084   }
4085 
4086   public void setFixTableOrphans(boolean shouldFix) {
4087     fixTableOrphans = shouldFix;
4088     fixAny |= shouldFix;
4089   }
4090 
4091   boolean shouldFixTableOrphans() {
4092     return fixTableOrphans;
4093   }
4094 
4095   public void setFixHdfsOverlaps(boolean shouldFix) {
4096     fixHdfsOverlaps = shouldFix;
4097     fixAny |= shouldFix;
4098   }
4099 
4100   boolean shouldFixHdfsOverlaps() {
4101     return fixHdfsOverlaps;
4102   }
4103 
4104   public void setFixHdfsOrphans(boolean shouldFix) {
4105     fixHdfsOrphans = shouldFix;
4106     fixAny |= shouldFix;
4107   }
4108 
4109   boolean shouldFixHdfsOrphans() {
4110     return fixHdfsOrphans;
4111   }
4112 
4113   public void setFixVersionFile(boolean shouldFix) {
4114     fixVersionFile = shouldFix;
4115     fixAny |= shouldFix;
4116   }
4117 
4118   public boolean shouldFixVersionFile() {
4119     return fixVersionFile;
4120   }
4121 
4122   public void setSidelineBigOverlaps(boolean sbo) {
4123     this.sidelineBigOverlaps = sbo;
4124   }
4125 
4126   public boolean shouldSidelineBigOverlaps() {
4127     return sidelineBigOverlaps;
4128   }
4129 
4130   public void setFixSplitParents(boolean shouldFix) {
4131     fixSplitParents = shouldFix;
4132     fixAny |= shouldFix;
4133   }
4134 
4135   boolean shouldFixSplitParents() {
4136     return fixSplitParents;
4137   }
4138 
4139   public void setFixReferenceFiles(boolean shouldFix) {
4140     fixReferenceFiles = shouldFix;
4141     fixAny |= shouldFix;
4142   }
4143 
4144   boolean shouldFixReferenceFiles() {
4145     return fixReferenceFiles;
4146   }
4147 
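       /**
        * The filesystem-permission pre-check only matters when some fix option
        * is enabled (fixAny); it can also be skipped explicitly with
        * -ignorePreCheckPermission.
        */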
4148   public boolean shouldIgnorePreCheckPermission() {
4149     return !fixAny || ignorePreCheckPermission;
4150   }
4151 
4152   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4153     this.ignorePreCheckPermission = ignorePreCheckPermission;
4154   }
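
       /*
        * Putting the fix flags together: a minimal sketch of driving HBaseFsck
        * programmatically instead of via the command line (the table name is
        * only an example):
        *
        *   Configuration conf = HBaseConfiguration.create();
        *   HBaseFsck fsck = new HBaseFsck(conf);
        *   fsck.connect();
        *   fsck.setFixAssignments(true);   // repair region assignments
        *   fsck.setFixMeta(true);          // repair the hbase:meta side of problems
        *   fsck.includeTable(TableName.valueOf("exampleTable"));
        *   int code = fsck.onlineHbck();   // run the actual check/fix pass
        *   fsck.close();
        */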
4155 
4156   /**
4157    * @param mm maximum number of regions to merge into a single region.
4158    */
4159   public void setMaxMerge(int mm) {
4160     this.maxMerge = mm;
4161   }
4162 
4163   public int getMaxMerge() {
4164     return maxMerge;
4165   }
4166 
4167   public void setMaxOverlapsToSideline(int mo) {
4168     this.maxOverlapsToSideline = mo;
4169   }
4170 
4171   public int getMaxOverlapsToSideline() {
4172     return maxOverlapsToSideline;
4173   }
4174 
4175   /**
4176    * Only check/fix tables specified by the list.
4177    * An empty list means all tables are included.
4178    */
4179   boolean isTableIncluded(TableName table) {
4180     return tablesIncluded.isEmpty() || tablesIncluded.contains(table);
4181   }
4182 
4183   public void includeTable(TableName table) {
4184     tablesIncluded.add(table);
4185   }
4186 
4187   Set<TableName> getIncludedTables() {
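         // return a defensive copy so callers cannot mutate the included-table set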
4188     return new HashSet<TableName>(tablesIncluded);
4189   }
4190 
4191   /**
4192    * We are interested only in those tables that have not changed their state
4193    * in hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag.
4194    * @param seconds - the time lag in seconds
4195    */
4196   public void setTimeLag(long seconds) {
4197     timelag = seconds * 1000; // convert to milliseconds
4198   }
4199 
4200   /**
4201    * Set the directory to which data is sidelined during repairs.
4202    * @param sidelineDir - HDFS path to sideline data
4203    */
4204   public void setSidelineDir(String sidelineDir) {
4205     this.sidelineDir = new Path(sidelineDir);
4206   }
4207 
4208   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4209     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4210   }
4211 
4212   public HFileCorruptionChecker getHFilecorruptionChecker() {
4213     return hfcc;
4214   }
4215 
4216   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4217     this.hfcc = hfcc;
4218   }
4219 
4220   public void setRetCode(int code) {
4221     this.retcode = code;
4222   }
4223 
4224   public int getRetCode() {
4225     return retcode;
4226   }
4227 
4228   protected HBaseFsck printUsageAndExit() {
4229     StringWriter sw = new StringWriter(2048);
4230     PrintWriter out = new PrintWriter(sw);
4231     out.println("Usage: fsck [opts] {only tables}");
4232     out.println(" where [opts] are:");
4233     out.println("   -help Display help options (this)");
4234     out.println("   -details Display full report of all regions.");
4235     out.println("   -timelag <timeInSeconds>  Process only regions that " +
4236                        "have not experienced any metadata updates in the last " +
4237                        "<timeInSeconds> seconds.");
4238     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4239         " before checking if the fix worked if run with -fix");
4240     out.println("   -summary Print only summary of the tables and status.");
4241     out.println("   -metaonly Only check the state of the hbase:meta table.");
4242     out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4243     out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
4244 
4245     out.println("");
4246     out.println("  Metadata Repair options: (expert features, use with caution!)");
4247     out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
4248     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4249     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4250     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4251         + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4252     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4253     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4254     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4255     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4256     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4257     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4258     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow sidelining big overlaps");
4259     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4260     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4261     out.println("   -ignorePreCheckPermission  Ignore filesystem permission pre-check");
4262     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4263     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4264         + " (empty REGIONINFO_QUALIFIER rows)");
4265 
4266     out.println("");
4267     out.println("  Datafile Repair options: (expert features, use with caution!)");
4268     out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
4269     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
4270 
4271     out.println("");
4272     out.println("  Metadata Repair shortcuts");
4273     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4274         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
4275     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4276 
4277     out.println("");
4278     out.println("  Table lock options");
4279     out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4280 
4281     out.flush();
4282     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4283 
4284     setRetCode(-2);
4285     return this;
4286   }
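
       /*
        * For illustration, typical invocations of the options above, assuming
        * the standard hbase launcher script is on the PATH:
        *
        *   hbase hbck                            # report-only run over all tables
        *   hbase hbck -details                   # full report of all regions
        *   hbase hbck -fixAssignments -fixMeta   # repair assignments and hbase:meta
        */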
4287 
4288   /**
4289    * Main program
4290    *
4291    * @param args command-line arguments
4292    * @throws Exception on unrecoverable failure
4293    */
4294   public static void main(String[] args) throws Exception {
4295     // create a fsck object
4296     Configuration conf = HBaseConfiguration.create();
4297     Path hbasedir = FSUtils.getRootDir(conf);
4298     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4299     FSUtils.setFsDefault(conf, new Path(defaultFs));
4300     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4301     System.exit(ret);
4302   }
4303 
4304   /**
4305    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4306    */
4307   static class HBaseFsckTool extends Configured implements Tool {
4308     HBaseFsckTool(Configuration conf) { super(conf); }
4309     @Override
4310     public int run(String[] args) throws Exception {
4311       HBaseFsck hbck = new HBaseFsck(getConf());
4312       hbck.exec(hbck.executor, args);
4313       hbck.close();
4314       return hbck.getRetCode();
4315     }
4316   }
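
       /*
        * Because the tool runs through ToolRunner, generic -Dkey=value options
        * are absorbed into the Configuration before exec() sees the remaining
        * arguments. A minimal sketch (the property value is only an example):
        *
        *   Configuration conf = HBaseConfiguration.create();
        *   int ret = ToolRunner.run(new HBaseFsckTool(conf),
        *       new String[] { "-Dhbase.table.lock.expire.ms=600000", "-fixTableLocks" });
        */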
4317 
4318 
4319   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4320     ServiceException, InterruptedException {
4321     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4322 
4323     boolean checkCorruptHFiles = false;
4324     boolean sidelineCorruptHFiles = false;
4325 
4326     // Process command-line args.
4327     for (int i = 0; i < args.length; i++) {
4328       String cmd = args[i];
4329       if (cmd.equals("-help") || cmd.equals("-h")) {
4330         return printUsageAndExit();
4331       } else if (cmd.equals("-details")) {
4332         setDisplayFullReport();
4333       } else if (cmd.equals("-timelag")) {
4334         if (i == args.length - 1) {
4335           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4336           return printUsageAndExit();
4337         }
4338         try {
4339           long timelag = Long.parseLong(args[i+1]);
4340           setTimeLag(timelag);
4341         } catch (NumberFormatException e) {
4342           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4343           return printUsageAndExit();
4344         }
4345         i++;
4346       } else if (cmd.equals("-sleepBeforeRerun")) {
4347         if (i == args.length - 1) {
4348           errors.reportError(ERROR_CODE.WRONG_USAGE,
4349             "HBaseFsck: -sleepBeforeRerun needs a value.");
4350           return printUsageAndExit();
4351         }
4352         try {
4353           sleepBeforeRerun = Long.parseLong(args[i+1]);
4354         } catch (NumberFormatException e) {
4355           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4356           return printUsageAndExit();
4357         }
4358         i++;
4359       } else if (cmd.equals("-sidelineDir")) {
4360         if (i == args.length - 1) {
4361           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4362           return printUsageAndExit();
4363         }
4364         i++;
4365         setSidelineDir(args[i]);
4366       } else if (cmd.equals("-fix")) {
4367         errors.reportError(ERROR_CODE.WRONG_USAGE,
4368           "This option is deprecated, please use -fixAssignments instead.");
4369         setFixAssignments(true);
4370       } else if (cmd.equals("-fixAssignments")) {
4371         setFixAssignments(true);
4372       } else if (cmd.equals("-fixMeta")) {
4373         setFixMeta(true);
4374       } else if (cmd.equals("-noHdfsChecking")) {
4375         setCheckHdfs(false);
4376       } else if (cmd.equals("-fixHdfsHoles")) {
4377         setFixHdfsHoles(true);
4378       } else if (cmd.equals("-fixHdfsOrphans")) {
4379         setFixHdfsOrphans(true);
4380       } else if (cmd.equals("-fixTableOrphans")) {
4381         setFixTableOrphans(true);
4382       } else if (cmd.equals("-fixHdfsOverlaps")) {
4383         setFixHdfsOverlaps(true);
4384       } else if (cmd.equals("-fixVersionFile")) {
4385         setFixVersionFile(true);
4386       } else if (cmd.equals("-sidelineBigOverlaps")) {
4387         setSidelineBigOverlaps(true);
4388       } else if (cmd.equals("-fixSplitParents")) {
4389         setFixSplitParents(true);
4390       } else if (cmd.equals("-ignorePreCheckPermission")) {
4391         setIgnorePreCheckPermission(true);
4392       } else if (cmd.equals("-checkCorruptHFiles")) {
4393         checkCorruptHFiles = true;
4394       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4395         sidelineCorruptHFiles = true;
4396       } else if (cmd.equals("-fixReferenceFiles")) {
4397         setFixReferenceFiles(true);
4398       } else if (cmd.equals("-fixEmptyMetaCells")) {
4399         setFixEmptyMetaCells(true);
4400       } else if (cmd.equals("-repair")) {
4401         // this attempts to merge overlapping hdfs regions, needs testing
4402         // under load
4403         setFixHdfsHoles(true);
4404         setFixHdfsOrphans(true);
4405         setFixMeta(true);
4406         setFixAssignments(true);
4407         setFixHdfsOverlaps(true);
4408         setFixVersionFile(true);
4409         setSidelineBigOverlaps(true);
4410         setFixSplitParents(false);
4411         setCheckHdfs(true);
4412         setFixReferenceFiles(true);
4413         setFixTableLocks(true);
4414       } else if (cmd.equals("-repairHoles")) {
4415         // this will make all missing hdfs regions available but may lose data
4416         setFixHdfsHoles(true);
4417         setFixHdfsOrphans(false);
4418         setFixMeta(true);
4419         setFixAssignments(true);
4420         setFixHdfsOverlaps(false);
4421         setSidelineBigOverlaps(false);
4422         setFixSplitParents(false);
4423         setCheckHdfs(true);
4424       } else if (cmd.equals("-maxOverlapsToSideline")) {
4425         if (i == args.length - 1) {
4426           errors.reportError(ERROR_CODE.WRONG_USAGE,
4427             "-maxOverlapsToSideline needs a numeric value argument.");
4428           return printUsageAndExit();
4429         }
4430         try {
4431           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4432           setMaxOverlapsToSideline(maxOverlapsToSideline);
4433         } catch (NumberFormatException e) {
4434           errors.reportError(ERROR_CODE.WRONG_USAGE,
4435             "-maxOverlapsToSideline needs a numeric value argument.");
4436           return printUsageAndExit();
4437         }
4438         i++;
4439       } else if (cmd.equals("-maxMerge")) {
4440         if (i == args.length - 1) {
4441           errors.reportError(ERROR_CODE.WRONG_USAGE,
4442             "-maxMerge needs a numeric value argument.");
4443           return printUsageAndExit();
4444         }
4445         try {
4446           int maxMerge = Integer.parseInt(args[i+1]);
4447           setMaxMerge(maxMerge);
4448         } catch (NumberFormatException e) {
4449           errors.reportError(ERROR_CODE.WRONG_USAGE,
4450             "-maxMerge needs a numeric value argument.");
4451           return printUsageAndExit();
4452         }
4453         i++;
4454       } else if (cmd.equals("-summary")) {
4455         setSummary();
4456       } else if (cmd.equals("-metaonly")) {
4457         setCheckMetaOnly();
4458       } else if (cmd.equals("-boundaries")) {
4459         setRegionBoundariesCheck();
4460       } else if (cmd.equals("-fixTableLocks")) {
4461         setFixTableLocks(true);
4462       } else if (cmd.startsWith("-")) {
4463         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4464         return printUsageAndExit();
4465       } else {
4466         includeTable(TableName.valueOf(cmd));
4467         errors.print("Allowing checks/fixes for table: " + cmd);
4468       }
4469     }
4470 
4471     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4472 
4473     // pre-check current user has FS write permission or not
4474     try {
4475       preCheckPermission();
4476     } catch (AccessDeniedException ace) {
4477       Runtime.getRuntime().exit(-1);
4478     } catch (IOException ioe) {
4479       Runtime.getRuntime().exit(-1);
4480     }
4481 
4482     // do the real work of hbck
4483     connect();
4484 
4485     try {
4486       // if corrupt file mode is on, first fix them since they may be opened later
4487       if (checkCorruptHFiles || sidelineCorruptHFiles) {
4488         LOG.info("Checking all hfiles for corruption");
4489         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4490         setHFileCorruptionChecker(hfcc); // so we can get result
4491         Collection<TableName> tables = getIncludedTables();
4492         Collection<Path> tableDirs = new ArrayList<Path>();
4493         Path rootdir = FSUtils.getRootDir(getConf());
4494         if (tables.size() > 0) {
4495           for (TableName t : tables) {
4496             tableDirs.add(FSUtils.getTableDir(rootdir, t));
4497           }
4498         } else {
4499           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4500         }
4501         hfcc.checkTables(tableDirs);
4502         hfcc.report(errors);
4503       }
4504 
4505       // check and fix table integrity, region consistency.
4506       int code = onlineHbck();
4507       setRetCode(code);
4508       // If we have changed the HBase state it is better to run hbck again
4509       // to see if we haven't broken something else in the process.
4510       // We run it only once more because otherwise we can easily fall into
4511       // an infinite loop.
4512       if (shouldRerun()) {
4513         try {
4514           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4515           Thread.sleep(sleepBeforeRerun);
4516         } catch (InterruptedException ie) {
4517           LOG.warn("Interrupted while sleeping");
4518           return this;
4519         }
4520         // Just report
4521         setFixAssignments(false);
4522         setFixMeta(false);
4523         setFixHdfsHoles(false);
4524         setFixHdfsOverlaps(false);
4525         setFixVersionFile(false);
4526         setFixTableOrphans(false);
4527         errors.resetErrors();
4528         code = onlineHbck();
4529         setRetCode(code);
4530       }
4531     } finally {
4532       IOUtils.cleanup(null, this);
4533     }
4534     return this;
4535   }
4536 
4537   /**
4538    * Recursive ls (like 'ls -R') for debugging purposes
4539    */
4540   void debugLsr(Path p) throws IOException {
4541     debugLsr(getConf(), p, errors);
4542   }
4543 
4544   /**
4545    * Recursive ls (like 'ls -R') for debugging purposes
4546    */
4547   public static void debugLsr(Configuration conf,
4548       Path p) throws IOException {
4549     debugLsr(conf, p, new PrintingErrorReporter());
4550   }
4551 
4552   /**
4553    * Recursive ls (like 'ls -R') for debugging purposes
4554    */
4555   public static void debugLsr(Configuration conf,
4556       Path p, ErrorReporter errors) throws IOException {
4557     if (!LOG.isDebugEnabled() || p == null) {
4558       return;
4559     }
4560     FileSystem fs = p.getFileSystem(conf);
4561 
4562     if (!fs.exists(p)) {
4563       // nothing to list
4564       return;
4565     }
4566     errors.print(p.toString());
4567 
4568     if (fs.isFile(p)) {
4569       return;
4570     }
4571 
4572     if (fs.getFileStatus(p).isDirectory()) {
4573       FileStatus[] fss = fs.listStatus(p);
4574       for (FileStatus status : fss) {
4575         debugLsr(conf, status.getPath(), errors);
4576       }
4577     }
4578   }
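
       /*
        * Note that debugLsr prints nothing unless DEBUG logging is enabled for
        * this class. A minimal usage sketch (the path is only an example):
        *
        *   Configuration conf = HBaseConfiguration.create();
        *   HBaseFsck.debugLsr(conf, new Path("/hbase/data/default/t1"));
        */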
4579 }