/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.google.protobuf.ServiceException;

import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.KeeperException;

/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase.
 * <p>
 * Region consistency checks verify that hbase:meta, region deployment on region
 * servers and the state of data in HDFS (.regioninfo files) all are in
 * accordance.
 * <p>
 * Table integrity checks verify that all possible row keys resolve to exactly
 * one region of a table.  This means there are no individual degenerate
 * or backwards regions; no holes between regions; and that there are no
 * overlapping regions.
 * <p>
 * The general repair strategy works in two phases:
 * <ol>
 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
 * <li> Repair Region Consistency with hbase:meta and assignments
 * </ol>
 * <p>
 * For table integrity repairs, the tables' region directories are scanned
 * for .regioninfo files.  Each table's integrity is then verified.  If there
 * are any orphan regions (regions with no .regioninfo files) or holes, new
 * regions are fabricated.  Backwards regions are sidelined, as are empty
 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
 * a new region is created and all data is merged into the new region.
 * <p>
 * Table integrity repairs deal solely with HDFS and could potentially be done
 * offline -- the hbase region servers or master do not need to be running.
 * This phase can eventually be used to completely reconstruct the hbase:meta table in
 * an offline fashion.
 * <p>
 * Region consistency requires three conditions -- 1) a valid .regioninfo file
 * present in an HDFS region dir, 2) a valid row with .regioninfo data in META,
 * and 3) a region deployed only on the regionserver to which it was assigned,
 * with proper state in the master.
 * <p>
 * Region consistency repairs require hbase to be online so that hbck can
 * contact the HBase master and region servers.  The hbck#connect() method must
 * first be called successfully.  Much of the region consistency information
 * is transient and less risky to repair.
 * <p>
 * If hbck is run from the command line, there are a handful of arguments that
 * can be used to limit the kinds of repairs hbck will do.  See the code in
 * {@link #printUsageAndExit()} for more details.
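 * <p>
 * A minimal programmatic usage sketch (the try/finally shape below is
 * illustrative, not prescribed by this class):
 * <pre>
 *   Configuration conf = HBaseConfiguration.create();
 *   HBaseFsck fsck = new HBaseFsck(conf);
 *   fsck.connect();                 // acquires the hbck lock file, opens connections
 *   try {
 *     int errs = fsck.onlineHbck(); // check, and fix if fix options were enabled
 *   } finally {
 *     fsck.close();                 // releases the lock and connections
 *   }
 * </pre>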
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
@InterfaceStability.Evolving
public class HBaseFsck extends Configured implements Closeable {
  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";
  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200;

  /**********************
   * Internal resources
   **********************/
  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private ClusterConnection connection;
  private Admin admin;
  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
  protected ExecutorService executor;
  private long startMillis = System.currentTimeMillis();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  private Path HBCK_LOCK_PATH;
  private FSDataOutputStream hbckOutFd;
  // This lock is to prevent cleanup of the hbck lock resources twice between
  // the ShutdownHook and the main code. We clean up only if connect() was
  // successful.
  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  /***********
   * Options
   ***********/
  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // skip tables modified within this time lag
  private boolean fixAssignments = false; // fix assignment errors?
  private boolean fixMeta = false; // fix meta errors?
  private boolean checkHdfs = true; // load and check fs consistency?
  private boolean fixHdfsHoles = false; // fix fs holes?
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix table orphans (missing .tableinfo)
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
  private boolean fixSplitParents = false; // fix lingering split parents
  private boolean fixReferenceFiles = false; // fix lingering reference store files
  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
  private boolean fixTableLocks = false; // fix table locks which are expired
  private boolean fixAny = false; // set to true if any fix is required

  // limit checking/fixes to listed tables; if empty, attempt to check/fix all
  // (hbase:meta is always checked)
  private Set<TableName> tablesIncluded = new HashSet<TableName>();
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
  // maximum number of overlapping regions to sideline
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
  private Path sidelineDir = null;

  private boolean rerun = false; // if we tried to fix something, rerun hbck
  private static boolean summary = false; // if we want to print less output
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // skip the filesystem permission pre-check

  /*********
   * State
   *********/
  private final ErrorReporter errors;
  int fixes = 0;

  /**
   * This map contains the state of all hbck items.  It maps from encoded region
   * name to HbckInfo structure.  The information contained in HbckInfo is used
   * to detect and correct consistency (hdfs/meta/deployment) problems.
   */
  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  // Empty regioninfo qualifiers in hbase:meta
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  /**
   * This map from TableName -> TableInfo contains the structures necessary to
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
   * to prevent dupes.
   *
   * If tablesIncluded is empty, this map contains all tables.
   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case it contains only
   * the meta table.
   */
  private SortedMap<TableName, TableInfo> tablesInfo =
      new ConcurrentSkipListMap<TableName, TableInfo>();

  /**
   * When initially looking at HDFS, we attempt to find any orphaned data.
   */
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<TableName, Set<String>> orphanTableDirs =
      new HashMap<TableName, Set<String>>();
  private Map<TableName, TableState> tableStates =
      new HashMap<TableName, TableState>();
  private final RetryCounterFactory lockFileRetryCounterFactory;

  /**
   * Constructor
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    // make a copy, just to be sure we're not overriding someone else's config
    setConf(HBaseConfiguration.create(getConf()));
    // disable blockcache for tool invocation, see HBASE-10500
    getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
    // Disable usage of meta replicas in hbck
    getConf().setBoolean(HConstants.USE_META_REPLICAS, false);
    errors = getErrorReporter(conf);

    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    executor = new ScheduledThreadPoolExecutor(numThreads,
        Threads.newDaemonThreadFactory("hbasefsck"));
    lockFileRetryCounterFactory = new RetryCounterFactory(
        getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
        getConf().getInt("hbase.hbck.lockfile.attempt.sleep.interval",
            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL));
  }

  /**
   * Constructor
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
    lockFileRetryCounterFactory = new RetryCounterFactory(
        getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
        getConf().getInt("hbase.hbck.lockfile.attempt.sleep.interval",
            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL));
  }

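  /*
   * A hedged construction sketch for the executor-supplying constructor above
   * (the pool size of 10 is illustrative, not a recommendation):
   *
   *   ExecutorService exec =
   *       new ScheduledThreadPoolExecutor(10, Threads.newDaemonThreadFactory("hbasefsck"));
   *   HBaseFsck fsck = new HBaseFsck(HBaseConfiguration.create(), exec);
   */
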
  private class FileLockCallable implements Callable<FSDataOutputStream> {
    RetryCounter retryCounter;

    public FileLockCallable(RetryCounter retryCounter) {
      this.retryCounter = retryCounter;
    }

    @Override
    public FSDataOutputStream call() throws IOException {
      try {
        FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
            HConstants.DATA_FILE_UMASK_KEY);
        Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
        fs.mkdirs(tmpDir);
        HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
        final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
        out.writeBytes(InetAddress.getLocalHost().toString());
        out.flush();
        return out;
      } catch (RemoteException e) {
        if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
          return null;
        } else {
          throw e;
        }
      }
    }

    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
        final Path hbckLockFilePath, final FsPermission defaultPerms)
        throws IOException {

      IOException exception = null;
      do {
        try {
          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
        } catch (IOException ioe) {
          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
              ioe);
          try {
            exception = ioe;
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            throw (InterruptedIOException) new InterruptedIOException(
                "Can't create lock file " + hbckLockFilePath.getName())
            .initCause(ie);
          }
        }
      } while (retryCounter.shouldRetry());

      throw exception;
    }
  }

  /**
   * This method maintains a lock using a file. If the creation fails we return null.
   *
   * @return FSDataOutputStream object corresponding to the newly opened lock file
   * @throws IOException if an unexpected IO failure occurs
   */
  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
    RetryCounter retryCounter = lockFileRetryCounterFactory.create();
    FileLockCallable callable = new FileLockCallable(retryCounter);
    ExecutorService executor = Executors.newFixedThreadPool(1);
    FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
    executor.execute(futureTask);
    final int timeoutInSeconds = 30;
    FSDataOutputStream stream = null;
    try {
      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
    } catch (ExecutionException ee) {
      LOG.warn("Encountered exception when opening lock file", ee);
    } catch (InterruptedException ie) {
      LOG.warn("Interrupted when opening lock file", ie);
      Thread.currentThread().interrupt();
    } catch (TimeoutException exception) {
      // took too long to obtain lock
      LOG.warn("Took more than " + timeoutInSeconds + " seconds to obtain lock");
      futureTask.cancel(true);
    } finally {
      executor.shutdownNow();
    }
    return stream;
  }
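
  /*
   * Lock protocol summary (a sketch; the path assumes the default HConstants
   * values, and the host shown is hypothetical):
   *   lock file: ${hbase.rootdir}/.tmp/hbase-hbck.lock
   *   contents : the local host, e.g. "host-1.example.com/10.0.0.1"
   * The create is non-overwriting, so a concurrent hbck instance gets
   * AlreadyBeingCreatedException and this method returns null.
   */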

  private void unlockHbck() {
    if (hbckLockCleanup.compareAndSet(true, false)) {
      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
      do {
        try {
          IOUtils.closeStream(hbckOutFd);
          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
              HBCK_LOCK_PATH, true);
          return;
        } catch (IOException ioe) {
          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
              + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
          try {
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while deleting lock file " +
                HBCK_LOCK_PATH);
            return;
          }
        }
      } while (retryCounter.shouldRetry());
    }
  }

  /**
   * To repair region consistency, one must call connect() in order to repair
   * online state.
   */
  public void connect() throws IOException {

    // Check if another instance of hbck is running
    hbckOutFd = checkAndMarkRunningHbck();
    if (hbckOutFd == null) {
      setRetCode(-1);
      LOG.error("Another instance of hbck is running, exiting this instance. [If you are sure" +
          " no other instance is running, delete the lock file " +
          HBCK_LOCK_PATH + " and rerun the tool]");
      throw new IOException("Duplicate hbck - Abort");
    }

    // Make sure to clean up the lock
    hbckLockCleanup.set(true);

    // Add a shutdown hook to this thread, in case the user tries to
    // kill hbck with a ctrl-c; we want to clean up the lock so that
    // it is available for subsequent runs
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        IOUtils.closeStream(HBaseFsck.this);
        unlockHbck();
      }
    });
    LOG.debug("Launching hbck");

    connection = (ClusterConnection) ConnectionFactory.createConnection(getConf());
    admin = connection.getAdmin();
    meta = connection.getTable(TableName.META_TABLE_NAME);
    status = admin.getClusterStatus();
  }

  /**
   * Get deployed regions according to the region servers.
   */
  private void loadDeployedRegions() throws IOException, InterruptedException {
    // From the master, get a list of all known live region servers
    Collection<ServerName> regionServers = status.getServers();
    errors.print("Number of live region servers: " + regionServers.size());
    if (details) {
      for (ServerName rsinfo: regionServers) {
        errors.print("  " + rsinfo.getServerName());
      }
    }

    // From the master, get a list of all dead region servers
    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    if (details) {
      for (ServerName name: deadRegionServers) {
        errors.print("  " + name);
      }
    }

    // Print the current master name and state
    errors.print("Master: " + status.getMaster());

    // Print the list of all backup masters
    Collection<ServerName> backupMasters = status.getBackupMasters();
    errors.print("Number of backup masters: " + backupMasters.size());
    if (details) {
      for (ServerName name: backupMasters) {
        errors.print("  " + name);
      }
    }

    errors.print("Average load: " + status.getAverageLoad());
    errors.print("Number of requests: " + status.getRequestsCount());
    errors.print("Number of regions: " + status.getRegionsCount());

    Map<String, RegionState> rits = status.getRegionsInTransition();
    errors.print("Number of regions in transition: " + rits.size());
    if (details) {
      for (RegionState state: rits.values()) {
        errors.print("  " + state.toDescriptiveString());
      }
    }

    // Determine what's deployed
    processRegionServers(regionServers);
  }

  /**
   * Clear the current state of hbck.
   */
  private void clearState() {
    // Make sure regionInfo is empty before starting
    fixes = 0;
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    tableStates.clear();
    errors.clear();
    tablesInfo.clear();
    orphanHdfsDirs.clear();
  }

  /**
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
   * the table integrity rules.  HBase doesn't need to be online for this
   * operation to work.
   */
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    // Initial pass to fix orphans.
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
      // if nothing is happening this should always complete in two iterations.
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState(); // clears hbck state and resets fixes to 0
        // repair what's on HDFS
        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations.
      } while (fixes > 0 && curIter <= maxIterations);

      // Repairs should be done in the first iteration and verification in the second.
      // If there are more than 2 passes, something funny has happened.
      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }

  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters.  It makes each region's state in HDFS, in
   * hbase:meta, and deployments consistent.
   *
   * @return if > 0, the number of errors detected; if < 0, an unrecoverable
   * error occurred; if 0, the hbase cluster is clean.
   */
  public int onlineConsistencyRepair() throws IOException, KeeperException,
    InterruptedException {
    clearState();

    // get regions according to what is online on each RegionServer
    loadDeployedRegions();
    // check whether hbase:meta is deployed and online
    recordMetaRegion();
    // Check if hbase:meta is found only once and in the right place
    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }
    // Do not proceed with further consistency checks for tables when hbase:meta
    // itself is not consistent.
    LOG.info("Loading regioninfos from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Empty cells in hbase:meta?
    reportEmptyMetaCells();

    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // get a list of all tables that have not changed recently.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // Get disabled tables states
    loadTableStates();

    // load regiondirs and regioninfos from HDFS
    if (shouldCheckHdfs()) {
      LOG.info("Loading region directories from HDFS");
      loadHdfsRegionDirs();
      LOG.info("Loading region information from HDFS");
      loadHdfsRegionInfos();
    }

    // fix the orphan tables
    fixOrphanTables();

    LOG.info("Checking and fixing region consistency");

    // Check and fix consistency
    checkAndFixConsistency();

    // Check integrity (does not fix)
    checkIntegrity();
    return errors.getErrorList().size();
  }

  /**
   * Contacts the master and prints out cluster-wide information
   * @return 0 on success, non-zero on failure
   */
  public int onlineHbck()
      throws IOException, KeeperException, InterruptedException, ServiceException {
    // print hbase server version
    errors.print("Version: " + status.getHBaseVersion());
    offlineHdfsIntegrityRepair();

    // turn the balancer off
    boolean oldBalancer = admin.setBalancerRunning(false, true);
    try {
      onlineConsistencyRepair();
    } finally {
      admin.setBalancerRunning(oldBalancer, false);
    }

    if (checkRegionBoundaries) {
      checkRegionBoundaries();
    }

    offlineReferenceFileRepair();

    checkAndFixTableLocks();

    // Remove the hbck lock
    unlockHbck();

    // Print table summary
    printTableSummary(tablesInfo);
    return errors.summarize();
  }

  public static byte[] keyOnly(byte[] b) {
    if (b == null) {
      return b;
    }
    int rowlength = Bytes.toShort(b, 0);
    byte[] result = new byte[rowlength];
    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
    return result;
  }
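
  /*
   * A hedged illustration of keyOnly: it assumes the serialized KeyValue key
   * layout of a 2-byte row length followed by the row bytes. For a key whose
   * first bytes are { 0x00, 0x02, 'r', '1', ... } (hypothetical), it returns
   * { 'r', '1' }.
   */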

  @Override
  public void close() throws IOException {
    try {
      unlockHbck();
    } catch (Exception io) {
      LOG.warn(io);
    }
    IOUtils.cleanup(null, admin, meta, connection);
  }

  private static class RegionBoundariesInformation {
    public byte[] regionName;
    public byte[] metaFirstKey;
    public byte[] metaLastKey;
    public byte[] storesFirstKey;
    public byte[] storesLastKey;

    @Override
    public String toString() {
      return "regionName=" + Bytes.toStringBinary(regionName) +
             "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
             "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
             "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
             "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
    }
  }

  public void checkRegionBoundaries() {
    try {
      ByteArrayComparator comparator = new ByteArrayComparator();
      List<HRegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
      final RegionBoundariesInformation currentRegionBoundariesInformation =
          new RegionBoundariesInformation();
      Path hbaseRoot = FSUtils.getRootDir(getConf());
      for (HRegionInfo regionInfo : regions) {
        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
        // For each region, get the start and stop key from the META and compare them to the
        // same information from the Stores.
        Path path = new Path(tableDir, regionInfo.getEncodedName());
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] files = fs.listStatus(path);
        // For all the column families in this region...
        byte[] storeFirstKey = null;
        byte[] storeLastKey = null;
        for (FileStatus file : files) {
          String fileName = file.getPath().toString();
          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
            FileStatus[] storeFiles = fs.listStatus(file.getPath());
            // For all the stores in this column family.
            for (FileStatus storeFile : storeFiles) {
              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
                  getConf()), getConf());
              if ((reader.getFirstKey() != null)
                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                      reader.getFirstKey()) > 0))) {
                storeFirstKey = reader.getFirstKey();
              }
              if ((reader.getLastKey() != null)
                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                      reader.getLastKey())) < 0)) {
                storeLastKey = reader.getLastKey();
              }
              reader.close();
            }
          }
        }
        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
        if (currentRegionBoundariesInformation.metaFirstKey.length == 0) {
          currentRegionBoundariesInformation.metaFirstKey = null;
        }
        if (currentRegionBoundariesInformation.metaLastKey.length == 0) {
          currentRegionBoundariesInformation.metaLastKey = null;
        }

        // For a region to be correct, we need the META start key to be smaller or equal to the
        // smallest start key from all the stores, and the start key from the next META entry to
        // be bigger than the last key from all the current stores. First region start key is null;
        // Last region end key is null; some regions can be empty and not have any store.

        boolean valid = true;
        // Checking start key.
        if ((currentRegionBoundariesInformation.storesFirstKey != null)
            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
                currentRegionBoundariesInformation.metaFirstKey) >= 0;
        }
        // Checking stop key.
        if ((currentRegionBoundariesInformation.storesLastKey != null)
            && (currentRegionBoundariesInformation.metaLastKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
                currentRegionBoundariesInformation.metaLastKey) < 0;
        }
        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
            tablesInfo.get(regionInfo.getTable()));
          LOG.warn("Region's boundaries not aligned between stores and META for:");
          LOG.warn(currentRegionBoundariesInformation);
        }
      }
    } catch (IOException e) {
      LOG.error(e);
    }
  }

  /**
   * Iterates through the list of all orphan/invalid regiondirs.
   */
  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }

  /**
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
   * these orphans by creating a new region, and moving the column families,
   * recovered edits, WALs, into the new region dir.  We determine the region
   * startkey and endkeys by looking at all of the hfiles inside the column
   * families to identify the min and max keys. The resulting region will
   * likely violate table integrity but will be dealt with by merging
   * overlapping regions.
   */
  @SuppressWarnings("deprecation")
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files are present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();

    // find min and max key values
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      // TODO Figure out what the special dirs are
      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          CacheConfig cacheConf = new CacheConfig(getConf());
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
          hf.loadFileInfo();
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
          start = startKv.getRow();
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
          end = endKv.getRow();
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException npe) {
          LOG.warn("Orphan file " + hfile + " is possibly a corrupted HFile, skipping");
          continue;
        } finally {
          if (hf != null) {
            hf.close();
          }
        }

        // expand the range to include the range of all hfiles
        if (orphanRegionRange == null) {
          // first range
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
        } else {
          // TODO add test

          // expand range only if the hfile is wider.
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      fixes++;
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min/max keys are: [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    // create new region on hdfs. move data into place.
    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
        orphanRegionRange.getSecond());
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    // rename all the data to the new region
    mergeRegionDirs(target, hi);
    fixes++;
  }

  /**
   * This method determines if there are table integrity errors in HDFS.  If
   * there are errors and the appropriate "fix" options are enabled, the method
   * will first correct orphan regions making them into legit regiondirs, and
   * then reload to merge potentially overlapping regions.
   *
   * @return number of table integrity errors found
   */
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    // First time just get suggestions.
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
      // TODO optimize by incrementally adding instead of reloading.
    }

    // Make sure there are no holes now.
    if (shouldFixHdfsHoles()) {
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    // Now we fix overlaps
    if (shouldFixHdfsOverlaps()) {
      // second pass we fix overlaps.
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }

  /**
   * Scan all the store file names to find any lingering reference files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering reference file found will be sidelined.
   * <p>
   * A lingering reference file prevents a region from opening. It has to
   * be fixed before a cluster can start properly.
   */
  private void offlineReferenceFileRepair() throws IOException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all store files");
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot, errors);
    errors.print("");
    LOG.info("Validating mapping using HDFS state");
    for (Path path: allFiles.values()) {
      boolean isReference = false;
      try {
        isReference = StoreFileInfo.isReference(path);
      } catch (Throwable t) {
        // Ignore. Some files may not be store files at all.
        // For example, files under .oldlogs folder in hbase:meta
        // Warning message is already logged by
        // StoreFile#isReference.
      }
      if (!isReference) continue;

      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue;  // good, expected

      // Found a lingering reference file
      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
        "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      // Now, trying to fix it since requested
      boolean success = false;
      String pathStr = path.toString();

      // A reference file path should be like
      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
      // Up 5 directories to get the root folder.
      // So the file will be sidelined to a similar folder structure.
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index + 1));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
          + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }
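
  /*
   * A hedged sideline illustration for the repair above (every path component
   * below is hypothetical):
   *   source: ${hbase.rootdir}/data/ns/t1/abc123/f1/deadbeef.badregion
   *   dest  : ${sidelineDir}/data/ns/t1/abc123/f1/deadbeef.badregion
   * The trailing data/namespace/table/region/family/file components are
   * preserved under the sideline dir.
   */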

  /**
   * TODO -- need to add tests for this.
   */
  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
      emptyRegionInfoQualifiers.size());
    if (details) {
      for (Result r: emptyRegionInfoQualifiers) {
        errors.print("  " + r);
      }
    }
  }

  /**
   * TODO -- need to add tests for this.
   */
  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    HTableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (details) {
      if (numSkipped.get() > 0) {
        errors.detail("Number of Tables in flux: " + numSkipped.get());
      }
      for (HTableDescriptor td : allTables) {
        errors.detail("  Table: " + td.getTableName() + "\t" +
                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
                           (td.isMetaRegion() ? "META" : "    ") + "\t" +
                           " families: " + td.getFamilies().size());
      }
    }
  }

  public ErrorReporter getErrors() {
    return errors;
  }

  /**
   * Read the .regioninfo file from the file system.  If there is no
   * .regioninfo, add it to the orphan hdfs region list.
   */
  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      // already loaded data
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("HRegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }

  /**
   * Exception thrown when an integrity repair operation fails in an
   * unresolvable way.
   */
  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;

    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }

  /**
   * Populate hbi's from regionInfos loaded from file system.
   */
  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear(); // regenerating the data
    // generate region split structure
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    // Parallelized read of .regioninfo files.
    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    // Submit and wait for completion
    hbiFutures = executor.invokeAll(hbis);

    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
              work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    Path hbaseRoot = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseRoot.getFileSystem(getConf());
    // Serialized table info gathering.
    for (HbckInfo hbi: hbckInfos) {

      if (hbi.getHdfsHRI() == null) {
        // was an orphan
        continue;
      }

      // get table name from hdfs, populate various HBaseFsck tables.
      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        // There was an entry in hbase:meta that is not present in HDFS?
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        // only executed once per table.
        modTInfo = new TableInfo(tableName);
        tablesInfo.put(tableName, modTInfo);
        try {
          TableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd.getHTableDescriptor());
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            // should only report once for each table
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<String>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();
    errors.print("");

    return tablesInfo;
  }

  /**
   * Get the column family list according to the column family dirs.
   * @param columns the set to fill with the column family names found
   * @param hbi the region whose HDFS dir is scanned
   * @return a set of column families
   * @throws IOException if the region dir cannot be listed
   */
  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }

  /**
   * Fabricate a .tableinfo file with the following contents:<br>
   * 1. the correct tablename <br>
   * 2. the correct colfamily list<br>
   * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
   * @throws IOException if the table descriptor cannot be written
   */
  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnFamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnFamily));
    }
    fstd.createTableDescriptor(new TableDescriptor(htd), true);
    return true;
  }
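
  /*
   * A hedged illustration of fabricateTableInfo: given tableName "t1" and
   * columns {"f1", "f2"} (both hypothetical), it writes a .tableinfo holding
   * an HTableDescriptor for "t1" with two default-configured
   * HColumnDescriptors, "f1" and "f2".
   */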

  /**
   * Fix the empty REGIONINFO_QUALIFIER rows from hbase:meta.
   * @throws IOException if the delete against hbase:meta fails
   */
  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }
1233 
1234   /**
1235    * To fix orphan table by creating a .tableinfo file under tableDir <br>
1236    * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1237    * 2. else create a default .tableinfo file with following items<br>
1238    * &nbsp;2.1 the correct tablename <br>
1239    * &nbsp;2.2 the correct colfamily list<br>
1240    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1241    * @throws IOException
1242    */
1243   public void fixOrphanTables() throws IOException {
1244     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1245 
1246       List<TableName> tmpList = new ArrayList<TableName>();
1247       tmpList.addAll(orphanTableDirs.keySet());
1248       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1249       Iterator<Entry<TableName, Set<String>>> iter =
1250           orphanTableDirs.entrySet().iterator();
1251       int j = 0;
1252       int numFailedCase = 0;
1253       FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1254       while (iter.hasNext()) {
1255         Entry<TableName, Set<String>> entry =
1256             iter.next();
1257         TableName tableName = entry.getKey();
1258         LOG.info("Trying to fix orphan table error: " + tableName);
1259         if (j < htds.length) {
1260           if (tableName.equals(htds[j].getTableName())) {
1261             HTableDescriptor htd = htds[j];
1262             LOG.info("fixing orphan table: " + tableName + " from cache");
1263             fstd.createTableDescriptor(new TableDescriptor(htd), true);
1264             j++;
1265             iter.remove();
1266           }
1267         } else {
1268           if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1269             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1270             LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
1271             iter.remove();
1272           } else {
1273             LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1274             numFailedCase++;
1275           }
1276         }
1277         fixes++;
1278       }
1279 
1280       if (orphanTableDirs.isEmpty()) {
1281         // all orphanTableDirs are luckily recovered
1282         // re-run doFsck after recovering the .tableinfo file
1283         setShouldRerun();
1284         LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1285       } else if (numFailedCase > 0) {
1286         LOG.error("Failed to fix " + numFailedCase
1287             + " OrphanTables with default .tableinfo files");
1288       }
1289 
1290     }
1291     //cleanup the list
1292     orphanTableDirs.clear();
1293 
1294   }
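
       // These two recovery paths back hbck's -fixTableOrphans option. A fabricated
       // descriptor is simply the defaults plus the recovered family names; a minimal
       // sketch of what fabricateTableInfo writes (the name `recoveredFamilies` is
       // hypothetical, standing in for the cached Set<String> of family names):
       //
       //   HTableDescriptor htd = new HTableDescriptor(tableName);
       //   for (String cf : recoveredFamilies) {
       //     htd.addFamily(new HColumnDescriptor(cf)); // all-default family settings
       //   }
       //   fstd.createTableDescriptor(new TableDescriptor(htd), true);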
1295 
1296   /**
1297    * This borrows code from MasterFileSystem.bootstrap(). It explicitly creates its own WAL, so be
1298    * sure to close it as well as the region when you're finished.
1299    *
1300    * @return an open hbase:meta HRegion
1301    */
1302   private HRegion createNewMeta() throws IOException {
1303     Path rootdir = FSUtils.getRootDir(getConf());
1304     Configuration c = getConf();
1305     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1306     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1307     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1308     // The WAL subsystem will use the default rootDir rather than the passed in rootDir
1309     // unless I pass along via the conf.
1310     Configuration confForWAL = new Configuration(c);
1311     confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
1312     WAL wal = (new WALFactory(confForWAL,
1313         Collections.<WALActionsListener>singletonList(new MetricsWAL()),
1314         "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8))).
1315         getWAL(metaHRI.getEncodedNameAsBytes());
1316     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
1317     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1318     return meta;
1319   }
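
       // Caller's sketch (this is the shape rebuildMeta below uses): both the region
       // and its private WAL must be closed when done, in this order:
       //
       //   HRegion meta = createNewMeta();
       //   try {
       //     // ... write the recovered entries ...
       //   } finally {
       //     meta.close();
       //     if (meta.getWAL() != null) meta.getWAL().close();
       //   }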
1320 
1321   /**
1322    * Generate set of puts to add to new meta.  This expects the tables to be
1323    * clean with no overlaps or holes.  If there are any problems it returns null.
1324    *
1325    * @return An array list of puts to do in bulk, null if tables have problems
1326    */
1327   private ArrayList<Put> generatePuts(
1328       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1329     ArrayList<Put> puts = new ArrayList<Put>();
1330     boolean hasProblems = false;
1331     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1332       TableName name = e.getKey();
1333 
1334       // skip "hbase:meta"
1335       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1336         continue;
1337       }
1338 
1339       TableInfo ti = e.getValue();
1340       puts.add(MetaTableAccessor
1341           .makePutFromTableState(new TableState(ti.tableName, TableState.State.ENABLED)));
1342       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1343           .entrySet()) {
1344         Collection<HbckInfo> his = spl.getValue();
1345         int sz = his.size();
1346         if (sz != 1) {
1347           // problem
1348           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1349               + " had " +  sz + " regions instead of exactly 1." );
1350           hasProblems = true;
1351           continue;
1352         }
1353 
1354         // add the row directly to meta.
1355         HbckInfo hi = his.iterator().next();
1356         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1357         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1358         puts.add(p);
1359       }
1360     }
1361     return hasProblems ? null : puts;
1362   }
1363 
1364   /**
1365    * Suggest fixes for each table
1366    */
1367   private void suggestFixes(
1368       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1369     logParallelMerge();
1370     for (TableInfo tInfo : tablesInfo.values()) {
1371       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1372       tInfo.checkRegionChain(handler);
1373     }
1374   }
1375 
1376   /**
1377    * Rebuilds meta from information in hdfs/fs.  Depends on configuration settings passed into
1378    * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE.
1379    *
1380    * @param fix flag that determines if method should attempt to fix holes
1381    * @return true if successful, false if attempt failed.
1382    */
1383   public boolean rebuildMeta(boolean fix) throws IOException,
1384       InterruptedException {
1385 
1386     // TODO check to make sure hbase is offline. (or at least the table
1387     // currently being worked on is off line)
1388 
1389     // Determine what's on HDFS
1390     LOG.info("Loading HBase regioninfo from HDFS...");
1391     loadHdfsRegionDirs(); // populating regioninfo table.
1392 
1393     int errs = errors.getErrorList().size();
1394     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1395     checkHdfsIntegrity(false, false);
1396 
1397     // make sure ok.
1398     if (errors.getErrorList().size() != errs) {
1399       // While in error state, iterate until no more fixes possible
1400       while(true) {
1401         fixes = 0;
1402         suggestFixes(tablesInfo);
1403         errors.clear();
1404         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1405         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1406 
1407         int errCount = errors.getErrorList().size();
1408 
1409         if (fixes == 0) {
1410           if (errCount > 0) {
1411             return false; // failed to fix problems.
1412           } else {
1413             break; // no fixes and no problems? drop out and fix stuff!
1414           }
1415         }
1416       }
1417     }
1418 
1419     // we can rebuild, move old meta out of the way and start
1420     LOG.info("HDFS regioninfos seem good.  Sidelining old hbase:meta");
1421     Path backupDir = sidelineOldMeta();
1422 
1423     LOG.info("Creating new hbase:meta");
1424     HRegion meta = createNewMeta();
1425 
1426     // populate meta
1427     List<Put> puts = generatePuts(tablesInfo);
1428     if (puts == null) {
1429       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1430         "You may need to restore the previously sidelined hbase:meta");
1431       return false;
1432     }
1433     meta.batchMutate(puts.toArray(new Put[puts.size()]));
1434     meta.close();
1435     if (meta.getWAL() != null) {
1436       meta.getWAL().close();
1437     }
1438     LOG.info("Success! hbase:meta table rebuilt.");
1439     LOG.info("Old hbase:meta is moved into " + backupDir);
1440     return true;
1441   }
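
       // Usage sketch: this is the entry point behind the offline meta-repair tool
       // (OfflineMetaRepair). A minimal caller, assuming the cluster is offline and
       // eliding exception handling:
       //
       //   HBaseFsck fsck = new HBaseFsck(HBaseConfiguration.create());
       //   boolean ok = fsck.rebuildMeta(false); // true = also attempt to fix holes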
1442 
1443   /**
1444    * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1445    */
1446   private void logParallelMerge() {
1447     if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1448       LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
1449           " false to run serially.");
1450     } else {
1451       LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
1452           " true to run in parallel.");
1453     }
1454   }
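
       // The flag can be set programmatically before constructing hbck (sketch below),
       // and, since hbck is launched through the standard Hadoop tool runner, it can
       // typically also be passed as -Dhbasefsck.overlap.merge.parallel=false:
       //
       //   Configuration conf = HBaseConfiguration.create();
       //   conf.setBoolean("hbasefsck.overlap.merge.parallel", false);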
1455 
1456   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1457       boolean fixOverlaps) throws IOException {
1458     LOG.info("Checking HBase region split map from HDFS data...");
1459     logParallelMerge();
1460     for (TableInfo tInfo : tablesInfo.values()) {
1461       TableIntegrityErrorHandler handler;
1462       if (fixHoles || fixOverlaps) {
1463         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1464           fixHoles, fixOverlaps);
1465       } else {
1466         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1467       }
1468       if (!tInfo.checkRegionChain(handler)) {
1469         // should dump info as well.
1470         errors.report("Found inconsistency in table " + tInfo.getName());
1471       }
1472     }
1473     return tablesInfo;
1474   }
1475 
1476   private Path getSidelineDir() throws IOException {
1477     if (sidelineDir == null) {
1478       Path hbaseDir = FSUtils.getRootDir(getConf());
1479       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1480       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1481           + startMillis);
1482     }
1483     return sidelineDir;
1484   }
1485 
1486   /**
1487    * Sideline a region dir (instead of deleting it)
1488    */
1489   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1490     return sidelineRegionDir(fs, null, hi);
1491   }
1492 
1493   /**
1494    * Sideline a region dir (instead of deleting it)
1495    *
1496    * @param parentDir if specified, the region will be sidelined to a
1497    * folder like .../parentDir/<table name>/<region name>. The purpose
1498    * is to group together similar sidelined regions, for example ones
1499    * that should be bulk loaded back later on. If null, it is ignored.
1500    */
1501   Path sidelineRegionDir(FileSystem fs,
1502       String parentDir, HbckInfo hi) throws IOException {
1503     TableName tableName = hi.getTableName();
1504     Path regionDir = hi.getHdfsRegionDir();
1505 
1506     if (!fs.exists(regionDir)) {
1507       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1508       return null;
1509     }
1510 
1511     Path rootDir = getSidelineDir();
1512     if (parentDir != null) {
1513       rootDir = new Path(rootDir, parentDir);
1514     }
1515     Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1516     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1517     fs.mkdirs(sidelineRegionDir);
1518     boolean success = false;
1519     FileStatus[] cfs =  fs.listStatus(regionDir);
1520     if (cfs == null) {
1521       LOG.info("Region dir is empty: " + regionDir);
1522     } else {
1523       for (FileStatus cf : cfs) {
1524         Path src = cf.getPath();
1525         Path dst =  new Path(sidelineRegionDir, src.getName());
1526         if (fs.isFile(src)) {
1527           // simple file
1528           success = fs.rename(src, dst);
1529           if (!success) {
1530             String msg = "Unable to rename file " + src + " to " + dst;
1531             LOG.error(msg);
1532             throw new IOException(msg);
1533           }
1534           continue;
1535         }
1536 
1537         // is a directory.
1538         fs.mkdirs(dst);
1539 
1540         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1541         // FileSystem.rename is inconsistent with directories -- if the
1542         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1543         // it moves the src into the dst dir resulting in (foo/a/b).  If
1544         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1545         FileStatus[] hfiles = fs.listStatus(src);
1546         if (hfiles != null && hfiles.length > 0) {
1547           for (FileStatus hfile : hfiles) {
1548             success = fs.rename(hfile.getPath(), dst);
1549             if (!success) {
1550               String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1551               LOG.error(msg);
1552               throw new IOException(msg);
1553             }
1554           }
1555         }
1556         LOG.debug("Sideline directory contents:");
1557         debugLsr(sidelineRegionDir);
1558       }
1559     }
1560 
1561     LOG.info("Removing old region dir: " + regionDir);
1562     success = fs.delete(regionDir, true);
1563     if (!success) {
1564       String msg = "Unable to delete dir " + regionDir;
1565       LOG.error(msg);
1566       throw new IOException(msg);
1567     }
1568     return sidelineRegionDir;
1569   }
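
       // The FileSystem.rename caveat above, illustrated (assuming directories
       // /t/a and /t/b on the same FileSystem `fs`):
       //
       //   fs.rename(new Path("/t/b"), new Path("/t/a")); // /t/a exists  => /t/a/b
       //   fs.rename(new Path("/t/b"), new Path("/t/a")); // /t/a missing => /t/a
       //
       // which is why the code pre-creates dst and then renames file by file.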
1570 
1571   /**
1572    * Side line an entire table.
1573    */
1574   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1575       Path backupHbaseDir) throws IOException {
1576     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1577     if (fs.exists(tableDir)) {
1578       Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
1579       fs.mkdirs(backupTableDir.getParent());
1580       boolean success = fs.rename(tableDir, backupTableDir);
1581       if (!success) {
1582         throw new IOException("Failed to move " + tableName + " from "
1583             + tableDir + " to " + backupTableDir);
1584       }
1585     } else {
1586       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1587     }
1588   }
1589 
1590   /**
1591    * @return Path to backup of original directory
1592    */
1593   Path sidelineOldMeta() throws IOException {
1594     // put current hbase:meta aside.
1595     Path hbaseDir = FSUtils.getRootDir(getConf());
1596     FileSystem fs = hbaseDir.getFileSystem(getConf());
1597     Path backupDir = getSidelineDir();
1598     fs.mkdirs(backupDir);
1599 
1600     try {
1601       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1602     } catch (IOException e) {
1603       LOG.fatal("... failed to sideline meta. Currently in an inconsistent state.  To restore, "
1604           + "try to rename hbase:meta in " + backupDir.getName() + " to "
1605           + hbaseDir.getName() + ".", e);
1606       throw e; // rethrow the original exception
1607     }
1608     return backupDir;
1609   }
1610 
1611   /**
1612    * Load the table states from hbase:meta into the local map.
1613    * @throws IOException
1615    */
1616   private void loadTableStates()
1617   throws IOException {
1618     tableStates = MetaTableAccessor.getTableStates(connection);
1619   }
1620 
1621   /**
1622    * Check if the specified region's table is disabled.
1623    * @param tableName table to check status of
1624    */
1625   private boolean isTableDisabled(TableName tableName) {
1626     return tableStates.containsKey(tableName)
1627         && tableStates.get(tableName)
1628         .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1629   }
1630 
1631   /**
1632    * Scan HDFS for all regions, recording their information into
1633    * regionInfoMap
1634    */
1635   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1636     Path rootDir = FSUtils.getRootDir(getConf());
1637     FileSystem fs = rootDir.getFileSystem(getConf());
1638 
1639     // list all tables from HDFS
1640     List<FileStatus> tableDirs = Lists.newArrayList();
1641 
1642     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1643 
1644     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1645     for (Path path : paths) {
1646       TableName tableName = FSUtils.getTableName(path);
1647       if ((!checkMetaOnly &&
1648           isTableIncluded(tableName)) ||
1649           tableName.equals(TableName.META_TABLE_NAME)) {
1650         tableDirs.add(fs.getFileStatus(path));
1651       }
1652     }
1653 
1654     // verify that version file exists
1655     if (!foundVersionFile) {
1656       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1657           "Version file does not exist in root dir " + rootDir);
1658       if (shouldFixVersionFile()) {
1659         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1660             + " file.");
1661         setShouldRerun();
1662         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1663             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1664             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1665             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1666       }
1667     }
1668 
1669     // level 1:  <HBASE_DIR>/*
1670     List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
1671     List<Future<Void>> dirsFutures;
1672 
1673     for (FileStatus tableDir : tableDirs) {
1674       LOG.debug("Loading region dirs from " + tableDir.getPath());
1675       dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
1676     }
1677 
1678     // Invoke and wait for Callables to complete
1679     dirsFutures = executor.invokeAll(dirs);
1680 
1681     for(Future<Void> f: dirsFutures) {
1682       try {
1683         f.get();
1684       } catch(ExecutionException e) {
1685         LOG.warn("Could not load region dir", e.getCause());
1686       }
1687     }
1688     errors.print("");
1689   }
1690 
1691   /**
1692    * Record the location of the hbase:meta region as found in ZooKeeper.
1693    */
1694   private boolean recordMetaRegion() throws IOException {
1695     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1696         HConstants.EMPTY_START_ROW, false, false);
1697     if (rl == null) {
1698       errors.reportError(ERROR_CODE.NULL_META_REGION,
1699           "META region was not found in Zookeeper");
1700       return false;
1701     }
1702     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1703       // Check if Meta region is valid and existing
1704       if (metaLocation == null ) {
1705         errors.reportError(ERROR_CODE.NULL_META_REGION,
1706             "META region location is null");
1707         return false;
1708       }
1709       if (metaLocation.getRegionInfo() == null) {
1710         errors.reportError(ERROR_CODE.NULL_META_REGION,
1711             "META location regionInfo is null");
1712         return false;
1713       }
1714       if (metaLocation.getHostname() == null) {
1715         errors.reportError(ERROR_CODE.NULL_META_REGION,
1716             "META location hostName is null");
1717         return false;
1718       }
1719       ServerName sn = metaLocation.getServerName();
1720       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
1721       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1722       if (hbckInfo == null) {
1723         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1724       } else {
1725         hbckInfo.metaEntry = m;
1726       }
1727     }
1728     return true;
1729   }
1730 
1731   private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1732     return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1733       @Override
1734       public void abort(String why, Throwable e) {
1735         LOG.error(why, e);
1736         System.exit(1);
1737       }
1738 
1739       @Override
1740       public boolean isAborted() {
1741         return false;
1742       }
1743 
1744     });
1745   }
1746 
1747   private ServerName getMetaRegionServerName(int replicaId)
1748   throws IOException, KeeperException {
1749     ZooKeeperWatcher zkw = createZooKeeperWatcher();
1750     ServerName sn = null;
1751     try {
1752       sn = new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
1753     } finally {
1754       zkw.close();
1755     }
1756     return sn;
1757   }
1758 
1759   /**
1760    * Contacts each regionserver and fetches metadata about regions.
1761    * @param regionServerList - the list of region servers to connect to
1762    * @throws IOException if a remote or network exception occurs
1763    */
1764   void processRegionServers(Collection<ServerName> regionServerList)
1765     throws IOException, InterruptedException {
1766 
1767     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1768     List<Future<Void>> workFutures;
1769 
1770     // loop to contact each region server in parallel
1771     for (ServerName rsinfo: regionServerList) {
1772       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1773     }
1774 
1775     workFutures = executor.invokeAll(workItems);
1776 
1777     for(int i=0; i<workFutures.size(); i++) {
1778       WorkItemRegion item = workItems.get(i);
1779       Future<Void> f = workFutures.get(i);
1780       try {
1781         f.get();
1782       } catch(ExecutionException e) {
1783         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1784             e.getCause());
1785       }
1786     }
1787   }
1788 
1789   /**
1790    * Check consistency of all regions that have been found in previous phases.
1791    */
1792   private void checkAndFixConsistency()
1793   throws IOException, KeeperException, InterruptedException {
1794     // Divide the checks into two phases. One for default/primary replicas and another
1795     // for the non-primary ones. Keeps code cleaner this way.
1796 
1797     List<CheckRegionConsistencyWorkItem> workItems =
1798         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1799     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1800       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1801         workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1802       }
1803     }
1804     checkRegionConsistencyConcurrently(workItems);
1805 
1806     boolean prevHdfsCheck = shouldCheckHdfs();
1807     setCheckHdfs(false); //replicas don't have any hdfs data
1808     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1809     // deployed/undeployed replicas.
1810     List<CheckRegionConsistencyWorkItem> replicaWorkItems =
1811         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1812     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1813       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1814         replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1815       }
1816     }
1817     checkRegionConsistencyConcurrently(replicaWorkItems);
1818     setCheckHdfs(prevHdfsCheck);
1819 
1820     if (shouldCheckHdfs()) {
1821       checkAndFixTableStates();
1822     }
1823   }
1824 
1825   /**
1826    * Check consistency of all regions using multiple threads concurrently.
1827    */
1828   private void checkRegionConsistencyConcurrently(
1829     final List<CheckRegionConsistencyWorkItem> workItems)
1830     throws IOException, KeeperException, InterruptedException {
1831     if (workItems.isEmpty()) {
1832       return;  // nothing to check
1833     }
1834 
1835     List<Future<Void>> workFutures = executor.invokeAll(workItems);
1836     for(Future<Void> f: workFutures) {
1837       try {
1838         f.get();
1839       } catch(ExecutionException e1) {
1840         LOG.warn("Could not check region consistency", e1.getCause());
1841         if (e1.getCause() instanceof IOException) {
1842           throw (IOException)e1.getCause();
1843         } else if (e1.getCause() instanceof KeeperException) {
1844           throw (KeeperException)e1.getCause();
1845         } else if (e1.getCause() instanceof InterruptedException) {
1846           throw (InterruptedException)e1.getCause();
1847         } else {
1848           throw new IOException(e1.getCause());
1849         }
1850       }
1851     }
1852   }
1853 
1854   class CheckRegionConsistencyWorkItem implements Callable<Void> {
1855     private final String key;
1856     private final HbckInfo hbi;
1857 
1858     CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
1859       this.key = key;
1860       this.hbi = hbi;
1861     }
1862 
1863     @Override
1864     public synchronized Void call() throws Exception {
1865       checkRegionConsistency(key, hbi);
1866       return null;
1867     }
1868   }
1869 
1870   /**
1871    * Check and fix table states, assumes full info available:
1872    * - tableInfos
1873    * - empty tables loaded
1874    */
1875   private void checkAndFixTableStates() throws IOException {
1876     // first check dangling states
1877     for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1878       TableName tableName = entry.getKey();
1879       TableState tableState = entry.getValue();
1880       TableInfo tableInfo = tablesInfo.get(tableName);
1881       if (isTableIncluded(tableName)
1882           && !tableName.isSystemTable()
1883           && tableInfo == null) {
1884         if (fixMeta) {
1885           MetaTableAccessor.deleteTableState(connection, tableName);
1886           TableState state = MetaTableAccessor.getTableState(connection, tableName);
1887           if (state != null) {
1888             errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1889                 tableName + " unable to delete dangling table state " + tableState);
1890           }
1891         } else {
1892           errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1893               tableName + " has dangling table state " + tableState);
1894         }
1895       }
1896     }
1897     // check that all tables have states
1898     for (TableName tableName : tablesInfo.keySet()) {
1899       if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1900         if (fixMeta) {
1901           MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1902           TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1903           if (newState == null) {
1904             errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1905                 "Unable to change state for table " + tableName + " in meta ");
1906           }
1907         } else {
1908           errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1909               tableName + " has no state in meta ");
1910         }
1911       }
1912     }
1913   }
1914 
1915   private void preCheckPermission() throws IOException, AccessDeniedException {
1916     if (shouldIgnorePreCheckPermission()) {
1917       return;
1918     }
1919 
1920     Path hbaseDir = FSUtils.getRootDir(getConf());
1921     FileSystem fs = hbaseDir.getFileSystem(getConf());
1922     UserProvider userProvider = UserProvider.instantiate(getConf());
1923     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1924     FileStatus[] files = fs.listStatus(hbaseDir);
1925     for (FileStatus file : files) {
1926       try {
1927         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1928       } catch (AccessDeniedException ace) {
1929         LOG.warn("Got AccessDeniedException during preCheckPermission", ace);
1930         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1931           + " does not have write perms to " + file.getPath()
1932           + ". Please rerun hbck as hdfs user " + file.getOwner());
1933         throw ace;
1934       }
1935     }
1936   }
1937 
1938   /**
1939    * Deletes region from meta table
1940    */
1941   private void deleteMetaRegion(HbckInfo hi) throws IOException {
1942     deleteMetaRegion(hi.metaEntry.getRegionName());
1943   }
1944 
1945   /**
1946    * Deletes region from meta table
1947    */
1948   private void deleteMetaRegion(byte[] metaKey) throws IOException {
1949     Delete d = new Delete(metaKey);
1950     meta.delete(d);
1951     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1952   }
1953 
1954   /**
1955    * Reset the split parent region info in meta table
1956    */
1957   private void resetSplitParent(HbckInfo hi) throws IOException {
1958     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1959     Delete d = new Delete(hi.metaEntry.getRegionName());
1960     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1961     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1962     mutations.add(d);
1963 
1964     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1965     hri.setOffline(false);
1966     hri.setSplit(false);
1967     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1968     mutations.add(p);
1969 
1970     meta.mutateRow(mutations);
1971     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1972   }
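
       // Note: grouping the Delete (SPLITA/SPLITB qualifiers) and the Put (the
       // refreshed HRegionInfo with split/offline cleared) into one RowMutations
       // lets meta.mutateRow(mutations) apply both changes to the single meta row
       // atomically, so no reader ever sees a half-reset split parent.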
1973 
1974   /**
1975    * This is a backwards-compatibility wrapper for permanently offlining a region
1976    * that should not be alive.  If the master does not support the
1977    * "offline" method, it will use the closest available unassign method instead.  This
1978    * will basically work until one attempts to disable or delete the affected
1979    * table.  The problem has to do with in-memory only master state, so
1980    * restarting the HMaster or failing over to another should fix this.
1981    */
1982   private void offline(byte[] regionName) throws IOException {
1983     String regionString = Bytes.toStringBinary(regionName);
1984     if (!rsSupportsOffline) {
1985       LOG.warn("Using unassign region " + regionString
1986           + " instead of using offline method, you should"
1987           + " restart HMaster after these repairs");
1988       admin.unassign(regionName, true);
1989       return;
1990     }
1991 
1992     // the first time around, we assume the master supports #offline.
1993     try {
1994       LOG.info("Offlining region " + regionString);
1995       admin.offline(regionName);
1996     } catch (IOException ioe) {
1997       String notFoundMsg = "java.lang.NoSuchMethodException: " +
1998         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1999       if (ioe.getMessage().contains(notFoundMsg)) {
2000         LOG.warn("Using unassign region " + regionString
2001             + " instead of using offline method, you should"
2002             + " restart HMaster after these repairs");
2003         rsSupportsOffline = false; // in the future just use unassign
2004         admin.unassign(regionName, true);
2005         return;
2006       }
2007       throw ioe;
2008     }
2009   }
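
       // The string match on NoSuchMethodException above is how masters that
       // predate the offline RPC (<0.90.5, <0.92.0) are detected: after the first
       // miss, rsSupportsOffline stays false and unassign is used directly.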
2010 
2011   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2012     undeployRegionsForHbi(hi);
2013     // undeploy replicas of the region (but only if the method is invoked for the primary)
2014     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2015       return;
2016     }
2017     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2018     for (int i = 1; i < numReplicas; i++) {
2019       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2020       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2021           hi.getPrimaryHRIForDeployedReplica(), i);
2022       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2023       if (h != null) {
2024         undeployRegionsForHbi(h);
2025         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2026         //in consistency checks
2027         h.setSkipChecks(true);
2028       }
2029     }
2030   }
2031 
2032   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2033     for (OnlineEntry rse : hi.deployedEntries) {
2034       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2035       try {
2036         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2037         offline(rse.hri.getRegionName());
2038       } catch (IOException ioe) {
2039         LOG.warn("Got exception when attempting to offline region "
2040             + Bytes.toString(rse.hri.getRegionName()), ioe);
2041       }
2042     }
2043   }
2044 
2045   /**
2046    * Attempts to undeploy a region from a region server based on information in
2047    * META.  Any operation that modifies the file system should make sure that
2048    * its corresponding region is not deployed, to prevent data races.
2049    *
2050    * A separate call is required to update the master in-memory region state
2051    * kept in the AssignmentManager.  Because disable uses this state instead of
2052    * that found in META, we can't seem to cleanly disable/delete tables that
2053    * have been hbck fixed.  When used on a version of HBase that does not have
2054    * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
2055    * restart or failover may be required.
2056    */
2057   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2058     if (hi.metaEntry == null && hi.hdfsEntry == null) {
2059       undeployRegions(hi);
2060       return;
2061     }
2062 
2063     // get assignment info and hregioninfo from meta.
2064     Get get = new Get(hi.getRegionName());
2065     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2066     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2067     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2068     // also get the locations of the replicas to close if the primary region is being closed
2069     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2070       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2071       for (int i = 0; i < numReplicas; i++) {
2072         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2073         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2074       }
2075     }
2076     Result r = meta.get(get);
2077     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2078     if (rl == null) {
2079       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2080           " since meta does not have handle to reach it");
2081       return;
2082     }
2083     for (HRegionLocation h : rl.getRegionLocations()) {
2084       ServerName serverName = h.getServerName();
2085       if (serverName == null) {
2086         errors.reportError("Unable to close region "
2087             + hi.getRegionNameAsString() +  " because meta does not "
2088             + "have handle to reach it.");
2089         continue;
2090       }
2091       HRegionInfo hri = h.getRegionInfo();
2092       if (hri == null) {
2093         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2094             + " because hbase:meta had invalid or missing "
2095             + HConstants.CATALOG_FAMILY_STR + ":"
2096             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2097             + " qualifier value.");
2098         continue;
2099       }
2100       // close the region -- close files and remove assignment
2101       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2102     }
2103   }
2104 
2105   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2106     KeeperException, InterruptedException {
2107     // If we are trying to fix the errors
2108     if (shouldFixAssignments()) {
2109       errors.print(msg);
2110       undeployRegions(hbi);
2111       setShouldRerun();
2112       HRegionInfo hri = hbi.getHdfsHRI();
2113       if (hri == null) {
2114         hri = hbi.metaEntry;
2115       }
2116       HBaseFsckRepair.fixUnassigned(admin, hri);
2117       HBaseFsckRepair.waitUntilAssigned(admin, hri);
2118 
2119       // also assign replicas if needed (do it only when this call operates on a primary replica)
2120       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
2121       int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2122       for (int i = 1; i < replicationCount; i++) {
2123         hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2124         HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2125         if (h != null) {
2126           undeployRegions(h);
2127           //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2128           //in consistency checks
2129           h.setSkipChecks(true);
2130         }
2131         HBaseFsckRepair.fixUnassigned(admin, hri);
2132         HBaseFsckRepair.waitUntilAssigned(admin, hri);
2133       }
2134 
2135     }
2136   }
2137 
2138   /**
2139    * Check a single region for consistency and correct deployment.
2140    */
2141   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2142   throws IOException, KeeperException, InterruptedException {
2143 
2144     if (hbi.isSkipChecks()) return;
2145     String descriptiveName = hbi.toString();
2146     boolean inMeta = hbi.metaEntry != null;
2147     // In case not checking HDFS, assume the region is on HDFS
2148     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2149     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2150     boolean isDeployed = !hbi.deployedOn.isEmpty();
2151     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2152     boolean deploymentMatchesMeta =
2153       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2154       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2155     boolean splitParent =
2156         inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2157     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2158     boolean recentlyModified = inHdfs &&
2159       hbi.getModTime() + timelag > System.currentTimeMillis();
2160 
2161     // ========== First the healthy cases =============
2162     if (hbi.containsOnlyHdfsEdits()) {
2163       return;
2164     }
2165     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2166       return;
2167     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2168       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2169         "table that is not deployed");
2170       return;
2171     } else if (recentlyModified) {
2172       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2173       return;
2174     }
2175     // ========== Cases where the region is not in hbase:meta =============
2176     else if (!inMeta && !inHdfs && !isDeployed) {
2177       // We shouldn't have record of this region at all then!
2178       assert false : "Entry for region with no data";
2179     } else if (!inMeta && !inHdfs && isDeployed) {
2180       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2181           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2182           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2183       if (shouldFixAssignments()) {
2184         undeployRegions(hbi);
2185       }
2186 
2187     } else if (!inMeta && inHdfs && !isDeployed) {
2188       if (hbi.isMerged()) {
2189         // This region has already been merged, the remaining hdfs file will be
2190         // cleaned by CatalogJanitor later
2191         hbi.setSkipChecks(true);
2192         LOG.info("Region " + descriptiveName
2193             + " was merged recently; its file(s) will be cleaned by CatalogJanitor later");
2194         return;
2195       }
2196       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2197           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2198           "or deployed on any region server");
2199       // restore region consistency of an adopted orphan
2200       if (shouldFixMeta()) {
2201         if (!hbi.isHdfsRegioninfoPresent()) {
2202           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2203               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2204               " used.");
2205           return;
2206         }
2207 
2208         HRegionInfo hri = hbi.getHdfsHRI();
2209         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2210 
2211         for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2212           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2213               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2214                 hri.getEndKey()) >= 0)
2215               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2216             if(region.isSplit() || region.isOffline()) continue;
2217             Path regionDir = hbi.getHdfsRegionDir();
2218             FileSystem fs = regionDir.getFileSystem(getConf());
2219             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2220             for (Path familyDir : familyDirs) {
2221               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2222               for (Path referenceFilePath : referenceFilePaths) {
2223                 Path parentRegionDir =
2224                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2225                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2226                   LOG.warn(hri + " start and stop keys are in the range of " + region
2227                       + ". The region might not have been cleaned up from hdfs when the split of region " + region
2228                       + " failed. Hence deleting it from hdfs.");
2229                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2230                     regionDir.getParent(), hri);
2231                   return;
2232                 }
2233               }
2234             }
2235           }
2236         }
2237         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2238         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2239         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2240             admin.getClusterStatus().getServers(), numReplicas);
2241 
2242         tryAssignmentRepair(hbi, "Trying to reassign region...");
2243       }
2244 
2245     } else if (!inMeta && inHdfs && isDeployed) {
2246       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2247           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2248       debugLsr(hbi.getHdfsRegionDir());
2249       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2250         // for replicas, this means that we should undeploy the region (we would have
2251         // gone over the primaries and fixed meta holes in first phase under
2252         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2253         // this stage unless unwanted replica)
2254         if (shouldFixAssignments()) {
2255           undeployRegionsForHbi(hbi);
2256         }
2257       }
2258       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2259         if (!hbi.isHdfsRegioninfoPresent()) {
2260           LOG.error("This should have been repaired in table integrity repair phase");
2261           return;
2262         }
2263 
2264         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2265         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2266         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2267             admin.getClusterStatus().getServers(), numReplicas);
2268         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2269       }
2270 
2271     // ========== Cases where the region is in hbase:meta =============
2272     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2273       // check whether this is an actual error, or just transient state where parent
2274       // is not cleaned
2275       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2276         // check that split daughters are there
2277         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2278         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2279         if (infoA != null && infoB != null) {
2280           // we already processed or will process daughters. Move on, nothing to see here.
2281           hbi.setSkipChecks(true);
2282           return;
2283         }
2284       }
2285       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2286           + descriptiveName + " is a split parent in META, in HDFS, "
2287           + "and not deployed on any region server. This could be transient.");
2288       if (shouldFixSplitParents()) {
2289         setShouldRerun();
2290         resetSplitParent(hbi);
2291       }
2292     } else if (inMeta && !inHdfs && !isDeployed) {
2293       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2294           + descriptiveName + " found in META, but not in HDFS "
2295           + "or deployed on any region server.");
2296       if (shouldFixMeta()) {
2297         deleteMetaRegion(hbi);
2298       }
2299     } else if (inMeta && !inHdfs && isDeployed) {
2300       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2301           + " found in META, but not in HDFS, " +
2302           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2303       // We treat HDFS as ground truth.  Any information in meta is transient
2304       // and equivalent data can be regenerated.  So, let's unassign and remove
2305       // these problems from META.
2306       if (shouldFixAssignments()) {
2307         errors.print("Trying to fix unassigned region...");
2308         undeployRegions(hbi);
2309       }
2310       if (shouldFixMeta()) {
2311         // wait for it to complete
2312         deleteMetaRegion(hbi);
2313       }
2314     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2315       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2316           + " not deployed on any region server.");
2317       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2318     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2319       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2320           "Region " + descriptiveName + " should not be deployed according " +
2321           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2322       if (shouldFixAssignments()) {
2323         errors.print("Trying to close the region " + descriptiveName);
2324         setShouldRerun();
2325         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2326       }
2327     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2328       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2329           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2330           + " but is multiply assigned to region servers " +
2331           Joiner.on(", ").join(hbi.deployedOn));
2332       // If we are trying to fix the errors
2333       if (shouldFixAssignments()) {
2334         errors.print("Trying to fix assignment error...");
2335         setShouldRerun();
2336         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2337       }
2338     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2339       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2340           + descriptiveName + " listed in hbase:meta on region server " +
2341           hbi.metaEntry.regionServer + " but found on region server " +
2342           hbi.deployedOn.get(0));
2343       // If we are trying to fix the errors
2344       if (shouldFixAssignments()) {
2345         errors.print("Trying to fix assignment error...");
2346         setShouldRerun();
2347         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2348         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2349       }
2350     } else {
2351       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2352           " is in an unforeseen state:" +
2353           " inMeta=" + inMeta +
2354           " inHdfs=" + inHdfs +
2355           " isDeployed=" + isDeployed +
2356           " isMultiplyDeployed=" + isMultiplyDeployed +
2357           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2358           " shouldBeDeployed=" + shouldBeDeployed);
2359     }
2360   }
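
       // Summary of the main (inMeta, inHdfs, isDeployed) cases handled above
       // (split parents, disabled tables, and recently modified regions are
       // special-cased before this table applies):
       //
       //   inMeta inHdfs isDeployed -> action
       //     no     no      no      -> impossible; no record should exist (assert)
       //     no     no      yes     -> NOT_IN_META_HDFS: undeploy
       //     no     yes     no      -> NOT_IN_META_OR_DEPLOYED: patch meta, reassign
       //     no     yes     yes     -> NOT_IN_META: patch meta (primary replica only)
       //     yes    no      no      -> NOT_IN_HDFS_OR_DEPLOYED: delete from meta
       //     yes    no      yes     -> NOT_IN_HDFS: undeploy, delete from meta
       //     yes    yes     no      -> NOT_DEPLOYED: assign (if it should be deployed)
       //     yes    yes     yes     -> fix multiple/mismatched assignments as needed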
2361 
2362   /**
2363    * Checks tables integrity. Goes over all regions and scans the tables.
2364    * Collects all the pieces for each table and checks if there are missing,
2365    * repeated or overlapping ones.
2366    * @throws IOException
2367    */
2368   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2369     tablesInfo = new TreeMap<TableName,TableInfo> ();
2370     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2371     for (HbckInfo hbi : regionInfoMap.values()) {
2372       // Check only valid, working regions
2373       if (hbi.metaEntry == null) {
2374         // this assumes that consistency check has run loadMetaEntry
2375         Path p = hbi.getHdfsRegionDir();
2376         if (p == null) {
2377           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2378         }
2379 
2380         // TODO test.
2381         continue;
2382       }
2383       if (hbi.metaEntry.regionServer == null) {
2384         errors.detail("Skipping region because no region server: " + hbi);
2385         continue;
2386       }
2387       if (hbi.metaEntry.isOffline()) {
2388         errors.detail("Skipping region because it is offline: " + hbi);
2389         continue;
2390       }
2391       if (hbi.containsOnlyHdfsEdits()) {
2392         errors.detail("Skipping region because it only contains edits: " + hbi);
2393         continue;
2394       }
2395 
2396       // Missing regionDir or over-deployment is checked elsewhere. Include
2397       // these cases in modTInfo, so we can evaluate those regions as part of
2398       // the region chain in META
2399       //if (hbi.foundRegionDir == null) continue;
2400       //if (hbi.deployedOn.size() != 1) continue;
2401       if (hbi.deployedOn.size() == 0) continue;
2402 
2403       // We should be safe here
2404       TableName tableName = hbi.metaEntry.getTable();
2405       TableInfo modTInfo = tablesInfo.get(tableName);
2406       if (modTInfo == null) {
2407         modTInfo = new TableInfo(tableName);
2408       }
2409       for (ServerName server : hbi.deployedOn) {
2410         modTInfo.addServer(server);
2411       }
2412 
2413       if (!hbi.isSkipChecks()) {
2414         modTInfo.addRegionInfo(hbi);
2415       }
2416 
2417       tablesInfo.put(tableName, modTInfo);
2418     }
2419 
2420     loadTableInfosForTablesWithNoRegion();
2421 
2422     logParallelMerge();
2423     for (TableInfo tInfo : tablesInfo.values()) {
2424       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2425       if (!tInfo.checkRegionChain(handler)) {
2426         errors.report("Found inconsistency in table " + tInfo.getName());
2427       }
2428     }
2429     return tablesInfo;
2430   }
2431 
2432   /** Loads table infos for tables that may not have been included, since there are no
2433    * regions reported for the table, but the table dir is present in HDFS.
2434    */
2435   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2436     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2437     for (HTableDescriptor htd : allTables.values()) {
2438       if (checkMetaOnly && !htd.isMetaTable()) {
2439         continue;
2440       }
2441 
2442       TableName tableName = htd.getTableName();
2443       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2444         TableInfo tableInfo = new TableInfo(tableName);
2445         tableInfo.htds.add(htd);
2446         tablesInfo.put(htd.getTableName(), tableInfo);
2447       }
2448     }
2449   }
2450 
2451   /**
2452    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2453    * @return number of file move fixes done to merge regions.
2454    */
2455   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2456     int fileMoves = 0;
2457     String thread = Thread.currentThread().getName();
2458     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2459     debugLsr(contained.getHdfsRegionDir());
2460 
2461     // rename the contained into the container.
2462     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2463     FileStatus[] dirs = null;
2464     try {
2465       dirs = fs.listStatus(contained.getHdfsRegionDir());
2466     } catch (FileNotFoundException fnfe) {
2467       // region we are attempting to merge in is not present!  Since this is a merge, there is
2468       // no harm skipping this region if it does not exist.
2469       if (!fs.exists(contained.getHdfsRegionDir())) {
2470         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2471             + " is missing. Assuming already sidelined or moved.");
2472       } else {
2473         sidelineRegionDir(fs, contained);
2474       }
2475       return fileMoves;
2476     }
2477 
2478     if (dirs == null) {
2479       if (!fs.exists(contained.getHdfsRegionDir())) {
2480         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2481             + " already sidelined.");
2482       } else {
2483         sidelineRegionDir(fs, contained);
2484       }
2485       return fileMoves;
2486     }
2487 
2488     for (FileStatus cf : dirs) {
2489       Path src = cf.getPath();
2490       Path dst =  new Path(targetRegionDir, src.getName());
2491 
2492       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2493         // do not copy the old .regioninfo file.
2494         continue;
2495       }
2496 
2497       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2498         // do not copy the .oldlogs files
2499         continue;
2500       }
2501 
2502       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2503       // FileSystem.rename is inconsistent with directories -- if the
2504       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2505       // it moves the src into the dst dir resulting in (foo/a/b).  If
2506       // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2507       for (FileStatus hfile : fs.listStatus(src)) {
2508         boolean success = fs.rename(hfile.getPath(), dst);
2509         if (success) {
2510           fileMoves++;
2511         }
2512       }
2513       LOG.debug("[" + thread + "] Target region dir contents after moves:");
2514       debugLsr(targetRegionDir);
2515     }
2516 
2517     // if all success.
2518     sidelineRegionDir(fs, contained);
2519     LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2520         getSidelineDir());
2521     debugLsr(contained.getHdfsRegionDir());
2522 
2523     return fileMoves;
2524   }
2525 
2526 
2527   static class WorkItemOverlapMerge implements Callable<Void> {
2528     private TableIntegrityErrorHandler handler;
2529     Collection<HbckInfo> overlapgroup;
2530 
2531     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2532       this.handler = handler;
2533       this.overlapgroup = overlapgroup;
2534     }
2535 
2536     @Override
2537     public Void call() throws Exception {
2538       handler.handleOverlapGroup(overlapgroup);
2539       return null;
2540     }
2541   };
2542 
2543 
2544   /**
2545    * Maintain information about a particular table.
2546    */
2547   public class TableInfo {
2548     TableName tableName;
2549     TreeSet <ServerName> deployedOn;
2550 
2551     // backwards regions
2552     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2553 
2554     // sidelined big overlapped regions
2555     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2556 
2557     // region split calculator
2558     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2559 
2560     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2561     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2562 
2563     // key = start split, values = set of splits in problem group
2564     final Multimap<byte[], HbckInfo> overlapGroups =
2565       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2566 
2567     // list of regions derived from meta entries.
2568     private ImmutableList<HRegionInfo> regionsFromMeta = null;
2569 
2570     TableInfo(TableName name) {
2571       this.tableName = name;
2572       deployedOn = new TreeSet <ServerName>();
2573     }
2574 
2575     /**
2576      * @return descriptor common to all regions.  null if there are none or multiple!
2577      */
2578     private HTableDescriptor getHTD() {
2579       if (htds.size() == 1) {
2580         return htds.iterator().next();
2581       } else {
2582         LOG.error("None/Multiple table descriptors found for table '"
2583           + tableName + "' regions: " + htds);
2584       }
2585       return null;
2586     }
2587 
2588     public void addRegionInfo(HbckInfo hir) {
2589       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2590         // end key is absolute end key, just add it.
2591         // ignore replicas other than primary for these checks
2592         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2593         return;
2594       }
2595 
2596       // if not the absolute end key, check for cycle
2597       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2598         errors.reportError(
2599             ERROR_CODE.REGION_CYCLE,
2600             String.format("The endkey for this region comes before the "
2601                 + "startkey, startkey=%s, endkey=%s",
2602                 Bytes.toStringBinary(hir.getStartKey()),
2603                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2604         backwards.add(hir);
2605         return;
2606       }
2607 
2608       // main case, add to split calculator
2609       // ignore replicas other than primary for these checks
2610       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2611     }
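         // Illustration with hypothetical keys: a region with startkey "m" and endkey "c"
         // is reported as a REGION_CYCLE and parked in 'backwards' above, rather than fed
         // to the split calculator, since a backwards range cannot take part in coverage math.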
2612 
2613     public void addServer(ServerName server) {
2614       this.deployedOn.add(server);
2615     }
2616 
2617     public TableName getName() {
2618       return tableName;
2619     }
2620 
2621     public int getNumRegions() {
2622       return sc.getStarts().size() + backwards.size();
2623     }
2624 
2625     public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2626       // lazy loaded, synchronized to ensure a single load
2627       if (regionsFromMeta == null) {
2628         List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2629         for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2630           if (tableName.equals(h.getTableName())) {
2631             if (h.metaEntry != null) {
2632               regions.add((HRegionInfo) h.metaEntry);
2633             }
2634           }
2635         }
2636         regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2637       }
2638       
2639       return regionsFromMeta;
2640     }
2641     
2642     private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2643       ErrorReporter errors;
2644 
2645       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2646         this.errors = errors;
2647         setTableInfo(ti);
2648       }
2649 
2650       @Override
2651       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2652         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2653             "First region should start with an empty key.  You need to "
2654             + "create a new region and regioninfo in HDFS to plug the hole.",
2655             getTableInfo(), hi);
2656       }
2657 
2658       @Override
2659       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2660         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2661             "Last region should end with an empty key. You need to "
2662                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2663       }
2664 
2665       @Override
2666       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2667         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2668             "Region has the same start and end key.", getTableInfo(), hi);
2669       }
2670 
2671       @Override
2672       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2673         byte[] key = r1.getStartKey();
2674         // dup start key
2675         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2676             "Multiple regions have the same startkey: "
2677             + Bytes.toStringBinary(key), getTableInfo(), r1);
2678         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2679             "Multiple regions have the same startkey: "
2680             + Bytes.toStringBinary(key), getTableInfo(), r2);
2681       }
2682 
2683       @Override
2684       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2685         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2686             "There is an overlap in the region chain.",
2687             getTableInfo(), hi1, hi2);
2688       }
2689 
2690       @Override
2691       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2692         errors.reportError(
2693             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2694             "There is a hole in the region chain between "
2695                 + Bytes.toStringBinary(holeStart) + " and "
2696                 + Bytes.toStringBinary(holeStop)
2697                 + ".  You need to create a new .regioninfo and region "
2698                 + "dir in hdfs to plug the hole.");
2699       }
2700     }
2701 
2702     /**
2703      * This handler fixes integrity errors from hdfs information.  There are
2704      * basically three classes of integrity problems: 1) holes, 2) overlaps, and
2705      * 3) invalid regions.
2706      *
2707      * This class overrides methods that fix holes and the overlap group case.
2708      * Individual cases of particular overlaps are handled by the general
2709      * overlap group merge repair case.
2710      *
2711      * If hbase is online, this forces regions offline before doing merge
2712      * operations.
2713      */
2714     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2715       Configuration conf;
2716 
2717       boolean fixOverlaps = true;
2718 
2719       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2720           boolean fixHoles, boolean fixOverlaps) {
2721         super(ti, errors);
2722         this.conf = conf;
2723         this.fixOverlaps = fixOverlaps;
2724         // TODO properly use fixHoles
2725       }
2726 
2727       /**
2728        * This is a special case hole -- when the first region of a table is
2729    * missing from META, HBase doesn't acknowledge the existence of the
2730        * table.
2731        */
2732       @Override
2733       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2734         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2735             "First region should start with an empty key.  Creating a new " +
2736             "region and regioninfo in HDFS to plug the hole.",
2737             getTableInfo(), next);
2738         HTableDescriptor htd = getTableInfo().getHTD();
2739         // from special EMPTY_START_ROW to next region's startKey
2740         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2741             HConstants.EMPTY_START_ROW, next.getStartKey());
2742 
2743         // TODO test
2744         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2745         LOG.info("Table region start key was not empty.  Created new empty region: "
2746             + newRegion + " " + region);
2747         fixes++;
2748       }
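           // Sketch with a hypothetical key: if the earliest region found starts at "b",
           // the code above creates an empty region [ "", "b" ) so that the table's chain
           // once again begins at the empty start row.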
2749 
2750       @Override
2751       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2752         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2753             "Last region should end with an empty key.  Creating a new "
2754                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2755         HTableDescriptor htd = getTableInfo().getHTD();
2756         // from curEndKey to EMPTY_START_ROW
2757         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2758             HConstants.EMPTY_START_ROW);
2759 
2760         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2761         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2762             + " " + region);
2763         fixes++;
2764       }
2765 
2766       /**
2767        * There is a hole in the hdfs regions that violates the table integrity
2768        * rules.  Create a new empty region that patches the hole.
2769        */
2770       @Override
2771       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2772         errors.reportError(
2773             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2774             "There is a hole in the region chain between "
2775                 + Bytes.toStringBinary(holeStartKey) + " and "
2776                 + Bytes.toStringBinary(holeStopKey)
2777                 + ".  Creating a new regioninfo and region "
2778                 + "dir in hdfs to plug the hole.");
2779         HTableDescriptor htd = getTableInfo().getHTD();
2780         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2781         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2782         LOG.info("Plugged hole by creating new empty region: " + newRegion + " " + region);
2783         fixes++;
2784       }
2785 
2786       /**
2787        * This takes set of overlapping regions and merges them into a single
2788        * region.  This covers cases like degenerate regions, shared start key,
2789        * general overlaps, duplicate ranges, and partial overlapping regions.
2790        *
2791        * Cases:
2792        * - Clean regions that overlap
2793        * - Groups containing only .oldlogs regions (no start/stop range can be determined)
2794        *
2795        * This is basically threadsafe, except for the 'fixes' counter increment in mergeOverlaps.
2796        */
2797       @Override
2798       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2799           throws IOException {
2800         Preconditions.checkNotNull(overlap);
2801         Preconditions.checkArgument(overlap.size() > 0);
2802 
2803         if (!this.fixOverlaps) {
2804           LOG.warn("Not attempting to repair overlaps.");
2805           return;
2806         }
2807 
2808         if (overlap.size() > maxMerge) {
2809           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2810             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2811           if (sidelineBigOverlaps) {
2812             // we only sideline big overlapped groups that exceed the max number of regions to merge
2813             sidelineBigOverlaps(overlap);
2814           }
2815           return;
2816         }
2817 
2818         mergeOverlaps(overlap);
2819       }
2820 
2821       void mergeOverlaps(Collection<HbckInfo> overlap)
2822           throws IOException {
2823         String thread = Thread.currentThread().getName();
2824         LOG.info("== [" + thread + "] Merging regions into one region: "
2825           + Joiner.on(",").join(overlap));
2826         // get the min / max range and close all concerned regions
2827         Pair<byte[], byte[]> range = null;
2828         for (HbckInfo hi : overlap) {
2829           if (range == null) {
2830             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2831           } else {
2832             if (RegionSplitCalculator.BYTES_COMPARATOR
2833                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2834               range.setFirst(hi.getStartKey());
2835             }
2836             if (RegionSplitCalculator.BYTES_COMPARATOR
2837                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2838               range.setSecond(hi.getEndKey());
2839             }
2840           }
2841           // need to close files so delete can happen.
2842           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2843           LOG.debug("[" + thread + "] Contained region dir before close");
2844           debugLsr(hi.getHdfsRegionDir());
2845           try {
2846             LOG.info("[" + thread + "] Closing region: " + hi);
2847             closeRegion(hi);
2848           } catch (IOException ioe) {
2849             LOG.warn("[" + thread + "] Was unable to close region " + hi
2850               + ".  Just continuing... ", ioe);
2851           } catch (InterruptedException e) {
2852             LOG.warn("[" + thread + "] Was unable to close region " + hi
2853               + ".  Just continuing... ", e);
2854           }
2855 
2856           try {
2857             LOG.info("[" + thread + "] Offlining region: " + hi);
2858             offline(hi.getRegionName());
2859           } catch (IOException ioe) {
2860             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2861               + ".  Just continuing... ", ioe);
2862           }
2863         }
2864 
2865         // create new empty container region.
2866         HTableDescriptor htd = getTableInfo().getHTD();
2867         // from start key to end Key
2868         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2869             range.getSecond());
2870         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2871         LOG.info("[" + thread + "] Created new empty container region: " +
2872             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2873         debugLsr(region.getRegionFileSystem().getRegionDir());
2874 
2875         // all target regions are closed, should be able to safely cleanup.
2876         boolean didFix = false;
2877         Path target = region.getRegionFileSystem().getRegionDir();
2878         for (HbckInfo contained : overlap) {
2879           LOG.info("[" + thread + "] Merging " + contained + " into " + target);
2880           int merges = mergeRegionDirs(target, contained);
2881           if (merges > 0) {
2882             didFix = true;
2883           }
2884         }
2885         if (didFix) {
2886           fixes++;
2887         }
2888       }
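           // Worked example with hypothetical keys: for an overlap group { ["a","c"),
           // ["b","e"), ["d","f") } the min/max scan above yields range = ["a","f");
           // all three regions are closed and offlined, an empty container region
           // ["a","f") is created, and mergeRegionDirs moves each region's files into it.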
2889 
2890       /**
2891        * Sideline some regions in a big overlap group so that the group
2892        * has fewer regions and is easier to merge later on.
2893        *
2894        * @param bigOverlap the overlapped group with regions more than maxMerge
2895        * @throws IOException
2896        */
2897       void sidelineBigOverlaps(
2898           Collection<HbckInfo> bigOverlap) throws IOException {
2899         int overlapsToSideline = bigOverlap.size() - maxMerge;
2900         if (overlapsToSideline > maxOverlapsToSideline) {
2901           overlapsToSideline = maxOverlapsToSideline;
2902         }
2903         List<HbckInfo> regionsToSideline =
2904           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2905         FileSystem fs = FileSystem.get(conf);
2906         for (HbckInfo regionToSideline: regionsToSideline) {
2907           try {
2908             LOG.info("Closing region: " + regionToSideline);
2909             closeRegion(regionToSideline);
2910           } catch (IOException ioe) {
2911             LOG.warn("Was unable to close region " + regionToSideline
2912               + ".  Just continuing... ", ioe);
2913           } catch (InterruptedException e) {
2914             LOG.warn("Was unable to close region " + regionToSideline
2915               + ".  Just continuing... ", e);
2916           }
2917 
2918           try {
2919             LOG.info("Offlining region: " + regionToSideline);
2920             offline(regionToSideline.getRegionName());
2921           } catch (IOException ioe) {
2922             LOG.warn("Unable to offline region from master: " + regionToSideline
2923               + ".  Just continuing... ", ioe);
2924           }
2925 
2926           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2927           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2928           if (sidelineRegionDir != null) {
2929             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2930             LOG.info("After sidelined big overlapped region: "
2931               + regionToSideline.getRegionNameAsString()
2932               + " to " + sidelineRegionDir.toString());
2933             fixes++;
2934           }
2935         }
2936       }
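           // Arithmetic sketch (values illustrative, not the defaults): for a group of
           // 12 regions with maxMerge = 10 and maxOverlapsToSideline = 2,
           // overlapsToSideline = min(12 - 10, 2) = 2, so the two regions spanning the
           // biggest ranges are sidelined and the remaining 10 become mergeable.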
2937     }
2938 
2939     /**
2940      * Check the region chain (from META) of this table.  We are looking for
2941      * holes, overlaps, and cycles.
2942      * @return false if there are errors
2943      * @throws IOException
2944      */
2945     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2946       // When a table is disabled there is no need to check its region chain. If some of its
2947       // regions were accidentally deployed, the code below might report issues such as a missing
2948       // first or last region, or a hole in the chain, and might try to fix them, which is unwanted.
2949       if (isTableDisabled(this.tableName)) {
2950         return true;
2951       }
2952       int originalErrorsCount = errors.getErrorList().size();
2953       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2954       SortedSet<byte[]> splits = sc.getSplits();
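           // Illustration with hypothetical keys: given regions [ "", "b" ), [ "b", "d" )
           // and a duplicate [ "b", "c" ), the splits are { "", "b", "c", "d" }.  At "b"
           // two ranges apply (an overlap group); at "d" no range applies, and since no
           // higher split exists this marks the top of the table rather than a hole.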
2955 
2956       byte[] prevKey = null;
2957       byte[] problemKey = null;
2958 
2959       if (splits.size() == 0) {
2960         // no region for this table
2961         handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2962       }
2963 
2964       for (byte[] key : splits) {
2965         Collection<HbckInfo> ranges = regions.get(key);
2966         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2967           for (HbckInfo rng : ranges) {
2968             handler.handleRegionStartKeyNotEmpty(rng);
2969           }
2970         }
2971 
2972         // check for degenerate ranges
2973         for (HbckInfo rng : ranges) {
2974           // special endkey case converts '' to null
2975           byte[] endKey = rng.getEndKey();
2976           endKey = (endKey.length == 0) ? null : endKey;
2977           if (Bytes.equals(rng.getStartKey(),endKey)) {
2978             handler.handleDegenerateRegion(rng);
2979           }
2980         }
2981 
2982         if (ranges.size() == 1) {
2983           // this split key is ok -- no overlap, not a hole.
2984           if (problemKey != null) {
2985             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2986           }
2987           problemKey = null; // fell through, no more problem.
2988         } else if (ranges.size() > 1) {
2989           // set the new problem key group name; if we already have a problem key,
2990           // just keep using it.
2991           if (problemKey == null) {
2992             // only for overlap regions.
2993             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2994             problemKey = key;
2995           }
2996           overlapGroups.putAll(problemKey, ranges);
2997 
2998           // record errors
2999           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
3000           // this is dumb and O(n^2), but it shouldn't happen often
3001           for (HbckInfo r1 : ranges) {
3002             if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3003             subRange.remove(r1);
3004             for (HbckInfo r2 : subRange) {
3005               if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3006               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3007                 handler.handleDuplicateStartKeys(r1,r2);
3008               } else {
3009                 // overlap
3010                 handler.handleOverlapInRegionChain(r1, r2);
3011               }
3012             }
3013           }
3014 
3015         } else if (ranges.size() == 0) {
3016           if (problemKey != null) {
3017             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3018           }
3019           problemKey = null;
3020 
3021           byte[] holeStopKey = sc.getSplits().higher(key);
3022           // if higher key is null we reached the top.
3023           if (holeStopKey != null) {
3024             // hole
3025             handler.handleHoleInRegionChain(key, holeStopKey);
3026           }
3027         }
3028         prevKey = key;
3029       }
3030 
3031       // When the last region of a table is proper and has an empty end key, 'prevKey'
3032       // will be null.
3033       if (prevKey != null) {
3034         handler.handleRegionEndKeyNotEmpty(prevKey);
3035       }
3036 
3037       // TODO fold this into the TableIntegrityHandler
3038       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3039         boolean ok = handleOverlapsParallel(handler, prevKey);
3040         if (!ok) {
3041           return false;
3042         }
3043       } else {
3044         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3045           handler.handleOverlapGroup(overlap);
3046         }
3047       }
3048 
3049       if (details) {
3050         // do full region split map dump
3051         errors.print("---- Table '"  +  this.tableName
3052             + "': region split map");
3053         dump(splits, regions);
3054         errors.print("---- Table '"  +  this.tableName
3055             + "': overlap groups");
3056         dumpOverlapProblems(overlapGroups);
3057         errors.print("There are " + overlapGroups.keySet().size()
3058             + " overlap groups with " + overlapGroups.size()
3059             + " overlapping regions");
3060       }
3061       if (!sidelinedRegions.isEmpty()) {
3062         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3063         errors.print("---- Table '"  +  this.tableName
3064             + "': sidelined big overlapped regions");
3065         dumpSidelinedRegions(sidelinedRegions);
3066       }
3067       return errors.getErrorList().size() == originalErrorsCount;
3068     }
3069 
3070     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3071         throws IOException {
3072       // we parallelize the overlap handlers for the case where we have lots of groups to fix.
3073       // We can safely assume each group is independent.
3074       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
3075       List<Future<Void>> rets;
3076       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3078         merges.add(new WorkItemOverlapMerge(overlap, handler));
3079       }
3080       try {
3081         rets = executor.invokeAll(merges);
3082       } catch (InterruptedException e) {
3083         LOG.error("Overlap merges were interrupted", e);
3084         return false;
3085       }
3086       for(int i=0; i<merges.size(); i++) {
3087         WorkItemOverlapMerge work = merges.get(i);
3088         Future<Void> f = rets.get(i);
3089         try {
3090           f.get();
3091         } catch(ExecutionException e) {
3092           LOG.warn("Failed to merge overlap group " + work, e.getCause());
3093         } catch (InterruptedException e) {
3094           LOG.error("Waiting for overlap merges was interrupted", e);
3095           return false;
3096         }
3097       }
3098       return true;
3099     }
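         // The choice between the parallel path above and the sequential loop in
         // checkRegionChain is driven by the "hbasefsck.overlap.merge.parallel" key,
         // e.g. conf.setBoolean("hbasefsck.overlap.merge.parallel", false) to serialize.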
3100 
3101     /**
3102      * Dumps the region split map in a human-readable form for debugging.
3103      *
3104      * @param splits the split points of the table's key space
3105      * @param regions coverage multimap from each split point to the regions covering it
3106      */
3107     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3108       // we display this way because the last end key should be displayed as well.
3109       StringBuilder sb = new StringBuilder();
3110       for (byte[] k : splits) {
3111         sb.setLength(0); // clear out existing buffer, if any.
3112         sb.append(Bytes.toStringBinary(k) + ":\t");
3113         for (HbckInfo r : regions.get(k)) {
3114           sb.append("[ "+ r.toString() + ", "
3115               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3116         }
3117         errors.print(sb.toString());
3118       }
3119     }
3120   }
3121 
3122   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3123     // we display this way because the last end key should be displayed as
3124     // well.
3125     for (byte[] k : regions.keySet()) {
3126       errors.print(Bytes.toStringBinary(k) + ":");
3127       for (HbckInfo r : regions.get(k)) {
3128         errors.print("[ " + r.toString() + ", "
3129             + Bytes.toStringBinary(r.getEndKey()) + "]");
3130       }
3131       errors.print("----");
3132     }
3133   }
3134 
3135   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3136     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3137       TableName tableName = entry.getValue().getTableName();
3138       Path path = entry.getKey();
3139       errors.print("This sidelined region dir should be bulk loaded: "
3140         + path.toString());
3141       errors.print("Bulk load command looks like: "
3142         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3143         + path.toUri().getPath() + " "+ tableName);
3144     }
3145   }
3146 
3147   public Multimap<byte[], HbckInfo> getOverlapGroups(
3148       TableName table) {
3149     TableInfo ti = tablesInfo.get(table);
3150     return ti.overlapGroups;
3151   }
3152 
3153   /**
3154    * Return descriptors of user-space tables whose metadata has not been
3155    * modified within the last few milliseconds specified by timelag.
3156    * If none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3157    * SPLITA_QUALIFIER, or SPLITB_QUALIFIER columns have changed within that
3158    * window, the table is a candidate to be returned.
3159    * @param numSkipped incremented for each table skipped as recently modified
3160    * @return tables that have not been modified recently
3161    */
3162   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3163     List<TableName> tableNames = new ArrayList<TableName>();
3164     long now = System.currentTimeMillis();
3165 
3166     for (HbckInfo hbi : regionInfoMap.values()) {
3167       MetaEntry info = hbi.metaEntry;
3168 
3169       // if the start key is zero-length, then we have found the first region of a table.
3170       // pick only those tables that were not modified in the last few milliseconds.
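           // e.g., with illustrative numbers: for timelag = 60000, a table whose first
           // region's modTime falls within the last minute is counted in numSkipped
           // instead of being returned.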
3171       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3172         if (info.modTime + timelag < now) {
3173           tableNames.add(info.getTable());
3174         } else {
3175           numSkipped.incrementAndGet(); // one more in-flux table
3176         }
3177       }
3178     }
3179     return getHTableDescriptors(tableNames);
3180   }
3181 
3182   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3183     HTableDescriptor[] htd = new HTableDescriptor[0];
3184     Admin admin = null;
3185     try {
3186       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3187       admin = new HBaseAdmin(getConf());
3188       htd = admin.getTableDescriptorsByTableName(tableNames);
3189     } catch (IOException e) {
3190       LOG.debug("Exception getting table descriptors", e);
3191     } finally {
3192       if (admin != null) {
3193         try {
3194           admin.close();
3195         } catch (IOException e) {
3196           LOG.debug("Exception closing HBaseAdmin", e);
3197         }
3198       }
3199     }
3200     return htd;
3201   }
3202 
3203   /**
3204    * Gets the entry in regionInfo corresponding to the given encoded
3205    * region name. If the region has not been seen yet, a new entry is added
3206    * and returned.
3207    */
3208   private synchronized HbckInfo getOrCreateInfo(String name) {
3209     HbckInfo hbi = regionInfoMap.get(name);
3210     if (hbi == null) {
3211       hbi = new HbckInfo(null);
3212       regionInfoMap.put(name, hbi);
3213     }
3214     return hbi;
3215   }
3216 
3217   private void checkAndFixTableLocks() throws IOException {
3218     TableLockChecker checker = new TableLockChecker(createZooKeeperWatcher(), errors);
3219     checker.checkTableLocks();
3220 
3221     if (this.fixTableLocks) {
3222       checker.fixExpiredTableLocks();
3223     }
3224   }
3225 
3226   /**
3227    * Check values in regionInfo for hbase:meta.
3228    * Check whether zero regions, or more than one region, claim to be
3229    * holding hbase:meta. If there is such an inconsistency, try to fix
3230    * it and report an error.
3231    * @throws IOException from HBaseFsckRepair functions
3232    * @throws KeeperException
3233    * @throws InterruptedException
3234    */
3235   boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3236     Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
3237     for (HbckInfo value : regionInfoMap.values()) {
3238       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3239         metaRegions.put(value.getReplicaId(), value);
3240       }
3241     }
3242     int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3243         .getRegionReplication();
3244     boolean noProblem = true;
3245     // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
3246     // Check the deployed servers. It should be exactly one server for each replica.
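         // e.g. with getRegionReplication() == 3, replicaIds 0..2 must each be hosted on
         // exactly one server; zero servers triggers assignMetaReplica(i) below, while
         // more than one triggers a multi-assignment fix.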
3247     for (int i = 0; i < metaReplication; i++) {
3248       HbckInfo metaHbckInfo = metaRegions.remove(i);
3249       List<ServerName> servers = new ArrayList<ServerName>();
3250       if (metaHbckInfo != null) {
3251         servers = metaHbckInfo.deployedOn;
3252       }
3253       if (servers.size() != 1) {
3254         noProblem = false;
3255         if (servers.size() == 0) {
3256           assignMetaReplica(i);
3257         } else if (servers.size() > 1) {
3258           errors.reportError(ERROR_CODE.MULTI_META_REGION,
3259               "hbase:meta, replicaId " + metaHbckInfo.getReplicaId()
3260               + " is deployed on more than one region server.");
3261           if (shouldFixAssignments()) {
3262             errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3263                          metaHbckInfo.getReplicaId() + "..");
3264             setShouldRerun();
3265             // try to fix it (treat it as a dupe assignment)
3266             HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3267           }
3268         }
3269       }
3270     }
3271     // unassign whatever is remaining in metaRegions. They are excess replicas.
3272     for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3273       noProblem = false;
3274       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3275           "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3276           ", deployed " + metaRegions.size());
3277       if (shouldFixAssignments()) {
3278         errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3279             " of hbase:meta..");
3280         setShouldRerun();
3281         unassignMetaReplica(entry.getValue());
3282       }
3283     }
3284     // if noProblem is false, rerun hbck with hopefully fixed META
3285     // if noProblem is true, no errors, so continue normally
3286     return noProblem;
3287   }
3288 
3289   private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3290   KeeperException {
3291     undeployRegions(hi);
3292     ZooKeeperWatcher zkw = createZooKeeperWatcher();
3293     ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
3294   }
3295 
3296   private void assignMetaReplica(int replicaId)
3297       throws IOException, KeeperException, InterruptedException {
3298     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3299         replicaId + " is not deployed on any region server.");
3300     if (shouldFixAssignments()) {
3301       errors.print("Trying to fix a problem with hbase:meta..");
3302       setShouldRerun();
3303       // try to fix it (treat it as unassigned region)
3304       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3305           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3306       HBaseFsckRepair.fixUnassigned(admin, h);
3307       HBaseFsckRepair.waitUntilAssigned(admin, h);
3308     }
3309   }
3310 
3311   /**
3312    * Scan hbase:meta, adding all regions found to the regionInfo map.
3313    * @throws IOException if an error is encountered
3314    */
3315   boolean loadMetaEntries() throws IOException {
3316     MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3317       int countRecord = 1;
3318 
3319       // comparator ordering Cells by timestamp so Collections.max picks the latest
3320       final Comparator<Cell> comp = new Comparator<Cell>() {
3321         @Override
3322         public int compare(Cell k1, Cell k2) {
3323           return Long.compare(k1.getTimestamp(), k2.getTimestamp()); // avoids int-cast overflow
3324         }
3325       };
3326 
3327       @Override
3328       public boolean visit(Result result) throws IOException {
3329         try {
3330 
3331           // record the latest modification of this META record
3332           long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3333           RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3334           if (rl == null) {
3335             emptyRegionInfoQualifiers.add(result);
3336             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3337               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3338             return true;
3339           }
3340           ServerName sn = null;
3341           if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
3342               rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3343             emptyRegionInfoQualifiers.add(result);
3344             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3345               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3346             return true;
3347           }
3348           HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3349           if (!(isTableIncluded(hri.getTable())
3350               || hri.isMetaRegion())) {
3351             return true;
3352           }
3353           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3354           for (HRegionLocation h : rl.getRegionLocations()) {
3355             if (h == null || h.getRegionInfo() == null) {
3356               continue;
3357             }
3358             sn = h.getServerName();
3359             hri = h.getRegionInfo();
3360 
3361             MetaEntry m = null;
3362             if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
3363               m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3364             } else {
3365               m = new MetaEntry(hri, sn, ts, null, null);
3366             }
3367             HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3368             if (previous == null) {
3369               regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3370             } else if (previous.metaEntry == null) {
3371               previous.metaEntry = m;
3372             } else {
3373               throw new IOException("Two entries in hbase:meta are the same " + previous);
3374             }
3375           }
3376           PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3377           for (HRegionInfo mergeRegion : new HRegionInfo[] {
3378               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3379             if (mergeRegion != null) {
3380               // This region has already been merged
3381               HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3382               hbInfo.setMerged(true);
3383             }
3384           }
3385 
3386           // show proof of progress to the user, once for every 100 records.
3387           if (countRecord % 100 == 0) {
3388             errors.progress();
3389           }
3390           countRecord++;
3391           return true;
3392         } catch (RuntimeException e) {
3393           LOG.error("Result=" + result);
3394           throw e;
3395         }
3396       }
3397     };
3398     if (!checkMetaOnly) {
3399       // Scan hbase:meta to pick up user regions
3400       MetaTableAccessor.fullScanRegions(connection, visitor);
3401     }
3402 
3403     errors.print("");
3404     return true;
3405   }
3406 
3407   /**
3408    * Stores the regioninfo entries scanned from META
3409    */
3410   static class MetaEntry extends HRegionInfo {
3411     ServerName regionServer;   // server hosting this region
3412     long modTime;          // timestamp of the most recent metadata modification
3413     HRegionInfo splitA, splitB; //split daughters
3414 
3415     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3416       this(rinfo, regionServer, modTime, null, null);
3417     }
3418 
3419     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3420         HRegionInfo splitA, HRegionInfo splitB) {
3421       super(rinfo);
3422       this.regionServer = regionServer;
3423       this.modTime = modTime;
3424       this.splitA = splitA;
3425       this.splitB = splitB;
3426     }
3427 
3428     @Override
3429     public boolean equals(Object o) {
3430       boolean superEq = super.equals(o);
3431       if (!superEq) {
3432         return superEq;
3433       }
3434 
3435       MetaEntry me = (MetaEntry) o;
3436       if (!regionServer.equals(me.regionServer)) {
3437         return false;
3438       }
3439       return (modTime == me.modTime);
3440     }
3441 
3442     @Override
3443     public int hashCode() {
3444       int hash = Arrays.hashCode(getRegionName());
3445       hash ^= getRegionId();
3446       hash ^= Arrays.hashCode(getStartKey());
3447       hash ^= Arrays.hashCode(getEndKey());
3448       hash ^= Boolean.valueOf(isOffline()).hashCode();
3449       hash ^= getTable().hashCode();
3450       if (regionServer != null) {
3451         hash ^= regionServer.hashCode();
3452       }
3453       hash ^= modTime;
3454       return hash;
3455     }
3456   }
3457 
3458   /**
3459    * Stores the regioninfo entries from HDFS
3460    */
3461   static class HdfsEntry {
3462     HRegionInfo hri;
3463     Path hdfsRegionDir = null;
3464     long hdfsRegionDirModTime  = 0;
3465     boolean hdfsRegioninfoFilePresent = false;
3466     boolean hdfsOnlyEdits = false;
3467   }
3468 
3469   /**
3470    * Stores the regioninfo retrieved from Online region servers.
3471    */
3472   static class OnlineEntry {
3473     HRegionInfo hri;
3474     ServerName hsa;
3475 
3476     @Override
3477     public String toString() {
3478       return hsa.toString() + ";" + hri.getRegionNameAsString();
3479     }
3480   }
3481 
3482   /**
3483    * Maintain information about a particular region.  It gathers information
3484    * from three places -- HDFS, META, and region servers.
3485    */
3486   public static class HbckInfo implements KeyRange {
3487     private MetaEntry metaEntry = null; // info in META
3488     private HdfsEntry hdfsEntry = null; // info in HDFS
3489     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3490     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3491     private boolean skipChecks = false; // whether to skip further checks to this region info.
3492     private boolean isMerged = false;// whether this region has already been merged into another one
3493     private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
3494     private HRegionInfo primaryHRIForDeployedReplica = null;
3495 
3496     HbckInfo(MetaEntry metaEntry) {
3497       this.metaEntry = metaEntry;
3498     }
3499 
3500     public int getReplicaId() {
3501       if (metaEntry != null) return metaEntry.getReplicaId();
3502       return deployedReplicaId;
3503     }
3504 
3505     public synchronized void addServer(HRegionInfo hri, ServerName server) {
3506       OnlineEntry rse = new OnlineEntry();
3507       rse.hri = hri;
3508       rse.hsa = server;
3509       this.deployedEntries.add(rse);
3510       this.deployedOn.add(server);
3511       // save the replicaId that we see deployed in the cluster
3512       this.deployedReplicaId = hri.getReplicaId();
3513       this.primaryHRIForDeployedReplica =
3514           RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3515     }
3516 
3517     @Override
3518     public synchronized String toString() {
3519       StringBuilder sb = new StringBuilder();
3520       sb.append("{ meta => ");
3521       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3522       sb.append( ", hdfs => " + getHdfsRegionDir());
3523       sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3524       sb.append( ", replicaId => " + getReplicaId());
3525       sb.append(" }");
3526       return sb.toString();
3527     }
3528 
3529     @Override
3530     public byte[] getStartKey() {
3531       if (this.metaEntry != null) {
3532         return this.metaEntry.getStartKey();
3533       } else if (this.hdfsEntry != null) {
3534         return this.hdfsEntry.hri.getStartKey();
3535       } else {
3536         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3537         return null;
3538       }
3539     }
3540 
3541     @Override
3542     public byte[] getEndKey() {
3543       if (this.metaEntry != null) {
3544         return this.metaEntry.getEndKey();
3545       } else if (this.hdfsEntry != null) {
3546         return this.hdfsEntry.hri.getEndKey();
3547       } else {
3548         LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3549         return null;
3550       }
3551     }
3552 
3553     public TableName getTableName() {
3554       if (this.metaEntry != null) {
3555         return this.metaEntry.getTable();
3556       } else if (this.hdfsEntry != null) {
3557         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3558         // so we get the name from the Path
3559         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3560         return FSUtils.getTableName(tableDir);
3561       } else {
3562         // return the info from the first online/deployed hri
3563         for (OnlineEntry e : deployedEntries) {
3564           return e.hri.getTable();
3565         }
3566         return null;
3567       }
3568     }
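         // Path layout sketch (rootdir is hypothetical): for an hdfsRegionDir of
         // /hbase/data/default/usertable/5e9b1b4f..., the parent is the table dir and
         // FSUtils.getTableName() recovers the TableName "usertable".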
3569 
3570     public String getRegionNameAsString() {
3571       if (metaEntry != null) {
3572         return metaEntry.getRegionNameAsString();
3573       } else if (hdfsEntry != null) {
3574         if (hdfsEntry.hri != null) {
3575           return hdfsEntry.hri.getRegionNameAsString();
3576         }
3577       } else {
3578         // return the info from the first online/deployed hri
3579         for (OnlineEntry e : deployedEntries) {
3580           return e.hri.getRegionNameAsString();
3581         }
3582       }
3583       return null;
3584     }
3585 
3586     public byte[] getRegionName() {
3587       if (metaEntry != null) {
3588         return metaEntry.getRegionName();
3589       } else if (hdfsEntry != null) {
3590         return hdfsEntry.hri.getRegionName();
3591       } else {
3592         // return the info from the first online/deployed hri
3593         for (OnlineEntry e : deployedEntries) {
3594           return e.hri.getRegionName();
3595         }
3596         return null;
3597       }
3598     }
3599 
3600     public HRegionInfo getPrimaryHRIForDeployedReplica() {
3601       return primaryHRIForDeployedReplica;
3602     }
3603 
3604     Path getHdfsRegionDir() {
3605       if (hdfsEntry == null) {
3606         return null;
3607       }
3608       return hdfsEntry.hdfsRegionDir;
3609     }
3610 
3611     boolean containsOnlyHdfsEdits() {
3612       if (hdfsEntry == null) {
3613         return false;
3614       }
3615       return hdfsEntry.hdfsOnlyEdits;
3616     }
3617 
3618     boolean isHdfsRegioninfoPresent() {
3619       if (hdfsEntry == null) {
3620         return false;
3621       }
3622       return hdfsEntry.hdfsRegioninfoFilePresent;
3623     }
3624 
3625     long getModTime() {
3626       if (hdfsEntry == null) {
3627         return 0;
3628       }
3629       return hdfsEntry.hdfsRegionDirModTime;
3630     }
3631 
3632     HRegionInfo getHdfsHRI() {
3633       if (hdfsEntry == null) {
3634         return null;
3635       }
3636       return hdfsEntry.hri;
3637     }
3638 
3639     public void setSkipChecks(boolean skipChecks) {
3640       this.skipChecks = skipChecks;
3641     }
3642 
3643     public boolean isSkipChecks() {
3644       return skipChecks;
3645     }
3646 
3647     public void setMerged(boolean isMerged) {
3648       this.isMerged = isMerged;
3649     }
3650 
3651     public boolean isMerged() {
3652       return this.isMerged;
3653     }
3654   }
3655 
3656   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3657     @Override
3658     public int compare(HbckInfo l, HbckInfo r) {
3659       if (l == r) {
3660         // same instance
3661         return 0;
3662       }
3663 
3664       int tableCompare = l.getTableName().compareTo(r.getTableName());
3665       if (tableCompare != 0) {
3666         return tableCompare;
3667       }
3668 
3669       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3670           l.getStartKey(), r.getStartKey());
3671       if (startComparison != 0) {
3672         return startComparison;
3673       }
3674 
3675       // Special case for absolute endkey
3676       byte[] endKey = r.getEndKey();
3677       endKey = (endKey.length == 0) ? null : endKey;
3678       byte[] endKey2 = l.getEndKey();
3679       endKey2 = (endKey2.length == 0) ? null : endKey2;
3680       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3681           endKey2,  endKey);
3682 
3683       if (endComparison != 0) {
3684         return endComparison;
3685       }
3686 
3687       // use regionId as tiebreaker.
3688       // Null is considered after all possible values so make it bigger.
3689       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3690         return 0;
3691       }
3692       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3693         return 1;
3694       }
3695       // l.hdfsEntry must not be null
3696       if (r.hdfsEntry == null) {
3697         return -1;
3698       }
3699       // both l.hdfsEntry and r.hdfsEntry must not be null.
3700       return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId()); // avoids int-cast overflow
3701     }
3702   };
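   // Ordering sketch for 'cmp': entries sort by table, then start key, then end key
   // (an empty end key, converted to null above, sorts after every concrete key),
   // and finally by regionId, with entries lacking an hdfsEntry sorting last.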
3703 
3704   /**
3705    * Prints summary of all tables found on the system.
3706    */
3707   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3708     StringBuilder sb = new StringBuilder();
3709     errors.print("Summary:");
3710     for (TableInfo tInfo : tablesInfo.values()) {
3711       if (errors.tableHasErrors(tInfo)) {
3712         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3713       } else {
3714         errors.print("  " + tInfo.getName() + " is okay.");
3715       }
3716       errors.print("    Number of regions: " + tInfo.getNumRegions());
3717       sb.setLength(0); // clear out existing buffer, if any.
3718       sb.append("    Deployed on: ");
3719       for (ServerName server : tInfo.deployedOn) {
3720         sb.append(" " + server.toString());
3721       }
3722       errors.print(sb.toString());
3723     }
3724   }
3725 
3726   static ErrorReporter getErrorReporter(
3727       final Configuration conf) throws ClassNotFoundException {
3728     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3729     return ReflectionUtils.newInstance(reporter, conf);
3730   }
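   // A custom reporter can be plugged in via configuration; the class name here is
   // illustrative, it only needs to implement the ErrorReporter interface below:
   //   conf.setClass("hbasefsck.errorreporter", MyErrorReporter.class, ErrorReporter.class);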
3731 
3732   public interface ErrorReporter {
3733     enum ERROR_CODE {
3734       UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3735       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
3736       NOT_DEPLOYED,
3737       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3738       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3739       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3740       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3741       WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE,
3742       NO_TABLE_STATE
3743     }
3744     void clear();
3745     void report(String message);
3746     void reportError(String message);
3747     void reportError(ERROR_CODE errorCode, String message);
3748     void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3749     void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3750     void reportError(
3751       ERROR_CODE errorCode,
3752       String message,
3753       TableInfo table,
3754       HbckInfo info1,
3755       HbckInfo info2
3756     );
3757     int summarize();
3758     void detail(String details);
3759     ArrayList<ERROR_CODE> getErrorList();
3760     void progress();
3761     void print(String message);
3762     void resetErrors();
3763     boolean tableHasErrors(TableInfo table);
3764   }
3765 
3766   static class PrintingErrorReporter implements ErrorReporter {
3767     public int errorCount = 0;
3768     private int showProgress;
3769     // How frequently calls to progress() will create output
3770     private static final int progressThreshold = 100;
3771 
3772     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3773 
3774     // for use by unit tests to verify which errors were discovered
3775     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3776 
3777     @Override
3778     public void clear() {
3779       errorTables.clear();
3780       errorList.clear();
3781       errorCount = 0;
3782     }
3783 
3784     @Override
3785     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3786       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3787         System.err.println(message);
3788         return;
3789       }
3790 
3791       errorList.add(errorCode);
3792       if (!summary) {
3793         System.out.println("ERROR: " + message);
3794       }
3795       errorCount++;
3796       showProgress = 0;
3797     }
3798 
3799     @Override
3800     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3801       errorTables.add(table);
3802       reportError(errorCode, message);
3803     }
3804 
3805     @Override
3806     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3807                                          HbckInfo info) {
3808       errorTables.add(table);
3809       String reference = "(region " + info.getRegionNameAsString() + ")";
3810       reportError(errorCode, reference + " " + message);
3811     }
3812 
3813     @Override
3814     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3815                                          HbckInfo info1, HbckInfo info2) {
3816       errorTables.add(table);
3817       String reference = "(regions " + info1.getRegionNameAsString()
3818           + " and " + info2.getRegionNameAsString() + ")";
3819       reportError(errorCode, reference + " " + message);
3820     }
3821 
3822     @Override
3823     public synchronized void reportError(String message) {
3824       reportError(ERROR_CODE.UNKNOWN, message);
3825     }
3826 
3827     /**
3828      * Report error information, but do not increment the error count.  Intended for cases
3829      * where the actual error would have been reported previously.
3830      * @param message
3831      */
3832     @Override
3833     public synchronized void report(String message) {
3834       if (!summary) {
3835         System.out.println("ERROR: " + message);
3836       }
3837       showProgress = 0;
3838     }
3839 
3840     @Override
3841     public synchronized int summarize() {
3842       System.out.println(Integer.toString(errorCount) +
3843                          " inconsistencies detected.");
3844       if (errorCount == 0) {
3845         System.out.println("Status: OK");
3846         return 0;
3847       } else {
3848         System.out.println("Status: INCONSISTENT");
3849         return -1;
3850       }
3851     }
3852 
3853     @Override
3854     public ArrayList<ERROR_CODE> getErrorList() {
3855       return errorList;
3856     }
3857 
3858     @Override
3859     public synchronized void print(String message) {
3860       if (!summary) {
3861         System.out.println(message);
3862       }
3863     }
3864 
3865     @Override
3866     public boolean tableHasErrors(TableInfo table) {
3867       return errorTables.contains(table);
3868     }
3869 
3870     @Override
3871     public void resetErrors() {
3872       errorCount = 0;
3873     }
3874 
3875     @Override
3876     public synchronized void detail(String message) {
3877       if (details) {
3878         System.out.println(message);
3879       }
3880       showProgress = 0;
3881     }
3882 
3883     @Override
3884     public synchronized void progress() {
3885       if (showProgress++ == progressThreshold) {
3886         if (!summary) {
3887           System.out.print(".");
3888         }
3889         showProgress = 0;
3890       }
3891     }
3892   }
3893 
3894   /**
3895    * Contact a region server and get all information from it
3896    */
3897   static class WorkItemRegion implements Callable<Void> {
3898     private HBaseFsck hbck;
3899     private ServerName rsinfo;
3900     private ErrorReporter errors;
3901     private HConnection connection;
3902 
3903     WorkItemRegion(HBaseFsck hbck, ServerName info,
3904                    ErrorReporter errors, HConnection connection) {
3905       this.hbck = hbck;
3906       this.rsinfo = info;
3907       this.errors = errors;
3908       this.connection = connection;
3909     }
3910 
3911     @Override
3912     public synchronized Void call() throws IOException {
3913       errors.progress();
3914       try {
3915         BlockingInterface server = connection.getAdmin(rsinfo);
3916 
3917         // list all online regions from this region server
3918         List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3919         regions = filterRegions(regions);
3920 
3921         if (details) {
3922           errors.detail("RegionServer: " + rsinfo.getServerName() +
3923                            " number of regions: " + regions.size());
3924           for (HRegionInfo rinfo: regions) {
3925             errors.detail("  " + rinfo.getRegionNameAsString() +
3926                              " id: " + rinfo.getRegionId() +
3927                              " encoded_name: " + rinfo.getEncodedName() +
3928                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3929                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3930           }
3931         }
3932 
3933         // check to see if the existence of this region matches the region in META
3934         for (HRegionInfo r:regions) {
3935           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3936           hbi.addServer(r, rsinfo);
3937         }
3938       } catch (IOException e) {          // unable to connect to the region server.
3939         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3940           " Unable to fetch region information. " + e);
3941         throw e;
3942       }
3943       return null;
3944     }
3945 
3946     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3947       List<HRegionInfo> ret = Lists.newArrayList();
3948       for (HRegionInfo hri : regions) {
3949         if (hri.isMetaTable() || (!hbck.checkMetaOnly
3950             && hbck.isTableIncluded(hri.getTable()))) {
3951           ret.add(hri);
3952         }
3953       }
3954       return ret;
3955     }
3956   }
3957 
3958   /**
3959    * Contact hdfs and load all information about the specified table directory
3960    * into the regioninfo list.
3961    */
3962   static class WorkItemHdfsDir implements Callable<Void> {
3963     private HBaseFsck hbck;
3964     private FileStatus tableDir;
3965     private ErrorReporter errors;
3966     private FileSystem fs;
3967 
3968     WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3969                     FileStatus status) {
3970       this.hbck = hbck;
3971       this.fs = fs;
3972       this.tableDir = status;
3973       this.errors = errors;
3974     }
3975 
3976     @Override
3977     public synchronized Void call() throws IOException {
3978       try {
3979         // level 2: <HBASE_DIR>/<table>/*
3980         FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3981         for (FileStatus regionDir : regionDirs) {
3982           errors.progress();
3983           String encodedName = regionDir.getPath().getName();
3984           // ignore directories that aren't hexadecimal
3985           if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
3986             continue;
3987           }
3988 
3989           LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
3990           HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
3991           HdfsEntry he = new HdfsEntry();
3992           synchronized (hbi) {
3993             if (hbi.getHdfsRegionDir() != null) {
3994               errors.print("Directory " + encodedName + " duplicate?? " +
3995                            hbi.getHdfsRegionDir());
3996             }
3997 
3998             he.hdfsRegionDir = regionDir.getPath();
3999             he.hdfsRegionDirModTime = regionDir.getModificationTime();
4000             Path regioninfoFile = new Path(he.hdfsRegionDir, HRegionFileSystem.REGION_INFO_FILE);
4001             he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile);
4002             // we add to orphan list when we attempt to read .regioninfo
4003 
4004             // Set a flag if this region contains only edits
4005             // This is special case if a region is left after split
4006             he.hdfsOnlyEdits = true;
4007             FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4008             Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4009             for (FileStatus subDir : subDirs) {
4010               errors.progress();
4011               String sdName = subDir.getPath().getName();
4012               if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4013                 he.hdfsOnlyEdits = false;
4014                 break;
4015               }
4016             }
4017             hbi.hdfsEntry = he;
4018           }
4019         }
4020       } catch (IOException e) {
4021         // unable to list the table directory in hdfs.
4022         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4023             + tableDir.getPath().getName()
4024             + " Unable to fetch region information. " + e);
4025         throw e;
4026       }
4027       return null;
4028     }
4029   }
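       /*
        * The directory layout walked above, as a sketch (only the pieces this
        * work item inspects are shown):
        *
        *   <HBASE_DIR>/
        *     <table>/                    <- tableDir handed to this work item
        *       <encoded-region-name>/    <- hex-named region directory
        *         .regioninfo             <- serialized HRegionInfo
        *         recovered.edits/        <- edits-only leftovers, e.g. after a split
        *         <column-family>/        <- store files
        */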
4030 
4031   /**
4032    * Contact hdfs and load the .regioninfo file for a single region into its
4033    * HbckInfo entry, flagging the region directory as orphaned on failure.
4034    */
4035   static class WorkItemHdfsRegionInfo implements Callable<Void> {
4036     private HbckInfo hbi;
4037     private HBaseFsck hbck;
4038     private ErrorReporter errors;
4039 
4040     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4041       this.hbi = hbi;
4042       this.hbck = hbck;
4043       this.errors = errors;
4044     }
4045 
4046     @Override
4047     public synchronized Void call() throws IOException {
4048       // only load entries that haven't been loaded yet.
4049       if (hbi.getHdfsHRI() == null) {
4050         try {
4051           errors.progress();
4052           hbck.loadHdfsRegioninfo(hbi);
4053         } catch (IOException ioe) {
4054           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4055               + hbi.getTableName() + " in hdfs dir "
4056               + hbi.getHdfsRegionDir()
4057               + "!  It may be an invalid format or version file.  Treating as "
4058               + "an orphaned regiondir.";
4059           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4060           try {
4061             hbck.debugLsr(hbi.getHdfsRegionDir());
4062           } catch (IOException ioe2) {
4063             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4064             throw ioe2;
4065           }
4066           hbck.orphanHdfsDirs.add(hbi);
4067           throw ioe;
4068         }
4069       }
4070       return null;
4071     }
4072   }
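       /*
        * Regions collected in orphanHdfsDirs above are not repaired here; they
        * are revisited by a later adoption pass (only when -fixHdfsOrphans is
        * set), which attempts to reconstruct a usable .regioninfo from the
        * contents of the region directory.
        */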
4073 
4074   /**
4075    * Display the full report from fsck. This displays all live and dead region
4076    * servers, and all known regions.
4077    */
4078   public static void setDisplayFullReport() {
4079     details = true;
4080   }
4081 
4082   /**
4083    * Set summary mode.
4084    * Print only a summary of each table and its status (OK or INCONSISTENT).
4085    */
4086   static void setSummary() {
4087     summary = true;
4088   }
4089 
4090   /**
4091    * Set hbase:meta check mode.
4092    * Print only info about hbase:meta table deployment/state
4093    */
4094   void setCheckMetaOnly() {
4095     checkMetaOnly = true;
4096   }
4097 
4098   /**
4099    * Set region boundaries check mode.
4100    */
4101   void setRegionBoundariesCheck() {
4102     checkRegionBoundaries = true;
4103   }
4104 
4105   /**
4106    * Set table locks fix mode.
4107    * Delete table locks held for a long time
4108    */
4109   public void setFixTableLocks(boolean shouldFix) {
4110     fixTableLocks = shouldFix;
4111     fixAny |= shouldFix;
4112   }
4113 
4114   /**
4115    * Mark that a fix was attempted, so the fsck tool should be rerun to
4116    * verify that the repair did not introduce new inconsistencies.
4117    * (The companion shouldRerun() below reports whether a rerun is
4118    * warranted.)
4119    */
4120   void setShouldRerun() {
4121     rerun = true;
4122   }
4123 
4124   boolean shouldRerun() {
4125     return rerun;
4126   }
4127 
4128   /**
4129    * Fix assignment inconsistencies found by fsck, e.g. regions that are
4130    * unassigned, multiply assigned, or deployed on the wrong server.
4131    */
4132   public void setFixAssignments(boolean shouldFix) {
4133     fixAssignments = shouldFix;
4134     fixAny |= shouldFix;
4135   }
4136 
4137   boolean shouldFixAssignments() {
4138     return fixAssignments;
4139   }
4140 
4141   public void setFixMeta(boolean shouldFix) {
4142     fixMeta = shouldFix;
4143     fixAny |= shouldFix;
4144   }
4145 
4146   boolean shouldFixMeta() {
4147     return fixMeta;
4148   }
4149 
4150   public void setFixEmptyMetaCells(boolean shouldFix) {
4151     fixEmptyMetaCells = shouldFix;
4152     fixAny |= shouldFix;
4153   }
4154 
4155   boolean shouldFixEmptyMetaCells() {
4156     return fixEmptyMetaCells;
4157   }
4158 
4159   public void setCheckHdfs(boolean checking) {
4160     checkHdfs = checking;
4161   }
4162 
4163   boolean shouldCheckHdfs() {
4164     return checkHdfs;
4165   }
4166 
4167   public void setFixHdfsHoles(boolean shouldFix) {
4168     fixHdfsHoles = shouldFix;
4169     fixAny |= shouldFix;
4170   }
4171 
4172   boolean shouldFixHdfsHoles() {
4173     return fixHdfsHoles;
4174   }
4175 
4176   public void setFixTableOrphans(boolean shouldFix) {
4177     fixTableOrphans = shouldFix;
4178     fixAny |= shouldFix;
4179   }
4180 
4181   boolean shouldFixTableOrphans() {
4182     return fixTableOrphans;
4183   }
4184 
4185   public void setFixHdfsOverlaps(boolean shouldFix) {
4186     fixHdfsOverlaps = shouldFix;
4187     fixAny |= shouldFix;
4188   }
4189 
4190   boolean shouldFixHdfsOverlaps() {
4191     return fixHdfsOverlaps;
4192   }
4193 
4194   public void setFixHdfsOrphans(boolean shouldFix) {
4195     fixHdfsOrphans = shouldFix;
4196     fixAny |= shouldFix;
4197   }
4198 
4199   boolean shouldFixHdfsOrphans() {
4200     return fixHdfsOrphans;
4201   }
4202 
4203   public void setFixVersionFile(boolean shouldFix) {
4204     fixVersionFile = shouldFix;
4205     fixAny |= shouldFix;
4206   }
4207 
4208   public boolean shouldFixVersionFile() {
4209     return fixVersionFile;
4210   }
4211 
4212   public void setSidelineBigOverlaps(boolean sbo) {
4213     this.sidelineBigOverlaps = sbo;
4214   }
4215 
4216   public boolean shouldSidelineBigOverlaps() {
4217     return sidelineBigOverlaps;
4218   }
4219 
4220   public void setFixSplitParents(boolean shouldFix) {
4221     fixSplitParents = shouldFix;
4222     fixAny |= shouldFix;
4223   }
4224 
4225   boolean shouldFixSplitParents() {
4226     return fixSplitParents;
4227   }
4228 
4229   public void setFixReferenceFiles(boolean shouldFix) {
4230     fixReferenceFiles = shouldFix;
4231     fixAny |= shouldFix;
4232   }
4233 
4234   boolean shouldFixReferenceFiles() {
4235     return fixReferenceFiles;
4236   }
4237 
4238   public boolean shouldIgnorePreCheckPermission() {
4239     return !fixAny || ignorePreCheckPermission;
4240   }
4241 
4242   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4243     this.ignorePreCheckPermission = ignorePreCheckPermission;
4244   }
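       /*
        * A minimal programmatic sketch (hypothetical; hbck is normally driven
        * through exec() from the command line). Every setFixXxx(true) call also
        * flips fixAny, which shouldIgnorePreCheckPermission() consults to decide
        * whether the filesystem write-permission pre-check applies:
        *
        *   HBaseFsck fsck = new HBaseFsck(conf);
        *   fsck.setFixAssignments(true);  // fixAny becomes true as a side effect
        *   fsck.setFixMeta(true);
        *   // fixes are requested and permissions are not ignored, so:
        *   assert !fsck.shouldIgnorePreCheckPermission();
        */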
4245 
4246   /**
4247    * @param mm maximum number of regions to merge into a single region.
4248    */
4249   public void setMaxMerge(int mm) {
4250     this.maxMerge = mm;
4251   }
4252 
4253   public int getMaxMerge() {
4254     return maxMerge;
4255   }
4256 
4257   public void setMaxOverlapsToSideline(int mo) {
4258     this.maxOverlapsToSideline = mo;
4259   }
4260 
4261   public int getMaxOverlapsToSideline() {
4262     return maxOverlapsToSideline;
4263   }
4264 
4265   /**
4266    * Only check/fix tables specified by the list;
4267    * an empty list means all tables are included.
4268    */
4269   boolean isTableIncluded(TableName table) {
4270     return tablesIncluded.isEmpty() || tablesIncluded.contains(table);
4271   }
4272 
4273   public void includeTable(TableName table) {
4274     tablesIncluded.add(table);
4275   }
4276 
4277   Set<TableName> getIncludedTables() {
4278     return new HashSet<TableName>(tablesIncluded);
4279   }
4280 
4281   /**
4282    * Restrict checking to tables whose state in hbase:meta has not changed
4283    * within the window configured by hbase.admin.fsck.timelag.
4284    * @param seconds the window size in seconds
4285    */
4286   public void setTimeLag(long seconds) {
4287     timelag = seconds * 1000; // convert to milliseconds
4288   }
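       /*
        * e.g. setTimeLag(60) skips entries whose hbase:meta state changed within
        * the past minute (stored internally as 60000 ms), which avoids flagging
        * regions that are legitimately in transition.
        */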
4289 
4290   /**
4291    * Set the directory used for sidelined data.
4292    * @param sidelineDir HDFS path under which sidelined data is stored
4293    */
4294   public void setSidelineDir(String sidelineDir) {
4295     this.sidelineDir = new Path(sidelineDir);
4296   }
4297 
4298   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4299     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4300   }
4301 
4302   public HFileCorruptionChecker getHFilecorruptionChecker() {
4303     return hfcc;
4304   }
4305 
4306   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4307     this.hfcc = hfcc;
4308   }
4309 
4310   public void setRetCode(int code) {
4311     this.retcode = code;
4312   }
4313 
4314   public int getRetCode() {
4315     return retcode;
4316   }
4317 
4318   protected HBaseFsck printUsageAndExit() {
4319     StringWriter sw = new StringWriter(2048);
4320     PrintWriter out = new PrintWriter(sw);
4321     out.println("Usage: fsck [opts] {only tables}");
4322     out.println(" where [opts] are:");
4323     out.println("   -help Display help options (this)");
4324     out.println("   -details Display full report of all regions.");
4325     out.println("   -timelag <timeInSeconds>  Process only regions that " +
4326                        "have not experienced any metadata updates in the last " +
4327                        "<timeInSeconds> seconds.");
4328     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4329         " before re-checking whether the fixes worked, when run with -fix");
4330     out.println("   -summary Print only summary of the tables and status.");
4331     out.println("   -metaonly Only check the state of the hbase:meta table.");
4332     out.println("   -sidelineDir <hdfs://> HDFS path to back up existing meta.");
4333     out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
4334 
4335     out.println("");
4336     out.println("  Metadata Repair options: (expert features, use with caution!)");
4337     out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
4338     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4339     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4340     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4341         + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4342     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4343     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4344     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4345     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4346     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4347     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE + " by default)");
4348     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow sidelining big overlaps");
4349     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE + " by default)");
4350     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4351     out.println("   -ignorePreCheckPermission  Ignore filesystem permission pre-check");
4352     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4353     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4354         + " (empty REGIONINFO_QUALIFIER rows)");
4355 
4356     out.println("");
4357     out.println("  Datafile Repair options: (expert features, use with caution!)");
4358     out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
4359     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
4360 
4361     out.println("");
4362     out.println("  Metadata Repair shortcuts");
4363     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4364         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
4365     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4366 
4367     out.println("");
4368     out.println("  Table lock options");
4369     out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4370 
4371     out.flush();
4372     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4373 
4374     setRetCode(-2);
4375     return this;
4376   }
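       /*
        * Example invocations (illustrative; flags as documented above):
        *
        *   hbase hbck                        report-only check of all tables
        *   hbase hbck -details TestTable     full region report for one table
        *   hbase hbck -repairHoles           conservative hole/assignment repair
        */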
4377 
4378   /**
4379    * Main program entry point.
4380    *
4381    * @param args command-line arguments, passed through to {@link HBaseFsckTool}
4382    * @throws Exception if the check or any requested repair fails unrecoverably
4383    */
4384   public static void main(String[] args) throws Exception {
4385     // create a fsck object
4386     Configuration conf = HBaseConfiguration.create();
4387     Path hbasedir = FSUtils.getRootDir(conf);
4388     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4389     FSUtils.setFsDefault(conf, new Path(defaultFs));
4390     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4391     System.exit(ret);
4392   }
4393 
4394   /**
4395    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4396    */
4397   static class HBaseFsckTool extends Configured implements Tool {
4398     HBaseFsckTool(Configuration conf) { super(conf); }
4399     @Override
4400     public int run(String[] args) throws Exception {
4401       HBaseFsck hbck = new HBaseFsck(getConf());
4402       hbck.exec(hbck.executor, args);
4403       hbck.close();
4404       return hbck.getRetCode();
4405     }
4406   }
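       /*
        * Because HBaseFsckTool runs through ToolRunner (see main above), generic
        * Hadoop options are parsed before run(), so configuration can be
        * overridden inline, e.g. (illustrative):
        *
        *   hbase hbck -Dhbase.zookeeper.quorum=zk1,zk2,zk3 -summary
        */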
4407 
4408 
4409   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4410     ServiceException, InterruptedException {
4411     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4412 
4413     boolean checkCorruptHFiles = false;
4414     boolean sidelineCorruptHFiles = false;
4415 
4416     // Process command-line args.
4417     for (int i = 0; i < args.length; i++) {
4418       String cmd = args[i];
4419       if (cmd.equals("-help") || cmd.equals("-h")) {
4420         return printUsageAndExit();
4421       } else if (cmd.equals("-details")) {
4422         setDisplayFullReport();
4423       } else if (cmd.equals("-timelag")) {
4424         if (i == args.length - 1) {
4425           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4426           return printUsageAndExit();
4427         }
4428         try {
4429           long timelag = Long.parseLong(args[i+1]);
4430           setTimeLag(timelag);
4431         } catch (NumberFormatException e) {
4432           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4433           return printUsageAndExit();
4434         }
4435         i++;
4436       } else if (cmd.equals("-sleepBeforeRerun")) {
4437         if (i == args.length - 1) {
4438           errors.reportError(ERROR_CODE.WRONG_USAGE,
4439             "HBaseFsck: -sleepBeforeRerun needs a value.");
4440           return printUsageAndExit();
4441         }
4442         try {
4443           sleepBeforeRerun = Long.parseLong(args[i+1]);
4444         } catch (NumberFormatException e) {
4445           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4446           return printUsageAndExit();
4447         }
4448         i++;
4449       } else if (cmd.equals("-sidelineDir")) {
4450         if (i == args.length - 1) {
4451           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4452           return printUsageAndExit();
4453         }
4454         i++;
4455         setSidelineDir(args[i]);
4456       } else if (cmd.equals("-fix")) {
4457         errors.reportError(ERROR_CODE.WRONG_USAGE,
4458           "This option is deprecated; please use -fixAssignments instead.");
4459         setFixAssignments(true);
4460       } else if (cmd.equals("-fixAssignments")) {
4461         setFixAssignments(true);
4462       } else if (cmd.equals("-fixMeta")) {
4463         setFixMeta(true);
4464       } else if (cmd.equals("-noHdfsChecking")) {
4465         setCheckHdfs(false);
4466       } else if (cmd.equals("-fixHdfsHoles")) {
4467         setFixHdfsHoles(true);
4468       } else if (cmd.equals("-fixHdfsOrphans")) {
4469         setFixHdfsOrphans(true);
4470       } else if (cmd.equals("-fixTableOrphans")) {
4471         setFixTableOrphans(true);
4472       } else if (cmd.equals("-fixHdfsOverlaps")) {
4473         setFixHdfsOverlaps(true);
4474       } else if (cmd.equals("-fixVersionFile")) {
4475         setFixVersionFile(true);
4476       } else if (cmd.equals("-sidelineBigOverlaps")) {
4477         setSidelineBigOverlaps(true);
4478       } else if (cmd.equals("-fixSplitParents")) {
4479         setFixSplitParents(true);
4480       } else if (cmd.equals("-ignorePreCheckPermission")) {
4481         setIgnorePreCheckPermission(true);
4482       } else if (cmd.equals("-checkCorruptHFiles")) {
4483         checkCorruptHFiles = true;
4484       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4485         sidelineCorruptHFiles = true;
4486       } else if (cmd.equals("-fixReferenceFiles")) {
4487         setFixReferenceFiles(true);
4488       } else if (cmd.equals("-fixEmptyMetaCells")) {
4489         setFixEmptyMetaCells(true);
4490       } else if (cmd.equals("-repair")) {
4491         // this attempts to merge overlapping hdfs regions, needs testing
4492         // under load
4493         setFixHdfsHoles(true);
4494         setFixHdfsOrphans(true);
4495         setFixMeta(true);
4496         setFixAssignments(true);
4497         setFixHdfsOverlaps(true);
4498         setFixVersionFile(true);
4499         setSidelineBigOverlaps(true);
4500         setFixSplitParents(false);
4501         setCheckHdfs(true);
4502         setFixReferenceFiles(true);
4503         setFixTableLocks(true);
4504       } else if (cmd.equals("-repairHoles")) {
4505         // this will make all missing hdfs regions available but may lose data
4506         setFixHdfsHoles(true);
4507         setFixHdfsOrphans(false);
4508         setFixMeta(true);
4509         setFixAssignments(true);
4510         setFixHdfsOverlaps(false);
4511         setSidelineBigOverlaps(false);
4512         setFixSplitParents(false);
4513         setCheckHdfs(true);
4514       } else if (cmd.equals("-maxOverlapsToSideline")) {
4515         if (i == args.length - 1) {
4516           errors.reportError(ERROR_CODE.WRONG_USAGE,
4517             "-maxOverlapsToSideline needs a numeric value argument.");
4518           return printUsageAndExit();
4519         }
4520         try {
4521           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4522           setMaxOverlapsToSideline(maxOverlapsToSideline);
4523         } catch (NumberFormatException e) {
4524           errors.reportError(ERROR_CODE.WRONG_USAGE,
4525             "-maxOverlapsToSideline needs a numeric value argument.");
4526           return printUsageAndExit();
4527         }
4528         i++;
4529       } else if (cmd.equals("-maxMerge")) {
4530         if (i == args.length - 1) {
4531           errors.reportError(ERROR_CODE.WRONG_USAGE,
4532             "-maxMerge needs a numeric value argument.");
4533           return printUsageAndExit();
4534         }
4535         try {
4536           int maxMerge = Integer.parseInt(args[i+1]);
4537           setMaxMerge(maxMerge);
4538         } catch (NumberFormatException e) {
4539           errors.reportError(ERROR_CODE.WRONG_USAGE,
4540             "-maxMerge needs a numeric value argument.");
4541           return printUsageAndExit();
4542         }
4543         i++;
4544       } else if (cmd.equals("-summary")) {
4545         setSummary();
4546       } else if (cmd.equals("-metaonly")) {
4547         setCheckMetaOnly();
4548       } else if (cmd.equals("-boundaries")) {
4549         setRegionBoundariesCheck();
4550       } else if (cmd.equals("-fixTableLocks")) {
4551         setFixTableLocks(true);
4552       } else if (cmd.startsWith("-")) {
4553         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4554         return printUsageAndExit();
4555       } else {
4556         includeTable(TableName.valueOf(cmd));
4557         errors.print("Allow checking/fixes for table: " + cmd);
4558       }
4559     }
4560 
4561     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4562 
4563     // pre-check whether the current user has FS write permission
4564     try {
4565       preCheckPermission();
4566     } catch (AccessDeniedException ace) {
4567       Runtime.getRuntime().exit(-1); // preCheckPermission has already reported the failure
4568     } catch (IOException ioe) {
4569       Runtime.getRuntime().exit(-1); // the pre-check itself failed with an I/O error
4570     }
4571 
4572     // do the real work of hbck
4573     connect();
4574 
4575     try {
4576       // if corrupt file mode is on, first fix them since they may be opened later
4577       if (checkCorruptHFiles || sidelineCorruptHFiles) {
4578         LOG.info("Checking all hfiles for corruption");
4579         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4580         setHFileCorruptionChecker(hfcc); // so we can get result
4581         Collection<TableName> tables = getIncludedTables();
4582         Collection<Path> tableDirs = new ArrayList<Path>();
4583         Path rootdir = FSUtils.getRootDir(getConf());
4584         if (tables.size() > 0) {
4585           for (TableName t : tables) {
4586             tableDirs.add(FSUtils.getTableDir(rootdir, t));
4587           }
4588         } else {
4589           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4590         }
4591         hfcc.checkTables(tableDirs);
4592         hfcc.report(errors);
4593       }
4594 
4595       // check and fix table integrity, region consistency.
4596       int code = onlineHbck();
4597       setRetCode(code);
4598       // If we have changed the HBase state it is better to run hbck again
4599       // to make sure we have not broken something else in the process.
4600       // We run it only once more, because otherwise we could easily fall into
4601       // an infinite loop.
4602       if (shouldRerun()) {
4603         try {
4604           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4605           Thread.sleep(sleepBeforeRerun);
4606         } catch (InterruptedException ie) {
4607           LOG.warn("Interrupted while sleeping");
4608           return this;
4609         }
4610         // Just report
4611         setFixAssignments(false);
4612         setFixMeta(false);
4613         setFixHdfsHoles(false);
4614         setFixHdfsOverlaps(false);
4615         setFixVersionFile(false);
4616         setFixTableOrphans(false);
4617         errors.resetErrors();
4618         code = onlineHbck();
4619         setRetCode(code);
4620       }
4621     } finally {
4622       IOUtils.cleanup(null, this);
4623     }
4624     return this;
4625   }
4626 
4627   /**
4628    * ls -r for debugging purposes
4629    */
4630   void debugLsr(Path p) throws IOException {
4631     debugLsr(getConf(), p, errors);
4632   }
4633 
4634   /**
4635    * ls -r for debugging purposes
4636    */
4637   public static void debugLsr(Configuration conf,
4638       Path p) throws IOException {
4639     debugLsr(conf, p, new PrintingErrorReporter());
4640   }
4641 
4642   /**
4643    * ls -r for debugging purposes
4644    */
4645   public static void debugLsr(Configuration conf,
4646       Path p, ErrorReporter errors) throws IOException {
4647     if (!LOG.isDebugEnabled() || p == null) {
4648       return;
4649     }
4650     FileSystem fs = p.getFileSystem(conf);
4651 
4652     if (!fs.exists(p)) {
4653       // nothing to list
4654       return;
4655     }
4656     errors.print(p.toString());
4657 
4658     if (fs.isFile(p)) {
4659       return;
4660     }
4661 
4662     if (fs.getFileStatus(p).isDirectory()) {
4663       FileStatus[] fss = fs.listStatus(p);
4664       for (FileStatus status : fss) {
4665         debugLsr(conf, status.getPath(), errors);
4666       }
4667     }
4668   }
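       /*
        * A typical call site (sketch): debugLsr is a no-op unless DEBUG logging
        * is enabled, so it is cheap to sprinkle around repair paths.
        *
        *   debugLsr(conf, FSUtils.getRootDir(conf));  // recursive listing
        */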
4669 }