1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.io.PrintWriter;
23  import java.io.StringWriter;
24  import java.net.InetAddress;
25  import java.net.URI;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.Collections;
29  import java.util.Comparator;
30  import java.util.HashMap;
31  import java.util.HashSet;
32  import java.util.Iterator;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Map.Entry;
36  import java.util.Set;
37  import java.util.SortedMap;
38  import java.util.SortedSet;
39  import java.util.TreeMap;
40  import java.util.TreeSet;
41  import java.util.concurrent.Callable;
42  import java.util.concurrent.ConcurrentSkipListMap;
43  import java.util.concurrent.ExecutionException;
44  import java.util.concurrent.ExecutorService;
45  import java.util.concurrent.Future;
46  import java.util.concurrent.ScheduledThreadPoolExecutor;
47  import java.util.concurrent.atomic.AtomicBoolean;
48  import java.util.concurrent.atomic.AtomicInteger;
49  
50  import org.apache.commons.logging.Log;
51  import org.apache.commons.logging.LogFactory;
52  import org.apache.hadoop.conf.Configuration;
53  import org.apache.hadoop.conf.Configured;
54  import org.apache.hadoop.fs.FSDataInputStream;
55  import org.apache.hadoop.fs.FSDataOutputStream;
56  import org.apache.hadoop.fs.FileStatus;
57  import org.apache.hadoop.fs.FileSystem;
58  import org.apache.hadoop.fs.Path;
59  import org.apache.hadoop.fs.permission.FsAction;
60  import org.apache.hadoop.fs.permission.FsPermission;
61  import org.apache.hadoop.hbase.Abortable;
62  import org.apache.hadoop.hbase.ClusterStatus;
63  import org.apache.hadoop.hbase.HBaseConfiguration;
64  import org.apache.hadoop.hbase.HColumnDescriptor;
65  import org.apache.hadoop.hbase.HConstants;
66  import org.apache.hadoop.hbase.HRegionInfo;
67  import org.apache.hadoop.hbase.HRegionLocation;
68  import org.apache.hadoop.hbase.HTableDescriptor;
69  import org.apache.hadoop.hbase.KeyValue;
70  import org.apache.hadoop.hbase.MasterNotRunningException;
71  import org.apache.hadoop.hbase.ServerName;
72  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
73  import org.apache.hadoop.hbase.catalog.MetaReader;
74  import org.apache.hadoop.hbase.client.Delete;
75  import org.apache.hadoop.hbase.client.Get;
76  import org.apache.hadoop.hbase.client.HBaseAdmin;
77  import org.apache.hadoop.hbase.client.HConnection;
78  import org.apache.hadoop.hbase.client.HConnectionManager;
79  import org.apache.hadoop.hbase.client.HConnectionManager.HConnectable;
80  import org.apache.hadoop.hbase.client.HTable;
81  import org.apache.hadoop.hbase.client.MetaScanner;
82  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
83  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
84  import org.apache.hadoop.hbase.client.Put;
85  import org.apache.hadoop.hbase.client.Result;
86  import org.apache.hadoop.hbase.client.RowMutations;
87  import org.apache.hadoop.hbase.client.UserProvider;
88  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
89  import org.apache.hadoop.hbase.io.hfile.HFile;
90  import org.apache.hadoop.hbase.ipc.HRegionInterface;
91  import org.apache.hadoop.hbase.master.MasterFileSystem;
92  import org.apache.hadoop.hbase.regionserver.HRegion;
93  import org.apache.hadoop.hbase.regionserver.StoreFile;
94  import org.apache.hadoop.hbase.regionserver.wal.HLog;
95  import org.apache.hadoop.hbase.security.User;
96  import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
97  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
98  import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
99  import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
100 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
101 import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
102 import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
103 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
104 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
105 import org.apache.hadoop.io.IOUtils;
106 import org.apache.hadoop.ipc.RemoteException;
107 import org.apache.hadoop.security.AccessControlException;
108 import org.apache.hadoop.util.ReflectionUtils;
109 import org.apache.hadoop.util.Tool;
110 import org.apache.hadoop.util.ToolRunner;
111 import org.apache.zookeeper.KeeperException;
112 
113 import com.google.common.base.Joiner;
114 import com.google.common.base.Preconditions;
115 import com.google.common.collect.Lists;
116 import com.google.common.collect.Multimap;
117 import com.google.common.collect.TreeMultimap;
118 
119 /**
120  * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
121  * table integrity problems in a corrupted HBase.
122  * <p>
123  * Region consistency checks verify that .META., region deployment on region
124  * servers, and the state of data in HDFS (.regioninfo files) are all in
125  * accordance.
126  * <p>
127  * Table integrity checks verify that all possible row keys resolve to exactly
128  * one region of a table.  This means there are no individual degenerate
129  * or backwards regions; no holes between regions; and that there are no
130  * overlapping regions.
131  * <p>
132  * The general repair strategy works in two phases:
133  * <ol>
134  * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
135  * <li> Repair Region Consistency with .META. and assignments
136  * </ol>
137  * <p>
138  * For table integrity repairs, the tables' region directories are scanned
139  * for .regioninfo files.  Each table's integrity is then verified.  If there
140  * are any orphan regions (regions with no .regioninfo files) or holes, new
141  * regions are fabricated.  Backwards regions are sidelined as well as empty
142  * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
143  * a new region is created and all data is merged into the new region.
144  * <p>
145  * Table integrity repairs deal solely with HDFS and could potentially be done
146  * offline -- neither the hbase region servers nor the master needs to be running.
147  * This phase can eventually be used to completely reconstruct the META table in
148  * an offline fashion.
149  * <p>
150  * Region consistency requires three conditions -- 1) a valid .regioninfo file
151  * present in an HDFS region dir, 2) a valid row with .regioninfo data in META,
152  * and 3) a region deployed only at the regionserver to which it was assigned,
153  * with proper state in the master.
154  * <p>
155  * Region consistency repairs require hbase to be online so that hbck can
156  * contact the HBase master and region servers.  The hbck#connect() method must
157  * first be called successfully.  Much of the region consistency information
158  * is transient and less risky to repair.
159  * <p>
160  * If hbck is run from the command line, there are a handful of arguments that
161  * can be used to limit the kinds of repairs hbck will do.  See the code in
162  * {@link #printUsageAndExit()} for more details.
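     *
     * As an illustrative sketch (assuming an online cluster and default
     * configuration), hbck can also be driven programmatically:
     * <pre>
     * Configuration conf = HBaseConfiguration.create();
     * HBaseFsck fsck = new HBaseFsck(conf);
     * fsck.connect();               // take the hbck lock and contact the cluster
     * int ret = fsck.onlineHbck();  // run checks and any enabled repairs
     * </pre>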
163  */
164 public class HBaseFsck extends Configured implements Tool {
165   public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
166   public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
167   private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
168   private static boolean rsSupportsOffline = true;
169   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
170   private static final int DEFAULT_MAX_MERGE = 5;
171   private static final String TO_BE_LOADED = "to_be_loaded";
172   private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
173 
174 
175   /**********************
176    * Internal resources
177    **********************/
178   private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
179   private ClusterStatus status;
180   private HConnection connection;
181   private HBaseAdmin admin;
182   private HTable meta;
183   // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
184   protected ExecutorService executor;
185   private long startMillis = System.currentTimeMillis();
186   private HFileCorruptionChecker hfcc;
187   private int retcode = 0;
188   private Path HBCK_LOCK_PATH;
189   private FSDataOutputStream hbckOutFd;
190   // This guard prevents the hbck lock resources from being cleaned up twice,
191   // once by the ShutdownHook and once by the main code. We clean up only if
192   // connect() was successful.
193   private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
194 
195   /***********
196    * Options
197    ***********/
198   private static boolean details = false; // do we display the full report
199   private long timelag = DEFAULT_TIME_LAG; // only process tables whose modtime is older than this lag
200   private boolean fixAssignments = false; // fix assignment errors?
201   private boolean fixMeta = false; // fix meta errors?
202   private boolean checkHdfs = true; // load and check fs consistency?
203   private boolean fixHdfsHoles = false; // fix fs holes?
204   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
205   private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
206   private boolean fixTableOrphans = false; // fix table orphans (missing .tableinfo)
207   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
208   private boolean fixSplitParents = false; // fix lingering split parents
209   private boolean fixReferenceFiles = false; // fix lingering reference store files
210 
211   // limit checking/fixes to listed tables, if empty attempt to check/fix all
212   // -ROOT- and .META. are always checked
213   private Set<String> tablesIncluded = new HashSet<String>();
214   private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
215   private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
216   private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
217   private Path sidelineDir = null;
218 
219   private boolean rerun = false; // if we tried to fix something, rerun hbck
220   private static boolean summary = false; // if we want to print less output
221   private boolean checkMetaOnly = false;
222   private boolean checkRegionBoundaries = false;
223   private boolean ignorePreCheckPermission = false; // if true, skip the pre-check of file permissions
224 
225   /*********
226    * State
227    *********/
228   private final ErrorReporter errors;
229   int fixes = 0;
230 
231   /**
232    * This map contains the state of all hbck items.  It maps from encoded region
233    * name to HbckInfo structure.  The information contained in HbckInfo is used
234    * to detect and correct consistency (hdfs/meta/deployment) problems.
235    */
236   private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
237   private TreeSet<byte[]> disabledTables =
238     new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
239   // Empty regioninfo qualifiers in .META.
240   private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
241 
242   /**
243    * This map from Tablename -> TableInfo contains the structures necessary to
244    * detect table consistency problems (holes, dupes, overlaps).  It is sorted
245    * to prevent dupes.
246    *
247    * If tablesIncluded is empty, this map contains all tables.
248    * Otherwise, it contains only meta tables and tables in tablesIncluded,
249    * unless checkMetaOnly is specified, in which case, it contains only
250    * the meta tables (.META. and -ROOT-).
251    */
252   private SortedMap<String, TableInfo> tablesInfo = new ConcurrentSkipListMap<String,TableInfo>();
253 
254   /**
255    * When initially looking at HDFS, we attempt to find any orphaned data.
256    */
257   private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
258 
259   private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();
260 
261   /**
262    * Constructor
263    *
264    * @param conf Configuration object
265    * @throws MasterNotRunningException if the master is not running
266    * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
267    */
268   public HBaseFsck(Configuration conf) throws MasterNotRunningException,
269       ZooKeeperConnectionException, IOException, ClassNotFoundException {
270     super(conf);
271     errors = getErrorReporter(conf);
272 
273     initialPoolNumThreads();
274   }
275 
276   /**
277    * Constructor
278    *
279    * @param conf
280    *          Configuration object
       * @param exec
       *          ExecutorService used to run parallelizable work items
281    * @throws MasterNotRunningException
282    *           if the master is not running
283    * @throws ZooKeeperConnectionException
284    *           if unable to connect to ZooKeeper
285    */
286   public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
287       ZooKeeperConnectionException, IOException, ClassNotFoundException {
288     super(conf);
289     errors = getErrorReporter(getConf());
290     this.executor = exec;
291   }
292 
293   /**
294    * This method maintains a lock using a file. If creating the lock file fails
       * because another hbck instance already holds it, we return null.
295    *
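       * The lock file, {@link #HBCK_LOCK_FILE}, is created under the cluster's
       * temporary directory ({@link HConstants#HBASE_TEMP_DIRECTORY} beneath the
       * root dir) and records the hostname of the machine running hbck.
       *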
296    * @return FSDataOutputStream object corresponding to the newly opened lock file
297    * @throws IOException if an unexpected filesystem error prevents taking the lock
298    */
299   private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
300     try {
301       FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
302       FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
303           HConstants.DATA_FILE_UMASK_KEY);
304       Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
305       fs.mkdirs(tmpDir);
306       HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
307       final FSDataOutputStream out = FSUtils.create(fs, HBCK_LOCK_PATH, defaultPerms, false);
308       out.writeBytes(InetAddress.getLocalHost().toString());
309       out.flush();
310       return out;
311     } catch (IOException exception) {
312       RemoteException e = null;
313       if (exception instanceof RemoteException) {
314         e = (RemoteException)exception;
315       } else if (exception.getCause() instanceof RemoteException) {
316         e = (RemoteException)(exception.getCause());
317       }
318       if(null != e && AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
319         return null;
320       }
321       throw exception;
322     }
323   }
324 
325   private void unlockHbck() {
326     if(hbckLockCleanup.compareAndSet(true, false)){
327       IOUtils.closeStream(hbckOutFd);
328       try{
329         FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
330       } catch(IOException ioe) {
331         LOG.warn("Failed to delete " + HBCK_LOCK_PATH);
332         LOG.debug(ioe);
333       }
334     }
335   }
336 
337   /**
338    * To repair region consistency, one must call connect() in order to repair
339    * online state.
340    */
341   public void connect() throws IOException {
342 
343     // Check if another instance of hbck is running
344     hbckOutFd = checkAndMarkRunningHbck();
345     if (hbckOutFd == null) {
346       setRetCode(-1);
347     LOG.error("Another instance of hbck is running, exiting this instance. [If you are sure" +
348         " no other instance is running, delete the lock file " +
349         HBCK_LOCK_PATH + " and rerun the tool]");
350       throw new IOException("Duplicate hbck - Abort");
351     }
352 
353     // Make sure to cleanup the lock
354     hbckLockCleanup.set(true);
355 
356     // Add a shutdown hook in case the user tries to kill hbck with a
357     // ctrl-c; we want to clean up the lock so that it is available
358     // to subsequent runs.
359     Runtime.getRuntime().addShutdownHook(new Thread() {
360       public void run() {
361           unlockHbck();
362       }
363     });
364     LOG.debug("Launching hbck");
365 
366     admin = new HBaseAdmin(getConf());
367     meta = new HTable(getConf(), HConstants.META_TABLE_NAME);
368     status = admin.getMaster().getClusterStatus();
369     connection = admin.getConnection();
370   }
371 
372   /**
373    * Initialize the thread pool backing {@link #executor}.
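       * <p>
       * The pool size comes from configuration, e.g. (illustrative):
       * <pre>
       * conf.setInt("hbasefsck.numthreads", 10); // defaults to MAX_NUM_THREADS (50)
       * </pre>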
374    */
375   private void initialPoolNumThreads() {
376     if (executor != null) {
377       executor.shutdown();
378     }
379 
380     int numThreads = getConf().getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
381     executor = new ScheduledThreadPoolExecutor(numThreads);
382   }
383 
384   /**
385    * Get deployed regions according to the region servers.
386    */
387   private void loadDeployedRegions() throws IOException, InterruptedException {
388     // From the master, get a list of all known live region servers
389     Collection<ServerName> regionServers = status.getServers();
390     errors.print("Number of live region servers: " + regionServers.size());
391     if (details) {
392       for (ServerName rsinfo: regionServers) {
393         errors.print("  " + rsinfo.getServerName());
394       }
395     }
396 
397     // From the master, get a list of all dead region servers
398     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
399     errors.print("Number of dead region servers: " + deadRegionServers.size());
400     if (details) {
401       for (ServerName name: deadRegionServers) {
402         errors.print("  " + name);
403       }
404     }
405 
406     // Print the current master name and state
407     errors.print("Master: " + status.getMaster());
408 
409     // Print the list of all backup masters
410     Collection<ServerName> backupMasters = status.getBackupMasters();
411     errors.print("Number of backup masters: " + backupMasters.size());
412     if (details) {
413       for (ServerName name: backupMasters) {
414         errors.print("  " + name);
415       }
416     }
417 
418     // Determine what's deployed
419     processRegionServers(regionServers);
420   }
421 
422   /**
423    * Clear the current state of hbck.
424    */
425   private void clearState() {
426     // Make sure regionInfo is empty before starting
427     fixes = 0;
428     regionInfoMap.clear();
429     emptyRegionInfoQualifiers.clear();
430     disabledTables.clear();
431     errors.clear();
432     tablesInfo.clear();
433     orphanHdfsDirs.clear();
434   }
435 
436   /**
437    * This repair method analyzes hbase data in hdfs and repairs it to satisfy
438    * the table integrity rules.  HBase doesn't need to be online for this
439    * operation to work.
440    */
441   public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
442     // Initial pass to fix orphans.
443     if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
444         || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
445       LOG.info("Loading regioninfos from HDFS");
446       // if nothing is happening this should always complete in two iterations.
447       int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
448       int curIter = 0;
449       do {
450         clearState(); // clears hbck state and resets fixes to 0.
451         // repair what's on HDFS
452         restoreHdfsIntegrity();
453         curIter++;// limit the number of iterations.
454       } while (fixes > 0 && curIter <= maxIterations);
455 
456       // Repairs should be done in the first iteration and verification in the second.
457       // If there are more than 2 passes, something funny has happened.
458       if (curIter > 2) {
459         if (curIter == maxIterations) {
460           LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
461               + "Table integrity may not be fully repaired!");
462         } else {
463           LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
464         }
465       }
466     }
467   }
468 
469   /**
470    * This repair method requires the cluster to be online since it contacts
471    * region servers and the masters.  It makes each region's state in HDFS, in
472    * .META., and deployments consistent.
473    *
474    * @return if greater than 0, the number of errors detected; if less than 0,
475    * an unrecoverable error occurred; if 0, we have a clean hbase.
476    */
477   public int onlineConsistencyRepair() throws IOException, KeeperException,
478     InterruptedException {
479     clearState();
480 
481     LOG.info("Loading regioninfo from the .META. table");
482     boolean success = loadMetaEntries();
483     if (!success) return -1;
484 
485     // Check if .META. is found only once and in the right place
486     if (!checkMetaRegion()) {
487       // Will remove later if we can fix it
488       errors.reportError("Encountered fatal error. Exiting...");
489       return -2;
490     }
491 
492     // get a list of all tables that have not changed recently.
493     if (!checkMetaOnly) {
494       reportTablesInFlux();
495     }
496 
497     // get regions according to what is online on each RegionServer
498     loadDeployedRegions();
499 
500     // load regiondirs and regioninfos from HDFS
501     if (shouldCheckHdfs()) {
502       loadHdfsRegionDirs();
503       loadHdfsRegionInfos();
504     }
505 
506     // Empty cells in .META.?
507     reportEmptyMetaCells();
508 
509     // Get disabled tables from ZooKeeper
510     loadDisabledTables();
511 
512     // fix the orphan tables
513     fixOrphanTables();
514 
515     // Check and fix consistency
516     checkAndFixConsistency();
517 
518     // Check integrity (does not fix)
519     checkIntegrity();
520     return errors.getErrorList().size();
521   }
522 
523   /**
524    * Contacts the master and prints out cluster-wide information
525    * @return 0 on success, non-zero on failure
526    */
527   public int onlineHbck() throws IOException, KeeperException, InterruptedException {
528     // print hbase server version
529     errors.print("Version: " + status.getHBaseVersion());
530     offlineHdfsIntegrityRepair();
531 
532     // turn the balancer off
533     boolean oldBalancer = admin.setBalancerRunning(false, true);
534     try {
535       onlineConsistencyRepair();
536     }
537     finally {
538       admin.setBalancerRunning(oldBalancer, false);
539     }
540 
541     if (checkRegionBoundaries) {
542       checkRegionBoundaries();
543     }
544 
545     offlineReferenceFileRepair();
546 
547     // Remove the hbck lock
548     unlockHbck();
549 
550     // Print table summary
551     printTableSummary(tablesInfo);
552     return errors.summarize();
553   }
554 
555   public static byte[] keyOnly(byte[] b) {
556     if (b == null)
557       return b;
558     int rowlength = Bytes.toShort(b, 0);
559     byte[] result = new byte[rowlength];
560     System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
561     return result;
562   }
563 
564   private static class RegionBoundariesInformation {
565     public byte[] regionName;
566     public byte[] metaFirstKey;
567     public byte[] metaLastKey;
568     public byte[] storesFirstKey;
569     public byte[] storesLastKey;
570     public String toString() {
571       return "regionName=" + Bytes.toStringBinary(regionName) +
572              "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
573              "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
574              "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
575              "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
576     }
577   }
578 
579   public void checkRegionBoundaries() {
580     try {
581       ByteArrayComparator comparator = new ByteArrayComparator();
582       List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), false);
583       final RegionBoundariesInformation currentRegionBoundariesInformation =
584           new RegionBoundariesInformation();
585       for (HRegionInfo regionInfo : regions) {
586         currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
587         // For each region, get the start and stop keys from META and compare them
588         // to the same information from the Stores.
589         Path path = new Path(getConf().get(HConstants.HBASE_DIR) + "/"
590             + Bytes.toString(regionInfo.getTableName()) + "/"
591             + regionInfo.getEncodedName() + "/");
592         FileSystem fs = path.getFileSystem(getConf());
593         FileStatus[] files = fs.listStatus(path);
594         // For all the column families in this region...
595         byte[] storeFirstKey = null;
596         byte[] storeLastKey = null;
597         for (FileStatus file : files) {
598           String fileName = file.getPath().toString();
599           fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
600           if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
601             FileStatus[] storeFiles = fs.listStatus(file.getPath());
602             // For all the store files in this column family.
603             for (FileStatus storeFile : storeFiles) {
604               HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
605                   getConf()));
606               if ((reader.getFirstKey() != null)
607                   && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
608                       reader.getFirstKey()) > 0))) {
609                 storeFirstKey = reader.getFirstKey();
610               }
611               if ((reader.getLastKey() != null)
612                   && ((storeLastKey == null) || (comparator.compare(storeLastKey,
613                       reader.getLastKey())) < 0)) {
614                 storeLastKey = reader.getLastKey();
615               }
616               reader.close();
617             }
618           }
619         }
620         currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
621         currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
622         currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
623         currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
624         if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
625           currentRegionBoundariesInformation.metaFirstKey = null;
626         if (currentRegionBoundariesInformation.metaLastKey.length == 0)
627           currentRegionBoundariesInformation.metaLastKey = null;
628 
629         // For a region to be correct, the META start key must be smaller than or
630         // equal to the smallest start key from all the stores, and the start key
631         // from the next META entry must be bigger than the last key from all the
632         // current stores. The first region's start key is null; the last region's
633         // end key is null; some regions can be empty and have no store.
634         boolean valid = true;
635         // Checking start key.
636         if ((currentRegionBoundariesInformation.storesFirstKey != null)
637             && (currentRegionBoundariesInformation.metaFirstKey != null)) {
638           valid = valid
639               && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
640                 currentRegionBoundariesInformation.metaFirstKey) >= 0;
641         }
642         // Checking stop key.
643         if ((currentRegionBoundariesInformation.storesLastKey != null)
644             && (currentRegionBoundariesInformation.metaLastKey != null)) {
645           valid = valid
646               && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
647                 currentRegionBoundariesInformation.metaLastKey) < 0;
648         }
649         if (!valid) {
650           errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
651             tablesInfo.get(Bytes.toString(regionInfo.getTableName())));
652           LOG.warn("Region's boundaries not aligned between stores and META for:");
653           LOG.warn(currentRegionBoundariesInformation);
654         }
655       }
656     } catch (IOException e) {
657       LOG.error(e);
658     }
659   }
660     
661   /**
662    * Iterates through the list of all orphan/invalid regiondirs.
663    */
664   private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
665     for (HbckInfo hi : orphanHdfsDirs) {
666       LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
667       adoptHdfsOrphan(hi);
668     }
669   }
670 
671   /**
672    * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
673    * these orphans by creating a new region and moving the column families,
674    * recovered edits, and HLogs into the new region dir.  We determine the region
675    * startkey and endkeys by looking at all of the hfiles inside the column
676    * families to identify the min and max keys. The resulting region will
677    * likely violate table integrity but will be dealt with by merging
678    * overlapping regions.
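       * <p>
       * For example (illustrative): hfiles covering row ranges [b,d] and [c,f]
       * yield an adopted region with startkey 'b' and endkey 'f'.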
679    */
680   private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
681     Path p = hi.getHdfsRegionDir();
682     FileSystem fs = p.getFileSystem(getConf());
683     FileStatus[] dirs = fs.listStatus(p);
684     if (dirs == null) {
685       LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
686           p + ". This dir could probably be deleted.");
687       return;
688     }
689 
690     String tableName = Bytes.toString(hi.getTableName());
691     TableInfo tableInfo = tablesInfo.get(tableName);
692     Preconditions.checkNotNull(tableInfo, "Table " + tableName + " not present!");
693     HTableDescriptor template = tableInfo.getHTD();
694 
695     // find min and max key values
696     Pair<byte[],byte[]> orphanRegionRange = null;
697     for (FileStatus cf : dirs) {
698       String cfName = cf.getPath().getName();
699       // TODO Figure out what the special dirs are
700       if (cfName.startsWith(".") || cfName.equals("splitlog")) continue;
701 
702       FileStatus[] hfiles = fs.listStatus(cf.getPath());
703       for (FileStatus hfile : hfiles) {
704         byte[] start, end;
705         HFile.Reader hf = null;
706         try {
707           CacheConfig cacheConf = new CacheConfig(getConf());
708           hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
709           hf.loadFileInfo();
710           KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
711           start = startKv.getRow();
712           KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
713           end = endKv.getRow();
714         } catch (IOException ioe) {
715           LOG.warn("Problem reading orphan file " + hfile + ", skipping");
716           continue;
717         } catch (NullPointerException npe) {
718           LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
719           continue;
720         } finally {
721           if (hf != null) {
722             hf.close();
723           }
724         }
725 
726         // expand the range to include the range of all hfiles
727         if (orphanRegionRange == null) {
728           // first range
729           orphanRegionRange = new Pair<byte[], byte[]>(start, end);
730         } else {
731           // TODO add test
732 
733           // expand range only if the hfile is wider.
734           if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
735             orphanRegionRange.setFirst(start);
736           }
737           if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
738             orphanRegionRange.setSecond(end);
739           }
740         }
741       }
742     }
743     if (orphanRegionRange == null) {
744       LOG.warn("No data in dir " + p + ", sidelining data");
745       fixes++;
746       sidelineRegionDir(fs, hi);
747       return;
748     }
749     LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
750         Bytes.toString(orphanRegionRange.getSecond()) + ")");
751 
752     // create new region on hdfs.  move data into place.
753     HRegionInfo hri = new HRegionInfo(template.getName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
754     LOG.info("Creating new region : " + hri);
755     HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
756     Path target = region.getRegionDir();
757 
758     // rename all the data to new region
759     mergeRegionDirs(target, hi);
760     fixes++;
761   }
762 
763   /**
764    * This method determines if there are table integrity errors in HDFS.  If
765    * there are errors and the appropriate "fix" options are enabled, the method
766    * will first correct orphan regions making them into legit regiondirs, and
767    * then reload to merge potentially overlapping regions.
768    *
769    * @return number of table integrity errors found
770    */
771   private int restoreHdfsIntegrity() throws IOException, InterruptedException {
772     // Determine what's on HDFS
773     LOG.info("Loading HBase regioninfo from HDFS...");
774     loadHdfsRegionDirs(); // populating regioninfo table.
775 
776     int errs = errors.getErrorList().size();
777     // First time just get suggestions.
778     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
779     checkHdfsIntegrity(false, false);
780 
781     if (errors.getErrorList().size() == errs) {
782       LOG.info("No integrity errors.  We are done with this phase. Glorious.");
783       return 0;
784     }
785 
786     if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
787       adoptHdfsOrphans(orphanHdfsDirs);
788       // TODO optimize by incrementally adding instead of reloading.
789     }
790 
791     // Make sure there are no holes now.
792     if (shouldFixHdfsHoles()) {
793       clearState(); // this also resets # fixes.
794       loadHdfsRegionDirs();
795       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
796       tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
797     }
798 
799     // Now we fix overlaps
800     if (shouldFixHdfsOverlaps()) {
801       // second pass we fix overlaps.
802       clearState(); // this also resets # fixes.
803       loadHdfsRegionDirs();
804       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
805       tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
806     }
807 
808     return errors.getErrorList().size();
809   }
810 
811   /**
812    * Scan all the store file names to find any lingering reference files,
813    * which refer to non-existent files. If the "fix" option is enabled,
814    * any lingering reference file found will be sidelined.
815    * <p>
816    * A lingering reference file prevents a region from opening. It has to
817    * be fixed before a cluster can start properly.
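       * <p>
       * A reference file path has the following shape (see also the comment in
       * the method body):
       * <pre>
       * ${hbase.rootdir}/table_name/region_id/family_name/referred_file.region_name
       * </pre>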
818    */
819   private void offlineReferenceFileRepair() throws IOException {
820     Configuration conf = getConf();
821     Path hbaseRoot = FSUtils.getRootDir(conf);
822     FileSystem fs = hbaseRoot.getFileSystem(conf);
823     Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
824     for (Path path: allFiles.values()) {
825       boolean isReference = false;
826       try {
827         isReference = StoreFile.isReference(path);
828       } catch (Throwable t) {
829         // Ignore. Some files may not be store files at all.
830         // For example, files under .oldlogs folder in .META.
831         // Warning message is already logged by
832         // StoreFile#isReference.
833       }
834       if (!isReference) continue;
835 
836       Path referredToFile = StoreFile.getReferredToFile(path);
837       if (fs.exists(referredToFile)) continue;  // good, expected
838 
839       // Found a lingering reference file
840       errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
841         "Found lingering reference file " + path);
842       if (!shouldFixReferenceFiles()) continue;
843 
844       // Now, trying to fix it since requested
845       boolean success = false;
846       String pathStr = path.toString();
847 
848       // A reference file path should be like
849       // ${hbase.rootdir}/table_name/region_id/family_name/referred_file.region_name
850       // Up 3 directories to get the table folder.
851       // So the file will be sidelined to a similar folder structure.
852       int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
853       for (int i = 0; index > 0 && i < 3; i++) {
854         index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1); // step up one level per pass
855       }
856       if (index > 0) {
857         Path rootDir = getSidelineDir();
858         Path dst = new Path(rootDir, pathStr.substring(index));
859         fs.mkdirs(dst.getParent());
860         LOG.info("Trying to sideline reference file "
861           + path + " to " + dst);
862         setShouldRerun();
863 
864         success = fs.rename(path, dst);
865       }
866       if (!success) {
867         LOG.error("Failed to sideline reference file " + path);
868       }
869     }
870   }
871 
872   /**
873    * TODO -- need to add tests for this.
874    */
875   private void reportEmptyMetaCells() {
876     errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
877       emptyRegionInfoQualifiers.size());
878     if (details) {
879       for (Result r: emptyRegionInfoQualifiers) {
880         errors.print("  " + r);
881       }
882     }
883   }
884 
885   /**
886    * TODO -- need to add tests for this.
887    */
888   private void reportTablesInFlux() {
889     AtomicInteger numSkipped = new AtomicInteger(0);
890     HTableDescriptor[] allTables = getTables(numSkipped);
891     errors.print("Number of Tables: " + allTables.length);
892     if (details) {
893       if (numSkipped.get() > 0) {
894         errors.detail("Number of Tables in flux: " + numSkipped.get());
895       }
896       for (HTableDescriptor td : allTables) {
897         String tableName = td.getNameAsString();
898         errors.detail("  Table: " + tableName + "\t" +
899                            (td.isReadOnly() ? "ro" : "rw") + "\t" +
900                            (td.isRootRegion() ? "ROOT" :
901                             (td.isMetaRegion() ? "META" : "    ")) + "\t" +
902                            " families: " + td.getFamilies().size());
903       }
904     }
905   }
906 
907   public ErrorReporter getErrors() {
908     return errors;
909   }
910 
911   /**
912    * Read the .regioninfo file from the file system.  If there is no
913    * .regioninfo, add it to the orphan hdfs region list.
914    */
915   private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
916     Path regionDir = hbi.getHdfsRegionDir();
917     if (regionDir == null) {
918       LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
919       return;
920     }
921 
922     if (hbi.hdfsEntry.hri != null) {
923       // already loaded data
924       return;
925     }
926 
927     Path regioninfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
928     FileSystem fs = regioninfo.getFileSystem(getConf());
929 
930     FSDataInputStream in = fs.open(regioninfo);
931     HRegionInfo hri = new HRegionInfo();
932     hri.readFields(in);
933     in.close();
934     LOG.debug("HRegionInfo read: " + hri.toString());
935     hbi.hdfsEntry.hri = hri;
936   }
937 
938   /**
939    * Exception thrown when an integrity repair operation fails in an
940    * unresolvable way.
941    */
942   public static class RegionRepairException extends IOException {
943     private static final long serialVersionUID = 1L;
944     final IOException ioe;
945     public RegionRepairException(String s, IOException ioe) {
946       super(s);
947       this.ioe = ioe;
948     }
949   }
950 
951   /**
952    * Populate hbi's from regionInfos loaded from file system.
953    */
954   private SortedMap<String, TableInfo> loadHdfsRegionInfos() throws IOException, InterruptedException {
955     tablesInfo.clear(); // regenerating the data
956     // generate region split structure
957     Collection<HbckInfo> hbckInfos = regionInfoMap.values();
958 
959     // Parallelized read of .regioninfo files.
960     List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
961     List<Future<Void>> hbiFutures;
962 
963     for (HbckInfo hbi : hbckInfos) {
964       WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
965       hbis.add(work);
966     }
967 
968     // Submit and wait for completion
969     hbiFutures = executor.invokeAll(hbis);
970 
971     for (int i = 0; i < hbiFutures.size(); i++) {
972       WorkItemHdfsRegionInfo work = hbis.get(i);
973       Future<Void> f = hbiFutures.get(i);
974       try {
975         f.get();
976       } catch(ExecutionException e) {
977         LOG.warn("Failed to read .regioninfo file for region " +
978               work.hbi.getRegionNameAsString(), e.getCause());
979       }
980     }
981 
982     // serialized table info gathering.
983     for (HbckInfo hbi: hbckInfos) {
984 
985       if (hbi.getHdfsHRI() == null) {
986         // was an orphan
987         continue;
988       }
989 
990 
991       // get table name from hdfs, populate various HBaseFsck tables.
992       String tableName = Bytes.toString(hbi.getTableName());
993       if (tableName == null) {
994         // There was an entry in META but not in HDFS?
995         LOG.warn("tableName was null for: " + hbi);
996         continue;
997       }
998 
999       TableInfo modTInfo = tablesInfo.get(tableName);
1000       if (modTInfo == null) {
1001         // only executed once per table.
1002         modTInfo = new TableInfo(tableName);
1003         Path hbaseRoot = FSUtils.getRootDir(getConf());
1004         tablesInfo.put(tableName, modTInfo);
1005         try {
1006           HTableDescriptor htd =
1007               FSTableDescriptors.getTableDescriptorFromFs(hbaseRoot.getFileSystem(getConf()),
1008               hbaseRoot, tableName);
1009           modTInfo.htds.add(htd);
1010         } catch (IOException ioe) {
1011           if (!orphanTableDirs.containsKey(tableName)) {
1012             LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1013             //should only report once for each table
1014             errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1015                 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1016             Set<String> columns = new HashSet<String>();
1017             orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1018           }
1019         }
1020       }
1021       if (!hbi.isSkipChecks()) {
1022         modTInfo.addRegionInfo(hbi);
1023       }
1024     }
1025 
1026     return tablesInfo;
1027   }
1028 
1029   /**
1030    * Get the column family list from the column family dirs of a region.
1031    * @param columns set that the column family names are added to
1032    * @param hbi the region whose HDFS dir is scanned
1033    * @return the populated set of column family names
1034    * @throws IOException if the region dir cannot be listed
1035    */
1036   private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1037     Path regionDir = hbi.getHdfsRegionDir();
1038     FileSystem fs = regionDir.getFileSystem(getConf());
1039     FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1040     for (FileStatus subdir : subDirs) {
1041       String columnfamily = subdir.getPath().getName();
1042       columns.add(columnfamily);
1043     }
1044     return columns;
1045   }
1046 
1047   /**
1048    * Fabricate a .tableinfo file with the following contents:<br>
1049    * 1. the correct table name<br>
1050    * 2. the correct column family list<br>
1051    * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1052    * @param tableName name of the orphan table
1053    * @param columns the column family names recovered from the region dirs
1054    * @throws IOException if the descriptor cannot be written
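        * <p>
        * A minimal illustrative call (table and family names are made up):
        * <pre>
        * fabricateTableInfo("t1", new HashSet&lt;String&gt;(Arrays.asList("f1", "f2")));
        * </pre>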
1054    */
1055   private boolean fabricateTableInfo(String tableName, Set<String> columns) throws IOException {
1056     if (columns == null || columns.isEmpty()) return false;
1057     HTableDescriptor htd = new HTableDescriptor(tableName);
1058     for (String columnFamily : columns) {
1059       htd.addFamily(new HColumnDescriptor(columnFamily));
1060     }
1061     FSTableDescriptors.createTableDescriptor(htd, getConf(), true);
1062     return true;
1063   }
1064 
1065   /**
1066    * Fix orphan tables by creating a .tableinfo file under each tableDir:<br>
1067    * 1. if the TableInfo is cached, recover the .tableinfo from it<br>
1068    * 2. else create a default .tableinfo file with the following items:<br>
1069    * &nbsp;2.1 the correct table name<br>
1070    * &nbsp;2.2 the correct column family list<br>
1071    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1072    * @throws IOException if a .tableinfo file cannot be written
1073    */
1074   public void fixOrphanTables() throws IOException {
1075     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1076 
1077       Path hbaseRoot = FSUtils.getRootDir(getConf());
1078       List<String> tmpList = new ArrayList<String>();
1079       tmpList.addAll(orphanTableDirs.keySet());
1080       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1081       Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
1082       int j = 0;
1083       int numFailedCase = 0;
1084       while (iter.hasNext()) {
1085         Entry<String, Set<String>> entry = (Entry<String, Set<String>>) iter.next();
1086         String tableName = entry.getKey();
1087         LOG.info("Trying to fix orphan table error: " + tableName);
1088         if (j < htds.length) {
1089           if (tableName.equals(Bytes.toString(htds[j].getName()))) {
1090             HTableDescriptor htd = htds[j];
1091             LOG.info("fixing orphan table: " + tableName + " from cache");
1092             FSTableDescriptors.createTableDescriptor(
1093                 hbaseRoot.getFileSystem(getConf()), hbaseRoot, htd, true);
1094             j++;
1095             iter.remove();
1096           }
1097         } else {
1098           if (fabricateTableInfo(tableName, entry.getValue())) {
1099             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1100             LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: " + tableName);
1101             iter.remove();
1102           } else {
1103             LOG.error("Unable to create default .tableinfo for " + tableName + " because column family information is missing");
1104             numFailedCase++;
1105           }
1106         }
1107         fixes++;
1108       }
1109 
1110       if (orphanTableDirs.isEmpty()) {
1111         // all orphanTableDirs were successfully recovered;
1112         // re-run hbck after recovering the .tableinfo files
1113         setShouldRerun();
1114         LOG.warn("Strongly recommend re-running hbck manually after all orphanTableDirs have been fixed");
1115       } else if (numFailedCase > 0) {
1116         LOG.error("Failed to fix " + numFailedCase
1117             + " OrphanTables with default .tableinfo files");
1118       }
1119 
1120     }
1121     //cleanup the list
1122     orphanTableDirs.clear();
1123 
1124   }
1125 
1126   /**
1127    * This borrows code from MasterFileSystem.bootstrap()
1128    * 
1129    * @return an open .META. HRegion
1130    */
1131   private HRegion createNewRootAndMeta() throws IOException {
1132     Path rootdir = new Path(getConf().get(HConstants.HBASE_DIR));
1133     Configuration c = getConf();
1134     HRegionInfo rootHRI = new HRegionInfo(HRegionInfo.ROOT_REGIONINFO);
1135     MasterFileSystem.setInfoFamilyCachingForRoot(false);
1136     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1137     MasterFileSystem.setInfoFamilyCachingForMeta(false);
1138     HRegion root = HRegion.createHRegion(rootHRI, rootdir, c,
1139         HTableDescriptor.ROOT_TABLEDESC);
1140     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c,
1141         HTableDescriptor.META_TABLEDESC);
1142     MasterFileSystem.setInfoFamilyCachingForRoot(true);
1143     MasterFileSystem.setInfoFamilyCachingForMeta(true);
1144 
1145     // Add first region from the META table to the ROOT region.
1146     HRegion.addRegionToMETA(root, meta);
1147     root.close();
1148     root.getLog().closeAndDelete();
1149     return meta;
1150   }
1151 
1152   /**
1153    * Generate set of puts to add to new meta.  This expects the tables to be 
1154    * clean with no overlaps or holes.  If there are any problems it returns null.
1155    * 
1156    * @return An array list of puts to do in bulk, null if tables have problems
1157    */
1158   private ArrayList<Put> generatePuts(SortedMap<String, TableInfo> tablesInfo) throws IOException {
1159     ArrayList<Put> puts = new ArrayList<Put>();
1160     boolean hasProblems = false;
1161     for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
1162       String name = e.getKey();
1163 
1164       // skip "-ROOT-" and ".META."
1165       if (Bytes.compareTo(Bytes.toBytes(name), HConstants.ROOT_TABLE_NAME) == 0
1166           || Bytes.compareTo(Bytes.toBytes(name), HConstants.META_TABLE_NAME) == 0) {
1167         continue;
1168       }
1169 
1170       TableInfo ti = e.getValue();
1171       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1172           .entrySet()) {
1173         Collection<HbckInfo> his = spl.getValue();
1174         int sz = his.size();
1175         if (sz != 1) {
1176           // problem
1177           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1178               + " had " + sz + " regions instead of exactly 1.");
1179           hasProblems = true;
1180           continue;
1181         }
1182 
1183         // add the row directly to meta.
1184         HbckInfo hi = his.iterator().next();
1185         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1186         Put p = new Put(hri.getRegionName());
1187         p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
1188             Writables.getBytes(hri));
1189         puts.add(p);
1190       }
1191     }
1192     return hasProblems ? null : puts;
1193   }
1194 
1195   /**
1196    * Suggest fixes for each table
1197    */
1198   private void suggestFixes(SortedMap<String, TableInfo> tablesInfo) throws IOException {
1199     for (TableInfo tInfo : tablesInfo.values()) {
1200       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1201       tInfo.checkRegionChain(handler);
1202     }
1203   }
1204 
1205   /**
1206    * Rebuilds meta from information in hdfs/fs.  Depends on configuration
1207    * settings passed into hbck constructor to point to a particular fs/dir.
1208    * 
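        * A minimal offline invocation, as an illustrative sketch (HBase, or at
        * least the affected tables, must be offline):
        * <pre>
        * HBaseFsck fsck = new HBaseFsck(HBaseConfiguration.create());
        * boolean ok = fsck.rebuildMeta(true); // true: also attempt to fix holes
        * </pre>
        *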
1209    * @param fix flag that determines if method should attempt to fix holes
1210    * @return true if successful, false if attempt failed.
1211    */
1212   public boolean rebuildMeta(boolean fix) throws IOException,
1213       InterruptedException {
1214 
1215     // TODO check to make sure hbase is offline. (or at least the table
1216     // currently being worked on is off line)
1217 
1218     // Determine what's on HDFS
1219     LOG.info("Loading HBase regioninfo from HDFS...");
1220     loadHdfsRegionDirs(); // populating regioninfo table.
1221 
1222     int errs = errors.getErrorList().size();
1223     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1224     checkHdfsIntegrity(false, false);
1225 
1226     // make sure ok.
1227     if (errors.getErrorList().size() != errs) {
1228       // While in error state, iterate until no more fixes possible
1229       while(true) {
1230         fixes = 0;
1231         suggestFixes(tablesInfo);
1232         errors.clear();
1233         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1234         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1235 
1236         int errCount = errors.getErrorList().size();
1237 
1238         if (fixes == 0) {
1239           if (errCount > 0) {
1240             return false; // failed to fix problems.
1241           } else {
1242             break; // no fixes and no problems? drop out and fix stuff!
1243           }
1244         }
1245       }
1246     }
1247 
1248     // we can rebuild, move old root and meta out of the way and start
1249     LOG.info("HDFS regioninfos seem good.  Sidelining old .META.");
1250     Path backupDir = sidelineOldRootAndMeta();
1251 
1252     LOG.info("Creating new .META.");
1253     HRegion meta = createNewRootAndMeta();
1254 
1255     // populate meta
1256     List<Put> puts = generatePuts(tablesInfo);
1257     if (puts == null) {
1258       LOG.fatal("Problem encountered when creating new .META. entries.  " +
1259         "You may need to restore the previously sidelined -ROOT- and .META.");
1260       return false;
1261     }
1262     meta.put(puts.toArray(new Put[0]));
1263     meta.close();
1264     meta.getLog().closeAndDelete();
1265     LOG.info("Success! .META. table rebuilt.");
1266     LOG.info("Old -ROOT- and .META. are moved into " + backupDir);
1267     return true;
1268   }
1269 
1270   private SortedMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1271       boolean fixOverlaps) throws IOException {
1272     LOG.info("Checking HBase region split map from HDFS data...");
1273     for (TableInfo tInfo : tablesInfo.values()) {
1274       TableIntegrityErrorHandler handler;
1275       if (fixHoles || fixOverlaps) {
1276         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1277           fixHoles, fixOverlaps);
1278       } else {
1279         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1280       }
1281       if (!tInfo.checkRegionChain(handler)) {
1282         // should dump info as well.
1283         errors.report("Found inconsistency in table " + tInfo.getName());
1284       }
1285     }
1286     return tablesInfo;
1287   }
1288 
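      /**
       * @return the sideline directory for this hbck run, placed under
       * {@link HConstants#HBCK_SIDELINEDIR_NAME} in the hbase root dir and named
       * after the root dir name plus this run's start time.
       */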
1289   private Path getSidelineDir() throws IOException {
1290     if (sidelineDir == null) {
1291       Path hbaseDir = FSUtils.getRootDir(getConf());
1292       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1293       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1294           + startMillis);
1295     }
1296     return sidelineDir;
1297   }
1298 
1299   /**
1300    * Sideline a region dir (instead of deleting it)
1301    */
1302   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1303     return sidelineRegionDir(fs, null, hi);
1304   }
1305 
1306   /**
1307    * Sideline a region dir (instead of deleting it)
1308    *
1309    * @param parentDir if specified, the region will be sidelined to a
1310    * folder like .../parentDir/<table name>/<region name>. The purpose
1311    * is to group together regions sidelined for the same reason, for example
1312    * regions that should be bulk loaded back later on. If null, it is ignored.
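        * <p>
        * For example (illustrative), with parentDir {@link #TO_BE_LOADED} a
        * region ends up under:
        * <pre>
        * &lt;sideline dir&gt;/to_be_loaded/&lt;table name&gt;/&lt;region name&gt;
        * </pre>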
1313    */
1314   Path sidelineRegionDir(FileSystem fs,
1315       String parentDir, HbckInfo hi) throws IOException {
1316     String tableName = Bytes.toString(hi.getTableName());
1317     Path regionDir = hi.getHdfsRegionDir();
1318 
1319     if (!fs.exists(regionDir)) {
1320       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1321       return null;
1322     }
1323 
1324     Path rootDir = getSidelineDir();
1325     if (parentDir != null) {
1326       rootDir = new Path(rootDir, parentDir);
1327     }
1328     Path sidelineTableDir = new Path(rootDir, tableName);
1329     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1330     fs.mkdirs(sidelineRegionDir);
1331     boolean success = false;
1332     FileStatus[] cfs = fs.listStatus(regionDir);
1333     if (cfs == null) {
1334       LOG.info("Region dir is empty: " + regionDir);
1335     } else {
1336       for (FileStatus cf : cfs) {
1337         Path src = cf.getPath();
1338         Path dst = new Path(sidelineRegionDir, src.getName());
1339         if (fs.isFile(src)) {
1340           // simple file
1341           success = fs.rename(src, dst);
1342           if (!success) {
1343             String msg = "Unable to rename file " + src +  " to " + dst;
1344             LOG.error(msg);
1345             throw new IOException(msg);
1346           }
1347           continue;
1348         }
1349 
1350         // is a directory.
1351         fs.mkdirs(dst);
1352 
1353         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1354         // FileSystem.rename is inconsistent with directories -- if the
1355         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1356         // it moves the src into the dst dir resulting in (foo/a/b).  If
1357         // the dst does not exist and the src is a dir, src becomes dst (foo/b).
1358         FileStatus[] hfiles = fs.listStatus(src);
1359         if (hfiles != null && hfiles.length > 0) {
1360           for (FileStatus hfile : hfiles) {
1361             success = fs.rename(hfile.getPath(), dst);
1362             if (!success) {
1363               String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1364               LOG.error(msg);
1365               throw new IOException(msg);
1366             }
1367           }
1368         }
1369         LOG.debug("Sideline directory contents:");
1370         debugLsr(sidelineRegionDir);
1371       }
1372     }
1373 
1374     LOG.info("Removing old region dir: " + regionDir);
1375     success = fs.delete(regionDir, true);
1376     if (!success) {
1377       String msg = "Unable to delete dir " + regionDir;
1378       LOG.error(msg);
1379       throw new IOException(msg);
1380     }
1381     return sidelineRegionDir;
1382   }
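
  /*
   * Usage sketch (hypothetical caller, not from the original source):
   * sideline a region under a grouping folder so it can be bulk loaded back
   * later.  Here hbckInfo is assumed to be an HbckInfo whose HDFS region dir
   * is known.
   *
   *   FileSystem fs = FileSystem.get(getConf());
   *   Path sidelined = sidelineRegionDir(fs, TO_BE_LOADED, hbckInfo);
   *   // resulting layout: <sidelineDir>/<TO_BE_LOADED>/<table name>/<region name>
   */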
1383 
1384   /**
1385    * Sideline an entire table.
1386    */
1387   void sidelineTable(FileSystem fs, byte[] table, Path hbaseDir,
1388       Path backupHbaseDir) throws IOException {
1389     String tableName = Bytes.toString(table);
1390     Path tableDir = new Path(hbaseDir, tableName);
1391     if (fs.exists(tableDir)) {
1392       Path backupTableDir= new Path(backupHbaseDir, tableName);
1393       boolean success = fs.rename(tableDir, backupTableDir);
1394       if (!success) {
1395         throw new IOException("Failed to move  " + tableName + " from " 
1396             +  tableDir.getName() + " to " + backupTableDir.getName());
1397       }
1398     } else {
1399       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1400     }
1401   }
1402 
1403   /**
1404    * @return Path to backup of original directory
1405    */
1406   Path sidelineOldRootAndMeta() throws IOException {
1407     // put current -ROOT- and .META. aside.
1408     Path hbaseDir = new Path(getConf().get(HConstants.HBASE_DIR));
1409     FileSystem fs = hbaseDir.getFileSystem(getConf());
1410     Path backupDir = getSidelineDir();
1411     fs.mkdirs(backupDir);
1412 
1413     sidelineTable(fs, HConstants.ROOT_TABLE_NAME, hbaseDir, backupDir);
1414     try {
1415       sidelineTable(fs, HConstants.META_TABLE_NAME, hbaseDir, backupDir);
1416     } catch (IOException e) {
1417       LOG.error("Attempt to sideline meta failed, attempt to revert...", e);
1418       try {
1419         // move it back.
1420         sidelineTable(fs, HConstants.ROOT_TABLE_NAME, backupDir, hbaseDir);
1421         LOG.warn("... revert succeed.  -ROOT- and .META. still in "
1422             + "original state.");
1423       } catch (IOException ioe) {
1424         LOG.fatal("... failed to sideline root and meta and failed to restore "
1425             + "prevoius state.  Currently in inconsistent state.  To restore "
1426             + "try to rename -ROOT- in " + backupDir.getName() + " to " 
1427             + hbaseDir.getName() + ".", ioe);
1428       }
1429       throw e; // throw original exception
1430     }
1431     return backupDir;
1432   }
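
  /*
   * If the revert above also fails, recovery is manual.  A minimal sketch
   * using the stock HDFS shell (paths and timestamp are illustrative):
   *
   *   $ hadoop fs -mv /hbase/.hbck/hbase-<startMillis>/-ROOT- /hbase/-ROOT-
   *   $ hadoop fs -mv /hbase/.hbck/hbase-<startMillis>/.META. /hbase/.META.
   */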
1433 
1434   /**
1435    * Load the list of disabled tables from ZK into a local set.
1436    * @throws ZooKeeperConnectionException
1437    * @throws IOException
1438    */
1439   private void loadDisabledTables()
1440   throws ZooKeeperConnectionException, IOException {
1441     HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1442       @Override
1443       public Void connect(HConnection connection) throws IOException {
1444         ZooKeeperWatcher zkw = connection.getZooKeeperWatcher();
1445         try {
1446           for (String tableName : ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
1447             disabledTables.add(Bytes.toBytes(tableName));
1448           }
1449         } catch (KeeperException ke) {
1450           throw new IOException(ke);
1451         }
1452         return null;
1453       }
1454     });
1455   }
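
  /*
   * The HConnectable/execute idiom above scopes a shared HConnection to a
   * single callback.  A minimal sketch of the same pattern for an unrelated
   * lookup (illustrative only):
   *
   *   Boolean masterUp = HConnectionManager.execute(
   *       new HConnectable<Boolean>(getConf()) {
   *         @Override
   *         public Boolean connect(HConnection connection) throws IOException {
   *           return connection.isMasterRunning();
   *         }
   *       });
   */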
1456 
1457   /**
1458    * Check if the specified region's table is disabled.
1459    */
1460   private boolean isTableDisabled(HRegionInfo regionInfo) {
1461     return disabledTables.contains(regionInfo.getTableName());
1462   }
1463 
1464   /**
1465    * Scan HDFS for all regions, recording their information into
1466    * regionInfoMap
1467    */
1468   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1469     Path rootDir = new Path(getConf().get(HConstants.HBASE_DIR));
1470     FileSystem fs = rootDir.getFileSystem(getConf());
1471 
1472     // list all tables from HDFS
1473     List<FileStatus> tableDirs = Lists.newArrayList();
1474 
1475     boolean foundVersionFile = false;
1476     FileStatus[] files = fs.listStatus(rootDir);
1477     for (FileStatus file : files) {
1478       String dirName = file.getPath().getName();
1479       if (dirName.equals(HConstants.VERSION_FILE_NAME)) {
1480         foundVersionFile = true;
1481       } else {
1482         if ((!checkMetaOnly && isTableIncluded(dirName)) ||
1483             dirName.equals("-ROOT-") ||
1484             dirName.equals(".META.")) {
1485           tableDirs.add(file);
1486         }
1487       }
1488     }
1489 
1490     // verify that version file exists
1491     if (!foundVersionFile) {
1492       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1493           "Version file does not exist in root dir " + rootDir);
1494       if (shouldFixVersionFile()) {
1495         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1496             + " file.");
1497         setShouldRerun();
1498         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1499             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1500             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1501             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1502       }
1503     }
1504 
1505     // level 1:  <HBASE_DIR>/*
1506     List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
1507     List<Future<Void>> dirsFutures;
1508 
1509     for (FileStatus tableDir : tableDirs) {
1510       LOG.debug("Loading region dirs from " +tableDir.getPath());
1511       dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
1512     }
1513 
1514     // Invoke and wait for Callables to complete
1515     dirsFutures = executor.invokeAll(dirs);
1516 
1517     for(Future<Void> f: dirsFutures) {
1518       try {
1519         f.get();
1520       } catch(ExecutionException e) {
1521         LOG.warn("Could not load region dir " , e.getCause());
1522       }
1523     }
1524   }
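
  /*
   * The fan-out above leans on ExecutorService.invokeAll returning futures
   * in the same order as the submitted callables.  A self-contained sketch
   * of the pattern (illustrative; exception handling elided):
   *
   *   ExecutorService pool = Executors.newFixedThreadPool(4);
   *   List<Callable<Integer>> work = new ArrayList<Callable<Integer>>();
   *   for (int i = 0; i < 10; i++) {
   *     final int n = i;
   *     work.add(new Callable<Integer>() {
   *       public Integer call() { return n * n; }
   *     });
   *   }
   *   for (Future<Integer> f : pool.invokeAll(work)) {
   *     f.get(); // surfaces each item's ExecutionException, if any
   *   }
   *   pool.shutdown();
   */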
1525 
1526   /**
1527    * Record the location of the ROOT region as found in ZooKeeper,
1528    * as if it were in a META table. This is so that we can check
1529    * deployment of ROOT.
1530    */
1531   private boolean recordRootRegion() throws IOException {
1532     HRegionLocation rootLocation = connection.locateRegion(
1533       HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
1534 
1535     // Check if Root region is valid and existing
1536     if (rootLocation == null || rootLocation.getRegionInfo() == null ||
1537         rootLocation.getHostname() == null) {
1538       errors.reportError(ERROR_CODE.NULL_ROOT_REGION,
1539         "Root Region or some of its attributes are null.");
1540       return false;
1541     }
1542     ServerName sn;
1543     try {
1544       sn = getRootRegionServerName();
1545     } catch (InterruptedException e) {
1546       throw new IOException("Interrupted", e);
1547     }
1548     MetaEntry m =
1549       new MetaEntry(rootLocation.getRegionInfo(), sn, System.currentTimeMillis());
1550     HbckInfo hbInfo = new HbckInfo(m);
1551     regionInfoMap.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
1552     return true;
1553   }
1554 
1555   private ServerName getRootRegionServerName()
1556   throws IOException, InterruptedException {
1557     RootRegionTracker rootRegionTracker =
1558       new RootRegionTracker(this.connection.getZooKeeperWatcher(), new Abortable() {
1559         @Override
1560         public void abort(String why, Throwable e) {
1561           LOG.error(why, e);
1562           System.exit(1);
1563         }
1564         @Override
1565         public boolean isAborted(){
1566           return false;
1567         }
1568         
1569       });
1570     rootRegionTracker.start();
1571     ServerName sn = null;
1572     try {
1573       sn = rootRegionTracker.getRootRegionLocation();
1574     } finally {
1575       rootRegionTracker.stop();
1576     }
1577     return sn;
1578   }
1579 
1580   /**
1581    * Contacts each regionserver and fetches metadata about regions.
1582    * @param regionServerList - the list of region servers to connect to
1583    * @throws IOException if a remote or network exception occurs
1584    */
1585   void processRegionServers(Collection<ServerName> regionServerList)
1586     throws IOException, InterruptedException {
1587 
1588     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1589     List<Future<Void>> workFutures;
1590 
1591     // loop to contact each region server in parallel
1592     for (ServerName rsinfo: regionServerList) {
1593       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1594     }
1595     
1596     workFutures = executor.invokeAll(workItems);
1597 
1598     for(int i=0; i<workFutures.size(); i++) {
1599       WorkItemRegion item = workItems.get(i);
1600       Future<Void> f = workFutures.get(i);
1601       try {
1602         f.get();
1603       } catch(ExecutionException e) {
1604         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1605             e.getCause());
1606       }
1607     }
1608   }
1609 
1610   /**
1611    * Check consistency of all regions that have been found in previous phases.
1612    */
1613   private void checkAndFixConsistency()
1614   throws IOException, KeeperException, InterruptedException {
1615     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1616       checkRegionConsistency(e.getKey(), e.getValue());
1617     }
1618   }
1619 
1620   private void preCheckPermission() throws IOException, AccessControlException {
1621     if (shouldIgnorePreCheckPermission()) {
1622       return;
1623     }
1624 
1625     Configuration conf = getConf();
1626     Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
1627     FileSystem fs = hbaseDir.getFileSystem(conf);
1628     UserProvider provider = UserProvider.instantiate(conf);
1629     User user = provider.getCurrent();
1630     FileStatus[] files = fs.listStatus(hbaseDir);
1631     for (FileStatus file : files) {
1632       try {
1633         FSUtils.checkAccess(user, file, FsAction.WRITE);
1634       } catch (AccessControlException ace) {
1635         LOG.warn("Got AccessControlException when preCheckPermission ", ace);
1636         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + user.getShortName()
1637           + " does not have write perms to " + file.getPath()
1638           + ". Please rerun hbck as hdfs user " + file.getOwner());
1639         throw new AccessControlException(ace);
1640       }
1641     }
1642   }
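
  /*
   * When the check above fails, the usual remedy is to rerun hbck as the
   * user that owns the HBase root directory (user name illustrative):
   *
   *   $ sudo -u hbase ./bin/hbase hbck -fix
   */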
1643 
1644   /**
1645    * Deletes region from meta table
1646    */
1647   private void deleteMetaRegion(HbckInfo hi) throws IOException {
1648     Delete d = new Delete(hi.metaEntry.getRegionName());
1649     meta.delete(d);
1650     meta.flushCommits();
1651     LOG.info("Deleted " + hi.metaEntry.getRegionNameAsString() + " from META" );
1652   }
1653 
1654   /**
1655    * Reset the split parent region info in meta table
1656    */
1657   private void resetSplitParent(HbckInfo hi) throws IOException {
1658     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1659     Delete d = new Delete(hi.metaEntry.getRegionName());
1660     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1661     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1662     mutations.add(d);
1663 
1664     Put p = new Put(hi.metaEntry.getRegionName());
1665     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1666     hri.setOffline(false);
1667     hri.setSplit(false);
1668     p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
1669       Writables.getBytes(hri));
1670     mutations.add(p);
1671 
1672     meta.mutateRow(mutations);
1673     meta.flushCommits();
1674     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1675   }
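
  /*
   * Design note: the Delete and Put above ride in one RowMutations so that
   * clearing splitA/splitB and rewriting the regioninfo land atomically on
   * the single META row.  The general shape (illustrative):
   *
   *   RowMutations rm = new RowMutations(row);
   *   rm.add(delete);      // drop the stale qualifiers
   *   rm.add(put);         // rewrite HRegionInfo with split/offline cleared
   *   table.mutateRow(rm); // applied atomically to the row
   */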
1676 
1677   /**
1678    * This is a backwards-compatibility wrapper for permanently offlining a region
1679    * that should not be alive.  If the region server does not support the
1680    * "offline" method, it will use the closest unassign method instead.  This
1681    * will basically work until one attempts to disable or delete the affected
1682    * table.  The problem has to do with in-memory only master state, so
1683    * restarting the HMaster or failing over to another should fix this.
1684    */
1685   private void offline(byte[] regionName) throws IOException {
1686     String regionString = Bytes.toStringBinary(regionName);
1687     if (!rsSupportsOffline) {
1688       LOG.warn("Using unassign region " + regionString
1689           + " instead of using offline method, you should"
1690           + " restart HMaster after these repairs");
1691       admin.unassign(regionName, true);
1692       return;
1693     }
1694 
1695     // the first time through, we assume the master supports #offline.
1696     try {
1697       LOG.info("Offlining region " + regionString);
1698       admin.getMaster().offline(regionName);
1699     } catch (IOException ioe) {
1700       String notFoundMsg = "java.lang.NoSuchMethodException: " +
1701         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1702       if (ioe.getMessage().contains(notFoundMsg)) {
1703         LOG.warn("Using unassign region " + regionString
1704             + " instead of using offline method, you should"
1705             + " restart HMaster after these repairs");
1706         rsSupportsOffline = false; // in the future just use unassign
1707         admin.unassign(regionName, true);
1708         return;
1709       }
1710       throw ioe;
1711     }
1712   }
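
  /*
   * The catch block above is probe-and-fall-back feature detection: the
   * server-side NoSuchMethodException only comes back embedded in the
   * IOException message, so string matching is the available signal on old
   * clusters.  Generic shape of the pattern (names are illustrative):
   *
   *   try {
   *     newStyleCall();              // preferred API
   *   } catch (IOException ioe) {
   *     if (looksLikeMissingMethod(ioe)) {
   *       supportsNewStyle = false;  // remember; don't re-probe every time
   *       oldStyleCall();            // degraded but workable path
   *     } else {
   *       throw ioe;
   *     }
   *   }
   */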
1713 
1714   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1715     for (OnlineEntry rse : hi.deployedEntries) {
1716       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
1717       try {
1718         HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
1719         offline(rse.hri.getRegionName());
1720       } catch (IOException ioe) {
1721         LOG.warn("Got exception when attempting to offline region "
1722             + Bytes.toString(rse.hri.getRegionName()), ioe);
1723       }
1724     }
1725   }
1726 
1727   /**
1728    * Attempts to undeploy a region from a region server based on information in
1729    * META.  Any operation that modifies the file system should make sure that
1730    * its corresponding region is not deployed, to prevent data races.
1731    *
1732    * A separate call is required to update the master in-memory region state
1733    * kept in the AssignmentManager.  Because disable uses this state instead of
1734    * that found in META, we can't seem to cleanly disable/delete tables that
1735    * have been hbck fixed.  When used on a version of HBase that does not have
1736    * the offline ipc call exposed on the master (<0.90.5, <0.92.0), a master
1737    * restart or failover may be required.
1738    */
1739   @SuppressWarnings("deprecation")
1740   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1741     if (hi.metaEntry == null && hi.hdfsEntry == null) {
1742       undeployRegions(hi);
1743       return;
1744     }
1745 
1746     // get assignment info and hregioninfo from meta.
1747     Get get = new Get(hi.getRegionName());
1748     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1749     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1750     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1751     Result r = meta.get(get);
1752     byte[] value = r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1753     byte[] startcodeBytes = r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1754     if (value == null || startcodeBytes == null) {
1755       errors.reportError("Unable to close region "
1756           + hi.getRegionNameAsString() +  " because meta does not "
1757           + "have handle to reach it.");
1758       return;
1759     }
1760     long startcode = Bytes.toLong(startcodeBytes);
1761 
1762     ServerName hsa = new ServerName(Bytes.toString(value), startcode);
1763     byte[] hriVal = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1764     HRegionInfo hri = Writables.getHRegionInfoOrNull(hriVal);
1765     if (hri == null) {
1766       LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1767           + " because META had invalid or missing "
1768           + HConstants.CATALOG_FAMILY_STR + ":"
1769           + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
1770           + " qualifier value.");
1771       return;
1772     }
1773 
1774     // close the region -- close files and remove assignment
1775     HBaseFsckRepair.closeRegionSilentlyAndWait(admin, hsa, hri);
1776   }
1777 
1778   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1779     KeeperException, InterruptedException {
1780     // If we are trying to fix the errors
1781     if (shouldFixAssignments()) {
1782       errors.print(msg);
1783       undeployRegions(hbi);
1784       setShouldRerun();
1785       HRegionInfo hri = hbi.getHdfsHRI();
1786       if (hri == null) {
1787         hri = hbi.metaEntry;
1788       }
1789       HBaseFsckRepair.fixUnassigned(admin, hri);
1790       HBaseFsckRepair.waitUntilAssigned(admin, hri);
1791     }
1792   }
1793 
1794   /**
1795    * Check a single region for consistency and correct deployment.
1796    */
1797   private void checkRegionConsistency(final String key, final HbckInfo hbi)
1798   throws IOException, KeeperException, InterruptedException {
1799     String descriptiveName = hbi.toString();
1800 
1801     boolean inMeta = hbi.metaEntry != null;
1802     // If we are not checking HDFS, assume the region is on HDFS
1803     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
1804     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1805     boolean isDeployed = !hbi.deployedOn.isEmpty();
1806     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1807     boolean deploymentMatchesMeta =
1808       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1809       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1810     boolean splitParent =
1811       (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1812     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1813     boolean recentlyModified = inHdfs &&
1814       hbi.getModTime() + timelag > System.currentTimeMillis();
1815 
1816     // ========== First the healthy cases =============
1817     if (hbi.containsOnlyHdfsEdits()) {
1818       return;
1819     }
1820     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1821       return;
1822     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1823       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
1824         "tabled that is not deployed");
1825       return;
1826     } else if (recentlyModified) {
1827       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1828       return;
1829     }
1830     // ========== Cases where the region is not in META =============
1831     else if (!inMeta && !inHdfs && !isDeployed) {
1832       // We shouldn't have record of this region at all then!
1833       assert false : "Entry for region with no data";
1834     } else if (!inMeta && !inHdfs && isDeployed) {
1835       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1836           + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
1837           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1838       if (shouldFixAssignments()) {
1839         undeployRegions(hbi);
1840       }
1841 
1842     } else if (!inMeta && inHdfs && !isDeployed) {
1843       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1844           + descriptiveName + " on HDFS, but not listed in META " +
1845           "or deployed on any region server");
1846       // restore region consistency of an adopted orphan
1847       if (shouldFixMeta()) {
1848         if (!hbi.isHdfsRegioninfoPresent()) {
1849           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1850               +  " in table integrity repair phase if -fixHdfsOrphans was" +
1851               " used.");
1852           return;
1853         }
1854 
1855         LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
1856         HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1857 
1858         tryAssignmentRepair(hbi, "Trying to reassign region...");
1859       }
1860 
1861     } else if (!inMeta && inHdfs && isDeployed) {
1862       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
1863           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1864       debugLsr(hbi.getHdfsRegionDir());
1865       if (shouldFixMeta()) {
1866         if (!hbi.isHdfsRegioninfoPresent()) {
1867           LOG.error("This should have been repaired in table integrity repair phase");
1868           return;
1869         }
1870 
1871         LOG.info("Patching .META. with with .regioninfo: " + hbi.getHdfsHRI());
1872         HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1873 
1874         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1875       }
1876 
1877     // ========== Cases where the region is in META =============
1878     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
1879       // check whether this is an actual error, or just transient state where parent
1880       // is not cleaned
1881       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
1882         // check that split daughters are there
1883         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
1884         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
1885         if (infoA != null && infoB != null) {
1886           // we already processed or will process daughters. Move on, nothing to see here.
1887           hbi.setSkipChecks(true);
1888           return;
1889         }
1890       }
1891       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
1892           + descriptiveName + " is a split parent in META, in HDFS, "
1893           + "and not deployed on any region server. This could be transient.");
1894       if (shouldFixSplitParents()) {
1895         setShouldRerun();
1896         resetSplitParent(hbi);
1897       }
1898     } else if (inMeta && !inHdfs && !isDeployed) {
1899       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
1900           + descriptiveName + " found in META, but not in HDFS "
1901           + "or deployed on any region server.");
1902       if (shouldFixMeta()) {
1903         deleteMetaRegion(hbi);
1904       }
1905     } else if (inMeta && !inHdfs && isDeployed) {
1906       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
1907           + " found in META, but not in HDFS, " +
1908           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1909       // We treat HDFS as ground truth.  Any information in meta is transient
1910       // and equivalent data can be regenerated.  So, let's unassign and remove
1911       // these problems from META.
1912       if (shouldFixAssignments()) {
1913         errors.print("Trying to fix unassigned region...");
1914         closeRegion(hbi);// Close region will cause RS to abort.
1915       }
1916       if (shouldFixMeta()) {
1917         // wait for it to complete
1918         deleteMetaRegion(hbi);
1919       }
1920     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
1921       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
1922           + " not deployed on any region server.");
1923       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1924     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
1925       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
1926           "Region " + descriptiveName + " should not be deployed according " +
1927           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1928       if (shouldFixAssignments()) {
1929         errors.print("Trying to close the region " + descriptiveName);
1930         setShouldRerun();
1931         HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1932       }
1933     } else if (inMeta && inHdfs && isMultiplyDeployed) {
1934       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
1935           + " is listed in META on region server " + hbi.metaEntry.regionServer
1936           + " but is multiply assigned to region servers " +
1937           Joiner.on(", ").join(hbi.deployedOn));
1938       // If we are trying to fix the errors
1939       if (shouldFixAssignments()) {
1940         errors.print("Trying to fix assignment error...");
1941         setShouldRerun();
1942         HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1943       }
1944     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
1945       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
1946           + descriptiveName + " listed in META on region server " +
1947           hbi.metaEntry.regionServer + " but found on region server " +
1948           hbi.deployedOn.get(0));
1949       // If we are trying to fix the errors
1950       if (shouldFixAssignments()) {
1951         errors.print("Trying to fix assignment error...");
1952         setShouldRerun();
1953         HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1954         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1955       }
1956     } else {
1957       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
1958           " is in an unforeseen state:" +
1959           " inMeta=" + inMeta +
1960           " inHdfs=" + inHdfs +
1961           " isDeployed=" + isDeployed +
1962           " isMultiplyDeployed=" + isMultiplyDeployed +
1963           " deploymentMatchesMeta=" + deploymentMatchesMeta +
1964           " shouldBeDeployed=" + shouldBeDeployed);
1965     }
1966   }
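
  /*
   * Summary of the case analysis above, derived from its branches
   * (M = inMeta, H = inHdfs, D = isDeployed):
   *
   *   M H D  state                         repair (when fixes are enabled)
   *   - - -  impossible entry              assertion failure
   *   - - D  orphan deployment             undeploy
   *   - H -  orphan region on HDFS         patch META, reassign
   *   - H D  deployed but not in META      patch META, reassign
   *   M - -  META-only ghost               delete from META
   *   M - D  deployed with no HDFS data    close region, delete from META
   *   M H -  unassigned                    assign, or reset split parent
   *   M H D  assignment anomalies          close/reassign per sub-case
   */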
1967 
1968   /**
1969    * Checks table integrity. Goes over all regions and scans the tables.
1970    * Collects all the pieces for each table and checks whether there are
1971    * missing, repeated or overlapping regions.
1972    * @throws IOException
1973    */
1974   SortedMap<String, TableInfo> checkIntegrity() throws IOException {
1975     tablesInfo = new TreeMap<String, TableInfo>();
1976     List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
1977     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
1978     for (HbckInfo hbi : regionInfoMap.values()) {
1979       // Check only valid, working regions
1980       if (hbi.metaEntry == null) {
1981         // this assumes that consistency check has run loadMetaEntry
1982         noHDFSRegionInfos.add(hbi);
1983         Path p = hbi.getHdfsRegionDir();
1984         if (p == null) {
1985           errors.report("No regioninfo in Meta or HDFS. " + hbi);
1986         }
1987 
1988         // TODO test.
1989         continue;
1990       }
1991       if (hbi.metaEntry.regionServer == null) {
1992         errors.detail("Skipping region because no region server: " + hbi);
1993         continue;
1994       }
1995       if (hbi.metaEntry.isOffline()) {
1996         errors.detail("Skipping region because it is offline: " + hbi);
1997         continue;
1998       }
1999       if (hbi.containsOnlyHdfsEdits()) {
2000         errors.detail("Skipping region because it only contains edits" + hbi);
2001         continue;
2002       }
2003 
2004       // Missing regionDir or over-deployment is checked elsewhere. Include
2005       // these cases in modTInfo, so we can evaluate those regions as part of
2006       // the region chain in META
2007       //if (hbi.foundRegionDir == null) continue;
2008       //if (hbi.deployedOn.size() != 1) continue;
2009       if (hbi.deployedOn.size() == 0) continue;
2010 
2011       // We should be safe here
2012       String tableName = hbi.metaEntry.getTableNameAsString();
2013       TableInfo modTInfo = tablesInfo.get(tableName);
2014       if (modTInfo == null) {
2015         modTInfo = new TableInfo(tableName);
2016       }
2017       for (ServerName server : hbi.deployedOn) {
2018         modTInfo.addServer(server);
2019       }
2020 
2021       if (!hbi.isSkipChecks()) {
2022         modTInfo.addRegionInfo(hbi);
2023       }
2024 
2025       tablesInfo.put(tableName, modTInfo);
2026     }
2027 
2028     for (TableInfo tInfo : tablesInfo.values()) {
2029       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2030       if (!tInfo.checkRegionChain(handler)) {
2031         errors.report("Found inconsistency in table " + tInfo.getName());
2032       }
2033     }
2034     return tablesInfo;
2035   }
2036 
2037   /**
2038    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2039    * @return number of file move fixes done to merge regions.
2040    */
2041   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2042     int fileMoves = 0;
2043     String thread = Thread.currentThread().getName();
2044     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2045     debugLsr(contained.getHdfsRegionDir());
2046 
2047     // rename the contained into the container.
2048     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2049     FileStatus[] dirs = null;
2050     try { 
2051       dirs = fs.listStatus(contained.getHdfsRegionDir());
2052     } catch (FileNotFoundException fnfe) {
2053       // region we are attempting to merge in is not present!  Since this is a merge, there is
2054       // no harm skipping this region if it does not exist.
2055       if (!fs.exists(contained.getHdfsRegionDir())) {
2056         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() 
2057             + " is missing. Assuming already sidelined or moved.");
2058       } else {
2059         sidelineRegionDir(fs, contained);
2060       }
2061       return fileMoves;
2062     }
2063 
2064     if (dirs == null) {
2065       if (!fs.exists(contained.getHdfsRegionDir())) {
2066         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() 
2067             + " already sidelined.");
2068       } else {
2069         sidelineRegionDir(fs, contained);
2070       }
2071       return fileMoves;
2072     }
2073 
2074     for (FileStatus cf : dirs) {
2075       Path src = cf.getPath();
2076       Path dst =  new Path(targetRegionDir, src.getName());
2077 
2078       if (src.getName().equals(HRegion.REGIONINFO_FILE)) {
2079         // do not copy the old .regioninfo file.
2080         continue;
2081       }
2082 
2083       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2084         // do not copy the .oldlogs files
2085         continue;
2086       }
2087 
2088       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2089       // FileSystem.rename is inconsistent with directories -- if the
2090       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2091       // it moves the src into the dst dir resulting in (foo/a/b).  If
2092       // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2093       for (FileStatus hfile : fs.listStatus(src)) {
2094         boolean success = fs.rename(hfile.getPath(), dst);
2095         if (success) {
2096           fileMoves++;
2097         }
2098       }
2099       LOG.debug("[" + thread + "] Sideline directory contents:");
2100       debugLsr(targetRegionDir);
2101     }
2102 
2103     // if all success.
2104     sidelineRegionDir(fs, contained);
2105     LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2106         getSidelineDir());
2107     debugLsr(contained.getHdfsRegionDir());
2108 
2109     return fileMoves;
2110   }
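
  /*
   * The per-file rename loop above sidesteps the FileSystem.rename directory
   * semantics described in the comment.  A minimal sketch of the difference
   * (paths illustrative):
   *
   *   fs.rename(new Path("/t/b"), new Path("/t/a"));
   *   // if /t/a exists and is a dir -> result is /t/a/b (nested, unwanted)
   *   // if /t/a does not exist      -> /t/b simply becomes /t/a
   *
   * Renaming file-by-file into an existing target keeps the layout flat.
   */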
2111 
2112 
2113   static class WorkItemOverlapMerge implements Callable<Void> {
2114     private TableIntegrityErrorHandler handler;
2115     Collection<HbckInfo> overlapgroup;
2116     
2117     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2118       this.handler = handler;
2119       this.overlapgroup = overlapgroup;
2120     }
2121     
2122     @Override
2123     public Void call() throws Exception {
2124       handler.handleOverlapGroup(overlapgroup);
2125       return null;
2126     }
2127   };
2128   
2129   
2130   /**
2131    * Maintain information about a particular table.
2132    */
2133   public class TableInfo {
2134     String tableName;
2135     TreeSet <ServerName> deployedOn;
2136 
2137     // backwards regions
2138     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2139 
2140     // sidelined big overlapped regions
2141     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2142 
2143     // region split calculator
2144     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2145 
2146     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2147     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2148 
2149     // key = start split, values = set of splits in problem group
2150     final Multimap<byte[], HbckInfo> overlapGroups =
2151       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2152 
2153     TableInfo(String name) {
2154       this.tableName = name;
2155       deployedOn = new TreeSet <ServerName>();
2156     }
2157 
2158     /**
2159      * @return descriptor common to all regions.  null if there are none or multiple!
2160      */
2161     private HTableDescriptor getHTD() {
2162       if (htds.size() == 1) {
2163         return (HTableDescriptor)htds.toArray()[0];
2164       } else {
2165         LOG.error("None/Multiple table descriptors found for table '"
2166           + tableName + "' regions: " + htds);
2167       }
2168       return null;
2169     }
2170 
2171     public void addRegionInfo(HbckInfo hir) {
2172       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2173         // end key is absolute end key, just add it.
2174         sc.add(hir);
2175         return;
2176       }
2177 
2178       // if not the absolute end key, check for cycle 
2179       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2180         errors.reportError(
2181             ERROR_CODE.REGION_CYCLE,
2182             String.format("The endkey for this region comes before the "
2183                 + "startkey, startkey=%s, endkey=%s",
2184                 Bytes.toStringBinary(hir.getStartKey()),
2185                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2186         backwards.add(hir);
2187         return;
2188       }
2189 
2190       // main case, add to split calculator
2191       sc.add(hir);
2192     }
2193 
2194     public void addServer(ServerName server) {
2195       this.deployedOn.add(server);
2196     }
2197 
2198     public String getName() {
2199       return tableName;
2200     }
2201 
2202     public int getNumRegions() {
2203       return sc.getStarts().size() + backwards.size();
2204     }
2205 
2206     private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2207       ErrorReporter errors;
2208 
2209       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2210         this.errors = errors;
2211         setTableInfo(ti);
2212       }
2213 
2214       @Override
2215       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2216         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2217             "First region should start with an empty key.  You need to "
2218             + " create a new region and regioninfo in HDFS to plug the hole.",
2219             getTableInfo(), hi);
2220       }
2221       
2222       @Override
2223       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2224         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2225             "Last region should end with an empty key. You need to "
2226                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2227       }
2228 
2229       @Override
2230       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2231         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2232             "Region has the same start and end key.", getTableInfo(), hi);
2233       }
2234 
2235       @Override
2236       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2237         byte[] key = r1.getStartKey();
2238         // dup start key
2239         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2240             "Multiple regions have the same startkey: "
2241             + Bytes.toStringBinary(key), getTableInfo(), r1);
2242         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2243             "Multiple regions have the same startkey: "
2244             + Bytes.toStringBinary(key), getTableInfo(), r2);
2245       }
2246 
2247       @Override
2248       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2249         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2250             "There is an overlap in the region chain.",
2251             getTableInfo(), hi1, hi2);
2252       }
2253 
2254       @Override
2255       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2256         errors.reportError(
2257             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2258             "There is a hole in the region chain between "
2259                 + Bytes.toStringBinary(holeStart) + " and "
2260                 + Bytes.toStringBinary(holeStop)
2261                 + ".  You need to create a new .regioninfo and region "
2262                 + "dir in hdfs to plug the hole.");
2263       }
2264     };
2265 
2266     /**
2267      * This handler fixes integrity errors from hdfs information.  There are
2268      * basically three classes of integrity problems 1) holes, 2) overlaps, and
2269      * 3) invalid regions.
2270      *
2271      * This class overrides methods that fix holes and the overlap group case.
2272      * Individual cases of particular overlaps are handled by the general
2273      * overlap group merge repair case.
2274      *
2275      * If hbase is online, this forces regions offline before doing merge
2276      * operations.
2277      */
2278     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2279       Configuration conf;
2280 
2281       boolean fixOverlaps = true;
2282 
2283       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2284           boolean fixHoles, boolean fixOverlaps) {
2285         super(ti, errors);
2286         this.conf = conf;
2287         this.fixOverlaps = fixOverlaps;
2288         // TODO properly use fixHoles
2289       }
2290 
2291       /**
2292        * This is a special case hole -- when the first region of a table is
2293    * missing from META, HBase doesn't acknowledge the existence of the
2294        * table.
2295        */
2296       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2297         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2298             "First region should start with an empty key.  Creating a new " +
2299             "region and regioninfo in HDFS to plug the hole.",
2300             getTableInfo(), next);
2301         HTableDescriptor htd = getTableInfo().getHTD();
2302         // from special EMPTY_START_ROW to next region's startKey
2303         HRegionInfo newRegion = new HRegionInfo(htd.getName(),
2304             HConstants.EMPTY_START_ROW, next.getStartKey());
2305 
2306         // TODO test
2307         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2308         LOG.info("Table region start key was not empty.  Created new empty region: "
2309             + newRegion + " " + region);
2310         fixes++;
2311       }
2312 
2313       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2314         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2315             "Last region should end with an empty key. Creating a new "
2316                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2317         HTableDescriptor htd = getTableInfo().getHTD();
2318         // from curEndKey to EMPTY_START_ROW
2319         HRegionInfo newRegion = new HRegionInfo(htd.getName(), curEndKey,
2320             HConstants.EMPTY_START_ROW);
2321 
2322         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2323         LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2324             + " " + region);
2325         fixes++;
2326       }
2327       
2328       /**
2329        * There is a hole in the hdfs regions that violates the table integrity
2330        * rules.  Create a new empty region that patches the hole.
2331        */
2332       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2333         errors.reportError(
2334             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2335             "There is a hole in the region chain between "
2336                 + Bytes.toStringBinary(holeStartKey) + " and "
2337                 + Bytes.toStringBinary(holeStopKey)
2338                 + ".  Creating a new regioninfo and region "
2339                 + "dir in hdfs to plug the hole.");
2340         HTableDescriptor htd = getTableInfo().getHTD();
2341         HRegionInfo newRegion = new HRegionInfo(htd.getName(), holeStartKey, holeStopKey);
2342         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2343         LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region);
2344         fixes++;
2345       }
2346 
2347       /**
2348        * This takes set of overlapping regions and merges them into a single
2349        * region.  This covers cases like degenerate regions, shared start key,
2350        * general overlaps, duplicate ranges, and partial overlapping regions.
2351        *
2352        * Cases:
2353        * - Clean regions that overlap
2354        * - Only .oldlogs regions (can't find or figure out the start/stop range)
2355        *
2356        * This is basically threadsafe, except for the fixes counter increment in mergeOverlaps.
2357        */
2358       @Override
2359       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2360           throws IOException {
2361         Preconditions.checkNotNull(overlap);
2362         Preconditions.checkArgument(overlap.size() > 0);
2363 
2364         if (!this.fixOverlaps) {
2365           LOG.warn("Not attempting to repair overlaps.");
2366           return;
2367         }
2368 
2369         if (overlap.size() > maxMerge) {
2370           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2371             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2372           if (sidelineBigOverlaps) {
2373             // we only sideline big overlapped groups that exceed the max number of regions to merge
2374             sidelineBigOverlaps(overlap);
2375           }
2376           return;
2377         }
2378 
2379         mergeOverlaps(overlap);
2380       }
2381 
2382       void mergeOverlaps(Collection<HbckInfo> overlap)
2383           throws IOException {
2384         String thread = Thread.currentThread().getName();
2385         LOG.info("== [" + thread + "] Merging regions into one region: "
2386           + Joiner.on(",").join(overlap));
2387         // get the min / max range and close all concerned regions
2388         Pair<byte[], byte[]> range = null;
2389         for (HbckInfo hi : overlap) {
2390           if (range == null) {
2391             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2392           } else {
2393             if (RegionSplitCalculator.BYTES_COMPARATOR
2394                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2395               range.setFirst(hi.getStartKey());
2396             }
2397             if (RegionSplitCalculator.BYTES_COMPARATOR
2398                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2399               range.setSecond(hi.getEndKey());
2400             }
2401           }
2402           // need to close files so delete can happen.
2403           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2404           LOG.debug("[" + thread + "] Contained region dir before close");
2405           debugLsr(hi.getHdfsRegionDir());
2406           try {
2407             LOG.info("[" + thread + "] Closing region: " + hi);
2408             closeRegion(hi);
2409           } catch (IOException ioe) {
2410             LOG.warn("[" + thread + "] Was unable to close region " + hi
2411               + ".  Just continuing... ", ioe);
2412           } catch (InterruptedException e) {
2413             LOG.warn("[" + thread + "] Was unable to close region " + hi
2414               + ".  Just continuing... ", e);
2415           }
2416 
2417           try {
2418             LOG.info("[" + thread + "] Offlining region: " + hi);
2419             offline(hi.getRegionName());
2420           } catch (IOException ioe) {
2421             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2422               + ".  Just continuing... ", ioe);
2423           }
2424         }
2425 
2426         // create new empty container region.
2427         HTableDescriptor htd = getTableInfo().getHTD();
2428         // from start key to end Key
2429         HRegionInfo newRegion = new HRegionInfo(htd.getName(), range.getFirst(),
2430             range.getSecond());
2431         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2432         LOG.info("[" + thread + "] Created new empty container region: " +
2433             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2434         debugLsr(region.getRegionDir());
2435 
2436         // all target regions are closed, should be able to safely cleanup.
2437         boolean didFix = false;
2438         Path target = region.getRegionDir();
2439         for (HbckInfo contained : overlap) {
2440           LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
2441           int merges = mergeRegionDirs(target, contained);
2442           if (merges > 0) {
2443             didFix = true;
2444           }
2445         }
2446         if (didFix) {
2447           fixes++;
2448         }
2449       }
2450 
2451       /**
2452        * Sideline some regions in a big overlap group so that the group
2453        * has fewer regions and is easier to merge later on.
2454        *
2455        * @param bigOverlap the overlapped group with regions more than maxMerge
2456        * @throws IOException
2457        */
2458       void sidelineBigOverlaps(
2459           Collection<HbckInfo> bigOverlap) throws IOException {
2460         int overlapsToSideline = bigOverlap.size() - maxMerge;
2461         if (overlapsToSideline > maxOverlapsToSideline) {
2462           overlapsToSideline = maxOverlapsToSideline;
2463         }
2464         List<HbckInfo> regionsToSideline =
2465           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2466         FileSystem fs = FileSystem.get(conf);
2467         for (HbckInfo regionToSideline: regionsToSideline) {
2468           try {
2469             LOG.info("Closing region: " + regionToSideline);
2470             closeRegion(regionToSideline);
2471           } catch (IOException ioe) {
2472             LOG.warn("Was unable to close region " + regionToSideline
2473               + ".  Just continuing... ", ioe);
2474           } catch (InterruptedException e) {
2475             LOG.warn("Was unable to close region " + regionToSideline
2476               + ".  Just continuing... ", e);
2477           }
2478 
2479           try {
2480             LOG.info("Offlining region: " + regionToSideline);
2481             offline(regionToSideline.getRegionName());
2482           } catch (IOException ioe) {
2483             LOG.warn("Unable to offline region from master: " + regionToSideline
2484               + ".  Just continuing... ", ioe);
2485           }
2486 
2487           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2488           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2489           if (sidelineRegionDir != null) {
2490             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2491             LOG.info("After sidelined big overlapped region: "
2492               + regionToSideline.getRegionNameAsString()
2493               + " to " + sidelineRegionDir.toString());
2494             fixes++;
2495           }
2496         }
2497       }
2498     }
2499 
2500     /**
2501      * Check the region chain (from META) of this table.  We are looking for
2502      * holes, overlaps, and cycles.
2503      * @return false if there are errors
2504      * @throws IOException
2505      */
2506     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2507       // When a table is disabled there is no need to check the region chain. If some
2508       // of its regions were accidentally deployed, the code below might report issues
2509       // like missing start/end regions or holes in the chain, and might try to fix them, which is unwanted.
2510       if (disabledTables.contains(this.tableName.getBytes())) {
2511         return true;
2512       }
2513       int originalErrorsCount = errors.getErrorList().size();
2514       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2515       SortedSet<byte[]> splits = sc.getSplits();
2516 
2517       byte[] prevKey = null;
2518       byte[] problemKey = null;
2519       for (byte[] key : splits) {
2520         Collection<HbckInfo> ranges = regions.get(key);
2521         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2522           for (HbckInfo rng : ranges) {
2523             handler.handleRegionStartKeyNotEmpty(rng);
2524           }
2525         }
2526 
2527         // check for degenerate ranges
2528         for (HbckInfo rng : ranges) {
2529           // special endkey case converts '' to null
2530           byte[] endKey = rng.getEndKey();
2531           endKey = (endKey.length == 0) ? null : endKey;
2532           if (Bytes.equals(rng.getStartKey(),endKey)) {
2533             handler.handleDegenerateRegion(rng);
2534           }
2535         }
2536 
2537         if (ranges.size() == 1) {
2538           // this split key is ok -- no overlap, not a hole.
2539           if (problemKey != null) {
2540             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2541           }
2542           problemKey = null; // fell through, no more problem.
2543         } else if (ranges.size() > 1) {
2544           // set the new problem key group name; if we already have a problem key,
2545           // just keep using it.
2546           if (problemKey == null) {
2547             // only for overlap regions.
2548             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2549             problemKey = key;
2550           }
2551           overlapGroups.putAll(problemKey, ranges);
2552 
2553           // record errors
2554           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2555           // this is dumb and O(n^2), but it shouldn't happen often
2556           for (HbckInfo r1 : ranges) {
2557             subRange.remove(r1);
2558             for (HbckInfo r2 : subRange) {
2559               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2560                 handler.handleDuplicateStartKeys(r1,r2);
2561               } else {
2562                 // overlap
2563                 handler.handleOverlapInRegionChain(r1, r2);
2564               }
2565             }
2566           }
2567 
2568         } else if (ranges.size() == 0) {
2569           if (problemKey != null) {
2570             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2571           }
2572           problemKey = null;
2573 
2574           byte[] holeStopKey = sc.getSplits().higher(key);
2575           // if higher key is null we reached the top.
2576           if (holeStopKey != null) {
2577             // hole
2578             handler.handleHoleInRegionChain(key, holeStopKey);
2579           }
2580         }
2581         prevKey = key;
2582       }
2583 
2584       // When the last region of a table is proper and has an empty end key, 'prevKey'
2585       // will be null.
2586       if (prevKey != null) {
2587         handler.handleRegionEndKeyNotEmpty(prevKey);
2588       }
2589 
2590       // TODO fold this into the TableIntegrityHandler
2591       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2592         LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
2593             " false to run serially.");
2594         boolean ok = handleOverlapsParallel(handler, prevKey);
2595         if (!ok) {
2596           return false;
2597         }
2598       } else {
2599         LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to" +
2600             " true to run in parallel.");
2601         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2602           handler.handleOverlapGroup(overlap);
2603         }
2604       }
2605 
2606       if (details) {
2607         // do full region split map dump
2608         errors.print("---- Table '"  +  this.tableName
2609             + "': region split map");
2610         dump(splits, regions);
2611         errors.print("---- Table '"  +  this.tableName
2612             + "': overlap groups");
2613         dumpOverlapProblems(overlapGroups);
2614         errors.print("There are " + overlapGroups.keySet().size()
2615             + " overlap groups with " + overlapGroups.size()
2616             + " overlapping regions");
2617       }
2618       if (!sidelinedRegions.isEmpty()) {
2619         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2620         errors.print("---- Table '"  +  this.tableName
2621             + "': sidelined big overlapped regions");
2622         dumpSidelinedRegions(sidelinedRegions);
2623       }
2624       return errors.getErrorList().size() == originalErrorsCount;
2625     }
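
         /*
          * Illustrative sketch (editor's note, not part of the original source) of
          * the region-chain cases the handler above reports, for boundary keys
          * A < B < C < D:
          *
          *   complete chain:  [A,B) [B,C) [C,D)  -- exactly one region per split key
          *   hole:            [A,B)       [C,D)  -- no region covers [B,C)
          *   overlap:         [A,C) [B,D)        -- two regions both cover [B,C)
          *   degenerate:      [B,B)              -- start key equals end key
          */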
2626 
2627     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
2628         throws IOException {
2629       // We parallelize overlap handling for the case where there are many groups
2630       // to fix; each group can safely be assumed to be independent.
2631       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
2632       List<Future<Void>> rets;
2633       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2635         merges.add(new WorkItemOverlapMerge(overlap, handler));
2636       }
2637       try {
2638         rets = executor.invokeAll(merges);
2639       } catch (InterruptedException e) {
2640         LOG.error("Overlap merges were interrupted", e); // the log call already captures the stack trace
2642         return false;
2643       }
2644       for(int i=0; i<merges.size(); i++) {
2645         WorkItemOverlapMerge work = merges.get(i);
2646         Future<Void> f = rets.get(i);
2647         try {
2648           f.get();
2649         } catch(ExecutionException e) {
2650           LOG.warn("Failed to merge overlap group " + work, e.getCause());
2651         } catch (InterruptedException e) {
2652           LOG.error("Waiting for overlap merges was interrupted", e);
2653           return false;
2654         }
2655       }
2656       return true;
2657     }
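
         /*
          * Usage sketch (hypothetical): the parallel path above is selected by
          * configuration before hbck runs, e.g.
          *
          *   Configuration conf = HBaseConfiguration.create();
          *   conf.setBoolean("hbasefsck.overlap.merge.parallel", false); // force the serial path
          *   HBaseFsck fsck = new HBaseFsck(conf);
          */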
2658 
2659     /**
2660      * Dumps the region split map in a human-readable layout for debugging.
2661      *
2662      * @param splits the table's split keys, in sorted order
2663      * @param regions regions keyed by the split key at which they start
2664      */
2665     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2666       // we display this way because the last end key should be displayed as well.
2667       StringBuilder sb = new StringBuilder();
2668       for (byte[] k : splits) {
2669         sb.setLength(0); // clear out existing buffer, if any.
2670         sb.append(Bytes.toStringBinary(k) + ":\t");
2671         for (HbckInfo r : regions.get(k)) {
2672           sb.append("[ "+ r.toString() + ", "
2673               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2674         }
2675         errors.print(sb.toString());
2676       }
2677     }
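
         /*
          * Example of the layout dump() produces (illustrative keys only): each
          * split key is printed once, followed by every region that starts at that
          * key together with the region's end key, e.g.
          *
          *   keyA:   [ region1, keyB ]   [ region2, keyC ]
          *   keyB:   [ region3, keyC ]
          */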
2678   }
2679 
2680   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2681     // we display this way because the last end key should be displayed as
2682     // well.
2683     for (byte[] k : regions.keySet()) {
2684       errors.print(Bytes.toStringBinary(k) + ":");
2685       for (HbckInfo r : regions.get(k)) {
2686         errors.print("[ " + r.toString() + ", "
2687             + Bytes.toStringBinary(r.getEndKey()) + "]");
2688       }
2689       errors.print("----");
2690     }
2691   }
2692 
2693   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2694     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2695       String tableName = Bytes.toStringBinary(entry.getValue().getTableName());
2696       Path path = entry.getKey();
2697       errors.print("This sidelined region dir should be bulk loaded: "
2698         + path.toString());
2699       errors.print("Bulk load command looks like: "
2700         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2701         + path.toUri().getPath() + " "+ tableName);
2702     }
2703   }
2704 
2705   public Multimap<byte[], HbckInfo> getOverlapGroups(
2706       String table) {
2707     TableInfo ti = tablesInfo.get(table);
2708     return ti.overlapGroups;
2709   }
2710 
2711   /**
2712    * Return descriptors for the user-space tables whose metadata has not been
2713    * modified in the last few milliseconds specified by timelag:
2714    * if none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
2715    * SPLITA_QUALIFIER, or SPLITB_QUALIFIER columns has changed in that window,
2716    * the table is a candidate to be returned.
2717    * @param numSkipped incremented once for each in-flux table that is skipped
2718    * @return tables that have not been modified recently
2719    */
2720   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2721     List<String> tableNames = new ArrayList<String>();
2722     long now = System.currentTimeMillis();
2723 
2724     for (HbckInfo hbi : regionInfoMap.values()) {
2725       MetaEntry info = hbi.metaEntry;
2726 
2727       // if the start key is empty, then we have found the first region of a table.
2728       // pick only those tables that were not modified in the last few milliseconds.
2729       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2730         if (info.modTime + timelag < now) {
2731           tableNames.add(info.getTableNameAsString());
2732         } else {
2733           numSkipped.incrementAndGet(); // one more in-flux table
2734         }
2735       }
2736     }
2737     return getHTableDescriptors(tableNames);
2738   }
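
       /*
        * Usage sketch (hypothetical caller; "fsck" is an HBaseFsck instance and
        * "stable" a made-up name, not part of the original source):
        *
        *   AtomicInteger numSkipped = new AtomicInteger(0);
        *   HTableDescriptor[] stable = fsck.getTables(numSkipped);
        *   // stable holds tables unmodified for at least 'timelag' ms;
        *   // numSkipped counts the in-flux tables that were left out.
        */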
2739 
2740   HTableDescriptor[] getHTableDescriptors(List<String> tableNames) {
2741     HTableDescriptor[] htd = new HTableDescriptor[0];
2742     try {
2743       LOG.info("Fetching table descriptors for: " + tableNames);
2744       htd = new HBaseAdmin(getConf()).getTableDescriptors(tableNames);
2745     } catch (IOException e) {
2746       LOG.debug("Exception getting table descriptors", e);
2747     }
2748     return htd;
2749   }
2750 
2751 
2752   /**
2753    * Gets the entry in regionInfo corresponding to the given encoded
2754    * region name. If the region has not been seen yet, a new entry is added
2755    * and returned.
2756    */
2757   private synchronized HbckInfo getOrCreateInfo(String name) {
2758     HbckInfo hbi = regionInfoMap.get(name);
2759     if (hbi == null) {
2760       hbi = new HbckInfo(null);
2761       regionInfoMap.put(name, hbi);
2762     }
2763     return hbi;
2764   }
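
       /*
        * Usage sketch: the HDFS and region-server scanners below all funnel their
        * findings into the same HbckInfo through this method, e.g. (taken from
        * WorkItemRegion.call() further down):
        *
        *   HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
        *   hbi.addServer(r, rsinfo);
        */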
2765 
2766   /**
2767    * Check the regionInfo entries for .META.
2768    * Exactly one region should hold .META.
2769    * If there are inconsistencies (i.e. zero regions or more than one region
2770    * claims to be holding .META.), try to fix that and report an error.
2771    * @throws IOException from HBaseFsckRepair functions
2772    * @throws KeeperException
2773    * @throws InterruptedException
2774    */
2775   boolean checkMetaRegion()
2776     throws IOException, KeeperException, InterruptedException {
2777     List <HbckInfo> metaRegions = Lists.newArrayList();
2778     for (HbckInfo value : regionInfoMap.values()) {
2779       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
2780         metaRegions.add(value);
2781       }
2782     }
2783 
2784     // If something is wrong
2785     if (metaRegions.size() != 1) {
2786       HRegionLocation rootLocation = connection.locateRegion(
2787         HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
2788       HbckInfo root =
2789           regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
2790 
2791       // If there is no region holding .META.
2792       if (metaRegions.size() == 0) {
2793         errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
2794         if (shouldFixAssignments()) {
2795           errors.print("Trying to fix a problem with .META...");
2796           setShouldRerun();
2797           // try to fix it (treat it as unassigned region)
2798           HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
2799           HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
2800         }
2801       }
2802       // If there are more than one regions pretending to hold the .META.
2803       else if (metaRegions.size() > 1) {
2804         errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
2805         if (shouldFixAssignments()) {
2806           errors.print("Trying to fix a problem with .META...");
2807           setShouldRerun();
2808           // try to fix it (treat it as a dupe assignment)
2809           List <ServerName> deployedOn = Lists.newArrayList();
2810           for (HbckInfo mRegion : metaRegions) {
2811             deployedOn.add(mRegion.metaEntry.regionServer);
2812           }
2813           HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
2814         }
2815       }
2816       // rerun hbck with hopefully fixed META
2817       return false;
2818     }
2819     // no errors, so continue normally
2820     return true;
2821   }
2822 
2823   /**
2824    * Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
2825    * @throws IOException if an error is encountered
2826    */
2827   boolean loadMetaEntries() throws IOException {
2828 
2829     // get a list of all regions from the master. This involves
2830     // scanning the META table
2831     if (!recordRootRegion()) {
2832       // Will remove later if we can fix it
2833       errors.reportError("Fatal error: unable to get root region location. Exiting...");
2834       return false;
2835     }
2836 
2837     MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
2838       int countRecord = 1;
2839 
2840       // comparator that orders KeyValues by timestamp
2841       final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
2842         public int compare(KeyValue k1, KeyValue k2) {
2843           return Long.valueOf(k1.getTimestamp()).compareTo(Long.valueOf(k2.getTimestamp())); // long-safe; an int cast of the difference could overflow
2844         }
2845       };
2846 
2847       public boolean processRow(Result result) throws IOException {
2848         try {
2849 
2850           // record the latest modification of this META record
2851           long ts =  Collections.max(result.list(), comp).getTimestamp();
2852           Pair<HRegionInfo, ServerName> pair = MetaReader.parseCatalogResult(result);
2853           if (pair == null || pair.getFirst() == null) {
2854             emptyRegionInfoQualifiers.add(result);
2855             return true;
2856           }
2857           ServerName sn = pair.getSecond(); // may be null when the region is unassigned
2861           HRegionInfo hri = pair.getFirst();
2862           if (!(isTableIncluded(hri.getTableNameAsString())
2863               || hri.isMetaRegion() || hri.isRootRegion())) {
2864             return true;
2865           }
2866           PairOfSameType<HRegionInfo> daughters = MetaReader.getDaughterRegions(result);
2867           MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
2868           HbckInfo hbInfo = new HbckInfo(m);
2869           HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
2870           if (previous != null) {
2871             throw new IOException("Two entries in META are the same: " + previous);
2872           }
2873 
2874           // show proof of progress to the user, once for every 100 records.
2875           if (countRecord % 100 == 0) {
2876             errors.progress();
2877           }
2878           countRecord++;
2879           return true;
2880         } catch (RuntimeException e) {
2881           LOG.error("Result=" + result);
2882           throw e;
2883         }
2884       }
2885     };
2886 
2887     // Scan -ROOT- to pick up META regions
2888     MetaScanner.metaScan(getConf(), null, visitor, null, null,
2889       Integer.MAX_VALUE, HConstants.ROOT_TABLE_NAME);
2890 
2891     if (!checkMetaOnly) {
2892       // Scan .META. to pick up user regions
2893       MetaScanner.metaScan(getConf(), visitor);
2894     }
2895 
2896     errors.print("");
2897     return true;
2898   }
2899 
2900   /**
2901    * Stores the regioninfo entries scanned from META
2902    */
2903   static class MetaEntry extends HRegionInfo {
2904     ServerName regionServer;   // server hosting this region
2905     long modTime;          // timestamp of the most recent metadata modification
2906     HRegionInfo splitA, splitB; //split daughters
2907 
2908     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
2909       this(rinfo, regionServer, modTime, null, null);
2910     }
2911 
2912     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
2913         HRegionInfo splitA, HRegionInfo splitB) {
2914       super(rinfo);
2915       this.regionServer = regionServer;
2916       this.modTime = modTime;
2917       this.splitA = splitA;
2918       this.splitB = splitB;
2919     }
2920 
2921     public boolean equals(Object o) {
2922       // instanceof also rejects null and guards the cast below
2923       if (!(o instanceof MetaEntry) || !super.equals(o)) {
2924         return false;
2925       }
2926 
2927       MetaEntry me = (MetaEntry) o;
2928       if (!regionServer.equals(me.regionServer)) {
2929         return false;
2930       }
2931       return (modTime == me.modTime);
2932     }
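
         /*
          * hashCode kept consistent with equals() above (editor's addition; the
          * original class overrode equals() only). Mixes the superclass hash with
          * the two fields equals() compares.
          */
         @Override
         public int hashCode() {
           int hash = super.hashCode();
           hash = 31 * hash + (regionServer == null ? 0 : regionServer.hashCode());
           hash = 31 * hash + (int) (modTime ^ (modTime >>> 32));
           return hash;
         }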
2933   }
2934 
2935   /**
2936    * Stores the regioninfo entries from HDFS
2937    */
2938   static class HdfsEntry {
2939     HRegionInfo hri;
2940     Path hdfsRegionDir = null;
2941     long hdfsRegionDirModTime  = 0;
2942     boolean hdfsRegioninfoFilePresent = false;
2943     boolean hdfsOnlyEdits = false;
2944   }
2945 
2946   /**
2947    * Stores the regioninfo retrieved from Online region servers.
2948    */
2949   static class OnlineEntry {
2950     HRegionInfo hri;
2951     ServerName hsa;
2952 
2953     public String toString() {
2954       return hsa.toString() + ";" + hri.getRegionNameAsString();
2955     }
2956   }
2957 
2958   /**
2959    * Maintain information about a particular region.  It gathers information
2960    * from three places -- HDFS, META, and region servers.
2961    */
2962   public static class HbckInfo implements KeyRange {
2963     private MetaEntry metaEntry = null; // info in META
2964     private HdfsEntry hdfsEntry = null; // info in HDFS
2965     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
2966     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
2967     private boolean skipChecks = false; // whether to skip further checks to this region info.
2968 
2969     HbckInfo(MetaEntry metaEntry) {
2970       this.metaEntry = metaEntry;
2971     }
2972 
2973     public synchronized void addServer(HRegionInfo hri, ServerName server) {
2974       OnlineEntry rse = new OnlineEntry() ;
2975       rse.hri = hri;
2976       rse.hsa = server;
2977       this.deployedEntries.add(rse);
2978       this.deployedOn.add(server);
2979     }
2980 
2981     public synchronized String toString() {
2982       StringBuilder sb = new StringBuilder();
2983       sb.append("{ meta => ");
2984       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
2985       sb.append( ", hdfs => " + getHdfsRegionDir());
2986       sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
2987       sb.append(" }");
2988       return sb.toString();
2989     }
2990 
2991     @Override
2992     public byte[] getStartKey() {
2993       if (this.metaEntry != null) {
2994         return this.metaEntry.getStartKey();
2995       } else if (this.hdfsEntry != null) {
2996         return this.hdfsEntry.hri.getStartKey();
2997       } else {
2998         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2999         return null;
3000       }
3001     }
3002 
3003     @Override
3004     public byte[] getEndKey() {
3005       if (this.metaEntry != null) {
3006         return this.metaEntry.getEndKey();
3007       } else if (this.hdfsEntry != null) {
3008         return this.hdfsEntry.hri.getEndKey();
3009       } else {
3010         LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3011         return null;
3012       }
3013     }
3014 
3015     public byte[] getTableName() {
3016       if (this.metaEntry != null) {
3017         return this.metaEntry.getTableName();
3018       } else if (this.hdfsEntry != null) {
3019         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3020         // so we get the name from the Path
3021         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3022         return Bytes.toBytes(tableDir.getName());
3023       } else {
3024         // Currently no code exercises this path, but we could add one for
3025         // getting table name from OnlineEntry
3026         return null;
3027       }
3028     }
3029 
3030     public String getRegionNameAsString() {
3031       if (metaEntry != null) {
3032         return metaEntry.getRegionNameAsString();
3033       } else if (hdfsEntry != null) {
3034         if (hdfsEntry.hri != null) {
3035           return hdfsEntry.hri.getRegionNameAsString();
3036         }
3037       }
3038       return null;
3039     }
3040 
3041     public byte[] getRegionName() {
3042       if (metaEntry != null) {
3043         return metaEntry.getRegionName();
3044       } else if (hdfsEntry != null) {
3045         return hdfsEntry.hri.getRegionName();
3046       } else {
3047         return null;
3048       }
3049     }
3050 
3051     Path getHdfsRegionDir() {
3052       if (hdfsEntry == null) {
3053         return null;
3054       }
3055       return hdfsEntry.hdfsRegionDir;
3056     }
3057 
3058     boolean containsOnlyHdfsEdits() {
3059       if (hdfsEntry == null) {
3060         return false;
3061       }
3062       return hdfsEntry.hdfsOnlyEdits;
3063     }
3064 
3065     boolean isHdfsRegioninfoPresent() {
3066       if (hdfsEntry == null) {
3067         return false;
3068       }
3069       return hdfsEntry.hdfsRegioninfoFilePresent;
3070     }
3071 
3072     long getModTime() {
3073       if (hdfsEntry == null) {
3074         return 0;
3075       }
3076       return hdfsEntry.hdfsRegionDirModTime;
3077     }
3078 
3079     HRegionInfo getHdfsHRI() {
3080       if (hdfsEntry == null) {
3081         return null;
3082       }
3083       return hdfsEntry.hri;
3084     }
3085 
3086     public void setSkipChecks(boolean skipChecks) {
3087       this.skipChecks = skipChecks;
3088     }
3089 
3090     public boolean isSkipChecks() {
3091       return skipChecks;
3092     }
3093   }
3094 
3095   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3096     @Override
3097     public int compare(HbckInfo l, HbckInfo r) {
3098       if (l == r) {
3099         // same instance
3100         return 0;
3101       }
3102 
3103       int tableCompare = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3104           l.getTableName(), r.getTableName());
3105       if (tableCompare != 0) {
3106         return tableCompare;
3107       }
3108 
3109       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3110           l.getStartKey(), r.getStartKey());
3111       if (startComparison != 0) {
3112         return startComparison;
3113       }
3114 
3115       // Special case for absolute endkey
3116       byte[] endKey = r.getEndKey();
3117       endKey = (endKey.length == 0) ? null : endKey;
3118       byte[] endKey2 = l.getEndKey();
3119       endKey2 = (endKey2.length == 0) ? null : endKey2;
3120       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3121           endKey2,  endKey);
3122 
3123       if (endComparison != 0) {
3124         return endComparison;
3125       }
3126 
3127       // use regionId as tiebreaker.
3128       // null sorts after all possible values, so treat it as larger.
3129       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3130         return 0;
3131       }
3132       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3133         return 1;
3134       }
3135       // l.hdfsEntry must not be null
3136       if (r.hdfsEntry == null) {
3137         return -1;
3138       }
3139       // both l.hdfsEntry and r.hdfsEntry must not be null.
3140       return Long.valueOf(l.hdfsEntry.hri.getRegionId()).compareTo(Long.valueOf(r.hdfsEntry.hri.getRegionId())); // long-safe; an int cast of the difference could overflow
3141     }
3142   };
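
       /*
        * Usage sketch (hypothetical): producing a deterministic walk over all
        * known regions with the comparator above:
        *
        *   List<HbckInfo> all = new ArrayList<HbckInfo>(regionInfoMap.values());
        *   Collections.sort(all, cmp); // by table, start key, end key, then regionId
        */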
3143 
3144   /**
3145    * Prints summary of all tables found on the system.
3146    */
3147   private void printTableSummary(SortedMap<String, TableInfo> tablesInfo) {
3148     StringBuilder sb = new StringBuilder();
3149     errors.print("Summary:");
3150     for (TableInfo tInfo : tablesInfo.values()) {
3151       if (errors.tableHasErrors(tInfo)) {
3152         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3153       } else {
3154         errors.print("Table " + tInfo.getName() + " is okay.");
3155       }
3156       errors.print("    Number of regions: " + tInfo.getNumRegions());
3157       sb.setLength(0); // clear out existing buffer, if any.
3158       sb.append("    Deployed on: ");
3159       for (ServerName server : tInfo.deployedOn) {
3160         sb.append(" " + server.toString());
3161       }
3162       errors.print(sb.toString());
3163     }
3164   }
3165 
3166   static ErrorReporter getErrorReporter(
3167       final Configuration conf) throws ClassNotFoundException {
3168     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3169     return (ErrorReporter)ReflectionUtils.newInstance(reporter, conf);
3170   }
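
       /*
        * Configuration sketch (hypothetical): a custom reporter can be plugged in
        * through the "hbasefsck.errorreporter" key read above; QuietReporter is a
        * made-up example class:
        *
        *   conf.setClass("hbasefsck.errorreporter", QuietReporter.class,
        *       ErrorReporter.class);
        */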
3171 
3172   public interface ErrorReporter {
3173     public static enum ERROR_CODE {
3174       UNKNOWN, NO_META_REGION, NULL_ROOT_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3175       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3176       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3177       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3178       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3179       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3180       WRONG_USAGE, BOUNDARIES_ERROR
3181     }
3182     public void clear();
3183     public void report(String message);
3184     public void reportError(String message);
3185     public void reportError(ERROR_CODE errorCode, String message);
3186     public void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3187     public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3188     public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, HbckInfo info2);
3189     public int summarize();
3190     public void detail(String details);
3191     public ArrayList<ERROR_CODE> getErrorList();
3192     public void progress();
3193     public void print(String message);
3194     public void resetErrors();
3195     public boolean tableHasErrors(TableInfo table);
3196   }
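
       /*
        * Sketch (illustrative): getErrorList() exists mainly so unit tests can
        * verify which problems a run discovered, e.g.
        *
        *   ArrayList<ERROR_CODE> found = reporter.getErrorList();
        *   assertTrue(found.contains(ERROR_CODE.HOLE_IN_REGION_CHAIN));
        */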
3197 
3198   static class PrintingErrorReporter implements ErrorReporter {
3199     public int errorCount = 0;
3200     private int showProgress;
3201 
3202     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3203 
3204     // for use by unit tests to verify which errors were discovered
3205     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3206 
3207     public void clear() {
3208       errorTables.clear();
3209       errorList.clear();
3210       errorCount = 0;
3211     }
3212 
3213     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3214       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3215         System.err.println(message);
3216         return;
3217       }
3218 
3219       errorList.add(errorCode);
3220       if (!summary) {
3221         System.out.println("ERROR: " + message);
3222       }
3223       errorCount++;
3224       showProgress = 0;
3225     }
3226 
3227     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3228       errorTables.add(table);
3229       reportError(errorCode, message);
3230     }
3231     
3232     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3233                                          HbckInfo info) {
3234       errorTables.add(table);
3235       String reference = "(region " + info.getRegionNameAsString() + ")";
3236       reportError(errorCode, reference + " " + message);
3237     }
3238 
3239     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3240                                          HbckInfo info1, HbckInfo info2) {
3241       errorTables.add(table);
3242       String reference = "(regions " + info1.getRegionNameAsString()
3243           + " and " + info2.getRegionNameAsString() + ")";
3244       reportError(errorCode, reference + " " + message);
3245     }
3246 
3247     public synchronized void reportError(String message) {
3248       reportError(ERROR_CODE.UNKNOWN, message);
3249     }
3250 
3251     /**
3252      * Report error information, but do not increment the error count.  Intended for cases
3253      * where the actual error would have been reported previously.
3254      * @param message the detail message to print
3255      */
3256     public synchronized void report(String message) {
3257       if (! summary) {
3258         System.out.println("ERROR: " + message);
3259       }
3260       showProgress = 0;
3261     }
3262 
3263     public synchronized int summarize() {
3264       System.out.println(Integer.toString(errorCount) +
3265                          " inconsistencies detected.");
3266       if (errorCount == 0) {
3267         System.out.println("Status: OK");
3268         return 0;
3269       } else {
3270         System.out.println("Status: INCONSISTENT");
3271         return -1;
3272       }
3273     }
3274 
3275     public ArrayList<ERROR_CODE> getErrorList() {
3276       return errorList;
3277     }
3278 
3279     public synchronized void print(String message) {
3280       if (!summary) {
3281         System.out.println(message);
3282       }
3283     }
3284 
3285     @Override
3286     public boolean tableHasErrors(TableInfo table) {
3287       return errorTables.contains(table);
3288     }
3289 
3290     @Override
3291     public void resetErrors() {
3292       errorCount = 0;
3293     }
3294 
3295     public synchronized void detail(String message) {
3296       if (details) {
3297         System.out.println(message);
3298       }
3299       showProgress = 0;
3300     }
3301 
3302     public synchronized void progress() {
3303       if (showProgress++ == 10) {
3304         if (!summary) {
3305           System.out.print(".");
3306         }
3307         showProgress = 0;
3308       }
3309     }
3310   }
3311 
3312   /**
3313    * Contact a region server and get all information from it
3314    */
3315   static class WorkItemRegion implements Callable<Void> {
3316     private HBaseFsck hbck;
3317     private ServerName rsinfo;
3318     private ErrorReporter errors;
3319     private HConnection connection;
3320 
3321     WorkItemRegion(HBaseFsck hbck, ServerName info,
3322                    ErrorReporter errors, HConnection connection) {
3323       this.hbck = hbck;
3324       this.rsinfo = info;
3325       this.errors = errors;
3326       this.connection = connection;
3327     }
3328 
3329     @Override
3330     public synchronized Void call() throws IOException {
3331       errors.progress();
3332       try {
3333         HRegionInterface server =
3334             connection.getHRegionConnection(rsinfo.getHostname(), rsinfo.getPort());
3335 
3336         // list all online regions from this region server
3337         List<HRegionInfo> regions = server.getOnlineRegions();
3338         regions = filterRegions(regions);
3339         if (details) {
3340           errors.detail("RegionServer: " + rsinfo.getServerName() +
3341                            " number of regions: " + regions.size());
3342           for (HRegionInfo rinfo: regions) {
3343             errors.detail("  " + rinfo.getRegionNameAsString() +
3344                              " id: " + rinfo.getRegionId() +
3345                              " encoded_name: " + rinfo.getEncodedName() +
3346                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3347                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3348           }
3349         }
3350 
3351         // check to see if the existence of this region matches the region in META
3352         for (HRegionInfo r:regions) {
3353           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3354           hbi.addServer(r, rsinfo);
3355         }
3356       } catch (IOException e) {          // unable to connect to the region server. 
3357         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3358           " Unable to fetch region information. " + e);
3359         throw e;
3360       }
3361       return null;
3362     }
3363 
3364     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3365       List<HRegionInfo> ret = Lists.newArrayList();
3366       for (HRegionInfo hri : regions) {
3367         if (hri.isMetaTable() || (!hbck.checkMetaOnly
3368             && hbck.isTableIncluded(hri.getTableNameAsString()))) {
3369           ret.add(hri);
3370         }
3371       }
3372       return ret;
3373     }
3374   }
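
       /*
        * Usage sketch (hypothetical; "regionServerList" is a made-up name, but the
        * pattern mirrors how hbck fans work out to its thread pool elsewhere in
        * this class):
        *
        *   List<WorkItemRegion> items = new ArrayList<WorkItemRegion>();
        *   for (ServerName rs : regionServerList) {
        *     items.add(new WorkItemRegion(this, rs, errors, connection));
        *   }
        *   executor.invokeAll(items); // each call() records regions via getOrCreateInfo
        */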
3375 
3376   /**
3377    * Contact hdfs and get all information about specified table directory into
3378    * regioninfo list.
3379    */
3380   static class WorkItemHdfsDir implements Callable<Void> {
3381     private HBaseFsck hbck;
3382     private FileStatus tableDir;
3383     private ErrorReporter errors;
3384     private FileSystem fs;
3385 
3386     WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors, 
3387                     FileStatus status) {
3388       this.hbck = hbck;
3389       this.fs = fs;
3390       this.tableDir = status;
3391       this.errors = errors;
3392     }
3393 
3394     @Override
3395     public synchronized Void call() throws IOException {
3396       try {
3397         String tableName = tableDir.getPath().getName();
3398         // ignore hidden files
3399         if (tableName.startsWith(".") &&
3400             !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME))) {
3401           return null;
3402         }
3403         // level 2: <HBASE_DIR>/<table>/*
3404         FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3405         for (FileStatus regionDir : regionDirs) {
3406           String encodedName = regionDir.getPath().getName();
3407           // ignore directories that aren't hexadecimal
3408           if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
3409             continue;
3410           }
3411 
3412           LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
3413           HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
3414           HdfsEntry he = new HdfsEntry();
3415           synchronized (hbi) {
3416             if (hbi.getHdfsRegionDir() != null) {
3417               errors.print("Directory " + encodedName + " duplicate?? " +
3418                            hbi.getHdfsRegionDir());
3419             }
3420 
3421             he.hdfsRegionDir = regionDir.getPath();
3422             he.hdfsRegionDirModTime = regionDir.getModificationTime();
3423             Path regioninfoFile = new Path(he.hdfsRegionDir, HRegion.REGIONINFO_FILE);
3424             he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile);
3425             // we add to orphan list when we attempt to read .regioninfo
3426 
3427             // Set a flag if this region contains only edits
3428             // This is special case if a region is left after split
3429             he.hdfsOnlyEdits = true;
3430             FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3431             Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath());
3432             for (FileStatus subDir : subDirs) {
3433               String sdName = subDir.getPath().getName();
3434               if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3435                 he.hdfsOnlyEdits = false;
3436                 break;
3437               }
3438             }
3439             hbi.hdfsEntry = he;
3440           }
3441         }
3442       } catch (IOException e) {
3443         // unable to read the table directory from HDFS.
3444         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3445             + tableDir.getPath().getName()
3446             + " Unable to fetch region information. " + e);
3447         throw e;
3448       }
3449       return null;
3450     }
3451   }
3452 
3453   /**
3454    * Load the .regioninfo file for a single region from HDFS into its
3455    * corresponding HbckInfo entry.
3456    */
3457   static class WorkItemHdfsRegionInfo implements Callable<Void> {
3458     private HbckInfo hbi;
3459     private HBaseFsck hbck;
3460     private ErrorReporter errors;
3461 
3462     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3463       this.hbi = hbi;
3464       this.hbck = hbck;
3465       this.errors = errors;
3466     }
3467 
3468     @Override
3469     public synchronized Void call() throws IOException {
3470       // only load entries that haven't been loaded yet.
3471       if (hbi.getHdfsHRI() == null) {
3472         try {
3473           hbck.loadHdfsRegioninfo(hbi);
3474         } catch (IOException ioe) {
3475           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3476               + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
3477               + hbi.getHdfsRegionDir()
3478               + "!  The .regioninfo file may have an invalid format or version.  Treating as "
3479               + "an orphaned regiondir.";
3480           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3481           try {
3482             hbck.debugLsr(hbi.getHdfsRegionDir());
3483           } catch (IOException ioe2) {
3484             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3485             throw ioe2;
3486           }
3487           hbck.orphanHdfsDirs.add(hbi);
3488           throw ioe;
3489         }
3490       }
3491       return null;
3492     }
3493   }
3494 
3495   /**
3496    * Display the full report from fsck. This displays all live and dead region
3497    * servers, and all known regions.
3498    */
3499   public void setDisplayFullReport() {
3500     details = true;
3501   }
3502 
3503   /**
3504    * Set summary mode.
3505    * Print only summary of the tables and status (OK or INCONSISTENT)
3506    */
3507   void setSummary() {
3508     summary = true;
3509   }
3510 
3511   /**
3512    * Set META check mode.
3513    * Print only info about META table deployment/state
3514    */
3515   void setCheckMetaOnly() {
3516     checkMetaOnly = true;
3517   }
3518 
3519   /**
3520    * Mark that fsck should be rerun. Called after we have tried to fix
3521    * something, so the tool can be run once more to verify the result.
3522    */
3525   void setShouldRerun() {
3526     rerun = true;
3527   }
3528 
3529   boolean shouldRerun() {
3530     return rerun;
3531   }
3532 
3533   /**
3534    * Enable or disable fixing of assignment inconsistencies found by fsck.
3535    */
3537   public void setFixAssignments(boolean shouldFix) {
3538     fixAssignments = shouldFix;
3539   }
3540 
3541   boolean shouldFixAssignments() {
3542     return fixAssignments;
3543   }
3544 
3545   public void setFixMeta(boolean shouldFix) {
3546     fixMeta = shouldFix;
3547   }
3548 
3549   boolean shouldFixMeta() {
3550     return fixMeta;
3551   }
3552 
3553   public void setCheckHdfs(boolean checking) {
3554     checkHdfs = checking;
3555   }
3556 
3557   boolean shouldCheckHdfs() {
3558     return checkHdfs;
3559   }
3560 
3561   public void setFixHdfsHoles(boolean shouldFix) {
3562     fixHdfsHoles = shouldFix;
3563   }
3564 
3565   boolean shouldFixHdfsHoles() {
3566     return fixHdfsHoles;
3567   }
3568 
3569   public void setFixTableOrphans(boolean shouldFix) {
3570     fixTableOrphans = shouldFix;
3571   }
3572 
3573   boolean shouldFixTableOrphans() {
3574     return fixTableOrphans;
3575   }
3576 
3577   public void setFixHdfsOverlaps(boolean shouldFix) {
3578     fixHdfsOverlaps = shouldFix;
3579   }
3580 
3581   boolean shouldFixHdfsOverlaps() {
3582     return fixHdfsOverlaps;
3583   }
3584 
3585   public void setFixHdfsOrphans(boolean shouldFix) {
3586     fixHdfsOrphans = shouldFix;
3587   }
3588 
3589   boolean shouldFixHdfsOrphans() {
3590     return fixHdfsOrphans;
3591   }
3592 
3593   public void setFixVersionFile(boolean shouldFix) {
3594     fixVersionFile = shouldFix;
3595   }
3596 
3597   public boolean shouldFixVersionFile() {
3598     return fixVersionFile;
3599   }
3600 
3601   public void setSidelineBigOverlaps(boolean sbo) {
3602     this.sidelineBigOverlaps = sbo;
3603   }
3604 
3605   public boolean shouldSidelineBigOverlaps() {
3606     return sidelineBigOverlaps;
3607   }
3608 
3609   public void setFixSplitParents(boolean shouldFix) {
3610     fixSplitParents = shouldFix;
3611   }
3612 
3613   boolean shouldFixSplitParents() {
3614     return fixSplitParents;
3615   }
3616 
3617   public void setFixReferenceFiles(boolean shouldFix) {
3618     fixReferenceFiles = shouldFix;
3619   }
3620 
3621   boolean shouldFixReferenceFiles() {
3622     return fixReferenceFiles;
3623   }
3624 
3625   public boolean shouldIgnorePreCheckPermission() {
3626     return ignorePreCheckPermission;
3627   }
3628 
3629   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3630     this.ignorePreCheckPermission = ignorePreCheckPermission;
3631   }
3632 
3633   /**
3634    * @param mm maximum number of regions to merge into a single region.
3635    */
3636   public void setMaxMerge(int mm) {
3637     this.maxMerge = mm;
3638   }
3639 
3640   public int getMaxMerge() {
3641     return maxMerge;
3642   }
3643 
3644   public void setMaxOverlapsToSideline(int mo) {
3645     this.maxOverlapsToSideline = mo;
3646   }
3647 
3648   public int getMaxOverlapsToSideline() {
3649     return maxOverlapsToSideline;
3650   }
3651 
3652   /**
3653    * Only check/fix tables specified by the list.
3654    * An empty list means all tables are included.
3655    */
3656   boolean isTableIncluded(String table) {
3657     return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
3658   }
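
       /*
        * Usage sketch (hypothetical; "fsck" is an HBaseFsck instance): restricting
        * a run to specific tables -- the default empty set means every table is
        * checked:
        *
        *   fsck.includeTable("usertable");
        *   // isTableIncluded("usertable") => true; isTableIncluded("other") => false
        */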
3659 
3660   public void includeTable(String table) {
3661     tablesIncluded.add(table);
3662   }
3663 
3664   Set<String> getIncludedTables() {
3665     return new HashSet<String>(tablesIncluded);
3666   }
3667 
3668   /**
3669    * We are interested only in those tables that have not changed their state
3670    * in META during the last few seconds specified by hbase.admin.fsck.timelag.
3671    * @param seconds the time lag, in seconds
3672    */
3673   public void setTimeLag(long seconds) {
3674     timelag = seconds * 1000; // convert to milliseconds
3675   }
3676 
3677   /**
3678    * @param sidelineDir HDFS path under which to sideline data
3679    */
3681   public void setSidelineDir(String sidelineDir) {
3682     this.sidelineDir = new Path(sidelineDir);
3683   }
3684 
3685   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3686     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3687   }
3688 
3689   public HFileCorruptionChecker getHFilecorruptionChecker() {
3690     return hfcc;
3691   }
3692 
3693   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3694     this.hfcc = hfcc;
3695   }
3696 
3697   /**
3698    * Set region boundaries check mode.
3699    */
3700   void setRegionBoundariesCheck() {
3701     checkRegionBoundaries = true;
3702   }
3703 
3704   public void setRetCode(int code) {
3705     this.retcode = code;
3706   }
3707 
3708   public int getRetCode() {
3709     return retcode;
3710   }
3711 
3712   protected HBaseFsck printUsageAndExit() {
3713     StringWriter sw = new StringWriter(2048);
3714     PrintWriter out = new PrintWriter(sw);
3715     out.println("Usage: fsck [opts] {only tables}");
3716     out.println(" where [opts] are:");
3717     out.println("   -help Display help options (this)");
3718     out.println("   -details Display full report of all regions.");
3719     out.println("   -timelag <timeInSeconds>  Process only regions that" +
3720                        " have not experienced any metadata updates in the last" +
3721                        " <timeInSeconds> seconds.");
3722     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3723         " before checking if the fix worked if run with -fix");
3724     out.println("   -summary Print only summary of the tables and status.");
3725     out.println("   -metaonly Only check the state of ROOT and META tables.");
3726     out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta and root.");
3727 
3728     out.println("");
3729     out.println("  Metadata Repair options: (expert features, use with caution!)");
3730     out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility");
3731     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
3732     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
3733     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
3734         + " Assumes META region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3735     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
3736     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
3737     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3738     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
3739     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
3740     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
3741     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
3742     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
3743     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
3744     out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
3745     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
3746     out.println("   -boundaries Verify that regions boundaries are the same between META and store files.");
3747 
3748     out.println("");
3749     out.println("  Datafile Repair options: (expert features, use with caution!)");
3750     out.println("   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
3751     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
3752 
3753     out.println("");
3754     out.println("  Metadata Repair shortcuts");
3755     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
3756         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles");
3757     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3758 
3759     out.flush();
3760     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3761 
3762     setRetCode(-2);
3763     return this;
3764   }
3765 
3766   /**
3767    * Main program
3768    *
3769    * @param args
3770    * @throws Exception
3771    */
3772   public static void main(String[] args) throws Exception {
3773     // create a fsck object
3774     Configuration conf = HBaseConfiguration.create();
3775     Path hbasedir = new Path(conf.get(HConstants.HBASE_DIR));
3776     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3777     conf.set("fs.defaultFS", defaultFs.toString());     // for hadoop 0.21+
3778     conf.set("fs.default.name", defaultFs.toString());  // for hadoop 0.20
3779     int ret = ToolRunner.run(new HBaseFsck(conf), args);
3780     System.exit(ret);
3781   }
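
       /*
        * Typical invocations (all flags are documented in printUsageAndExit above):
        *
        *   hbase hbck                       # report-only consistency check
        *   hbase hbck -details              # full report of all regions
        *   hbase hbck -fixAssignments t1    # fix assignment problems for table t1
        *   hbase hbck -repair               # aggressive repair shortcut
        */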
3782 
3783   @Override
3784   public int run(String[] args) throws Exception {
3785     // reset numThreads because the user may have set it via generic options
3786     initialPoolNumThreads();
3787     
3788     exec(executor, args);
3789     return getRetCode();
3790   }
3791 
3792   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
3793     InterruptedException {
3794     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3795 
3796     boolean checkCorruptHFiles = false;
3797     boolean sidelineCorruptHFiles = false;
3798 
3799     // Process command-line args.
3800     for (int i = 0; i < args.length; i++) {
3801       String cmd = args[i];
3802       if (cmd.equals("-help") || cmd.equals("-h")) {
3803         return printUsageAndExit();
3804       } else if (cmd.equals("-details")) {
3805         setDisplayFullReport();
3806       } else if (cmd.equals("-timelag")) {
3807         if (i == args.length - 1) {
3808           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3809           return printUsageAndExit();
3810         }
3811         try {
3812           long timelag = Long.parseLong(args[i+1]);
3813           setTimeLag(timelag);
3814         } catch (NumberFormatException e) {
3815           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3816           return printUsageAndExit();
3817         }
3818         i++;
3819       } else if (cmd.equals("-sleepBeforeRerun")) {
3820         if (i == args.length - 1) {
3821           errors.reportError(ERROR_CODE.WRONG_USAGE,
3822             "HBaseFsck: -sleepBeforeRerun needs a value.");
3823           return printUsageAndExit();
3824         }
3825         try {
3826           sleepBeforeRerun = Long.parseLong(args[i+1]);
3827         } catch (NumberFormatException e) {
3828           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3829           return printUsageAndExit();
3830         }
3831         i++;
3832       } else if (cmd.equals("-sidelineDir")) {
3833         if (i == args.length - 1) {
3834           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3835           return printUsageAndExit();
3836         }
3837         i++;
3838         setSidelineDir(args[i]);
3839       } else if (cmd.equals("-fix")) {
3840         errors.reportError(ERROR_CODE.WRONG_USAGE,
3841           "This option is deprecated, please use -fixAssignments instead.");
3842         setFixAssignments(true);
3843       } else if (cmd.equals("-fixAssignments")) {
3844         setFixAssignments(true);
3845       } else if (cmd.equals("-fixMeta")) {
3846         setFixMeta(true);
3847       } else if (cmd.equals("-noHdfsChecking")) {
3848         setCheckHdfs(false);
3849       } else if (cmd.equals("-fixHdfsHoles")) {
3850         setFixHdfsHoles(true);
3851       } else if (cmd.equals("-fixHdfsOrphans")) {
3852         setFixHdfsOrphans(true);
3853       } else if (cmd.equals("-fixTableOrphans")) {
3854         setFixTableOrphans(true);
3855       } else if (cmd.equals("-fixHdfsOverlaps")) {
3856         setFixHdfsOverlaps(true);
3857       } else if (cmd.equals("-fixVersionFile")) {
3858         setFixVersionFile(true);
3859       } else if (cmd.equals("-sidelineBigOverlaps")) {
3860         setSidelineBigOverlaps(true);
3861       } else if (cmd.equals("-fixSplitParents")) {
3862         setFixSplitParents(true);
3863       } else if (cmd.equals("-ignorePreCheckPermission")) {
3864         setIgnorePreCheckPermission(true);
3865       } else if (cmd.equals("-checkCorruptHFiles")) {
3866         checkCorruptHFiles = true;
3867       } else if (cmd.equals("-sidelineCorruptHFiles")) {
3868         sidelineCorruptHFiles = true;
3869       } else if (cmd.equals("-fixReferenceFiles")) {
3870         setFixReferenceFiles(true);
3871       } else if (cmd.equals("-repair")) {
3872         // this attempts to merge overlapping hdfs regions, needs testing
3873         // under load
3874         setFixHdfsHoles(true);
3875         setFixHdfsOrphans(true);
3876         setFixMeta(true);
3877         setFixAssignments(true);
3878         setFixHdfsOverlaps(true);
3879         setFixVersionFile(true);
3880         setSidelineBigOverlaps(true);
3881         setFixSplitParents(false);
3882         setCheckHdfs(true);
3883         setFixReferenceFiles(true);
3884       } else if (cmd.equals("-repairHoles")) {
3885         // this will make all missing hdfs regions available but may lose data
3886         setFixHdfsHoles(true);
3887         setFixHdfsOrphans(false);
3888         setFixMeta(true);
3889         setFixAssignments(true);
3890         setFixHdfsOverlaps(false);
3891         setSidelineBigOverlaps(false);
3892         setFixSplitParents(false);
3893         setCheckHdfs(true);
3894       } else if (cmd.equals("-maxOverlapsToSideline")) {
3895         if (i == args.length - 1) {
3896           errors.reportError(ERROR_CODE.WRONG_USAGE,
3897             "-maxOverlapsToSideline needs a numeric value argument.");
3898           return printUsageAndExit();
3899         }
3900         try {
3901           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
3902           setMaxOverlapsToSideline(maxOverlapsToSideline);
3903         } catch (NumberFormatException e) {
3904           errors.reportError(ERROR_CODE.WRONG_USAGE,
3905             "-maxOverlapsToSideline needs a numeric value argument.");
3906           return printUsageAndExit();
3907         }
3908         i++;
3909       } else if (cmd.equals("-maxMerge")) {
3910         if (i == args.length - 1) {
3911           errors.reportError(ERROR_CODE.WRONG_USAGE,
3912             "-maxMerge needs a numeric value argument.");
3913           return printUsageAndExit();
3914         }
3915         try {
3916           int maxMerge = Integer.parseInt(args[i+1]);
3917           setMaxMerge(maxMerge);
3918         } catch (NumberFormatException e) {
3919           errors.reportError(ERROR_CODE.WRONG_USAGE,
3920             "-maxMerge needs a numeric value argument.");
3921           return printUsageAndExit();
3922         }
3923         i++;
3924       } else if (cmd.equals("-summary")) {
3925         setSummary();
3926       } else if (cmd.equals("-metaonly")) {
3927         setCheckMetaOnly();
3928       } else if (cmd.equals("-boundaries")) {
3929         setRegionBoundariesCheck();
3930       } else if (cmd.startsWith("-")) {
3931         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
3932         return printUsageAndExit();
3933       } else {
3934         includeTable(cmd);
3935         errors.print("Allow checking/fixes for table: " + cmd);
3936       }
3937     }
3938 
3939     // pre-check current user has FS write permission or not
3940     try {
3941       preCheckPermission();
3942     } catch (AccessControlException ace) {
3943       LOG.error("Current user lacks required filesystem permissions; aborting", ace);
           Runtime.getRuntime().exit(-1);
3944     } catch (IOException ioe) {
3945       LOG.error("Filesystem permission pre-check failed; aborting", ioe);
           Runtime.getRuntime().exit(-1);
3946     }
3947 
3948     // do the real work of hbck
3949     connect();
3950 
3951     try {
3952       // if corrupt file mode is on, first fix them since they may be opened later
3953       if (checkCorruptHFiles || sidelineCorruptHFiles) {
3954         LOG.info("Checking all hfiles for corruption");
3955         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3956         setHFileCorruptionChecker(hfcc); // so we can get result
3957         Collection<String> tables = getIncludedTables();
3958         Collection<Path> tableDirs = new ArrayList<Path>();
3959         Path rootdir = FSUtils.getRootDir(getConf());
3960         if (tables.size() > 0) {
3961           for (String t : tables) {
3962             tableDirs.add(FSUtils.getTablePath(rootdir, t));
3963           }
3964         } else {
3965           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
3966         }
3967         hfcc.checkTables(tableDirs);
3968         hfcc.report(errors);
3969       }
3970 
3971       // check and fix table integrity, region consistency.
3972       int code = onlineHbck();
3973       setRetCode(code);
3974       // If we have changed the HBase state it is better to run hbck again
3975       // to see if we haven't broken something else in the process.
3976       // We run it only once more because otherwise we can easily fall into
3977       // an infinite loop.
3978       if (shouldRerun()) {
3979         try {
3980           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3981           Thread.sleep(sleepBeforeRerun);
3982         } catch (InterruptedException ie) {
3983           return this;
3984         }
3985         // Just report
3986         setFixAssignments(false);
3987         setFixMeta(false);
3988         setFixHdfsHoles(false);
3989         setFixHdfsOverlaps(false);
3990         setFixVersionFile(false);
3991         setFixTableOrphans(false);
3992         errors.resetErrors();
3993         code = onlineHbck();
3994         setRetCode(code);
3995       }
3996     } finally {
3997       IOUtils.cleanup(null, connection, meta, admin);
3998     }
3999     return this;
4000   }
4001 
4002   /**
4003    * ls -r for debugging purposes
4004    */
4005   void debugLsr(Path p) throws IOException {
4006     debugLsr(getConf(), p, errors);
4007   }
4008 
4009   /**
4010    * ls -r for debugging purposes
4011    */
4012   public static void debugLsr(Configuration conf,
4013       Path p) throws IOException {
4014     debugLsr(conf, p, new PrintingErrorReporter());
4015   }
4016 
4017   /**
4018    * ls -r for debugging purposes
4019    */
4020   public static void debugLsr(Configuration conf,
4021       Path p, ErrorReporter errors) throws IOException {
4022     if (!LOG.isDebugEnabled() || p == null) {
4023       return;
4024     }
4025     FileSystem fs = p.getFileSystem(conf);
4026 
4027     if (!fs.exists(p)) {
4028       // nothing
4029       return;
4030     }
4031     errors.print(p.toString());
4032 
4033     if (fs.isFile(p)) {
4034       return;
4035     }
4036 
4037     if (fs.getFileStatus(p).isDir()) {
4038       FileStatus[] fss= fs.listStatus(p);
4039       for (FileStatus status : fss) {
4040         debugLsr(conf, status.getPath(), errors);
4041       }
4042     }
4043   }
4044 }