001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.io.PrintWriter;
023import java.io.StringWriter;
024import java.net.InetAddress;
025import java.net.URI;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.Comparator;
030import java.util.HashMap;
031import java.util.HashSet;
032import java.util.Iterator;
033import java.util.List;
034import java.util.Map;
035import java.util.Map.Entry;
036import java.util.Set;
037import java.util.SortedMap;
038import java.util.SortedSet;
039import java.util.TreeMap;
040import java.util.TreeSet;
041import java.util.concurrent.Callable;
042import java.util.concurrent.ConcurrentSkipListMap;
043import java.util.concurrent.ExecutionException;
044import java.util.concurrent.ExecutorService;
045import java.util.concurrent.Future;
046import java.util.concurrent.ScheduledThreadPoolExecutor;
047import java.util.concurrent.atomic.AtomicBoolean;
048import java.util.concurrent.atomic.AtomicInteger;
049
050import org.apache.commons.logging.Log;
051import org.apache.commons.logging.LogFactory;
052import org.apache.hadoop.conf.Configuration;
053import org.apache.hadoop.conf.Configured;
054import org.apache.hadoop.fs.FSDataInputStream;
055import org.apache.hadoop.fs.FSDataOutputStream;
056import org.apache.hadoop.fs.FileStatus;
057import org.apache.hadoop.fs.FileSystem;
058import org.apache.hadoop.fs.Path;
059import org.apache.hadoop.fs.permission.FsAction;
060import org.apache.hadoop.fs.permission.FsPermission;
061import org.apache.hadoop.hbase.Abortable;
062import org.apache.hadoop.hbase.ClusterStatus;
063import org.apache.hadoop.hbase.HBaseConfiguration;
064import org.apache.hadoop.hbase.HColumnDescriptor;
065import org.apache.hadoop.hbase.HConstants;
066import org.apache.hadoop.hbase.HRegionInfo;
067import org.apache.hadoop.hbase.HRegionLocation;
068import org.apache.hadoop.hbase.HTableDescriptor;
069import org.apache.hadoop.hbase.KeyValue;
070import org.apache.hadoop.hbase.MasterNotRunningException;
071import org.apache.hadoop.hbase.ServerName;
072import org.apache.hadoop.hbase.ZooKeeperConnectionException;
073import org.apache.hadoop.hbase.catalog.MetaReader;
074import org.apache.hadoop.hbase.client.Delete;
075import org.apache.hadoop.hbase.client.Get;
076import org.apache.hadoop.hbase.client.HBaseAdmin;
077import org.apache.hadoop.hbase.client.HConnection;
078import org.apache.hadoop.hbase.client.HConnectionManager;
079import org.apache.hadoop.hbase.client.HConnectionManager.HConnectable;
080import org.apache.hadoop.hbase.client.HTable;
081import org.apache.hadoop.hbase.client.MetaScanner;
082import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
083import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
084import org.apache.hadoop.hbase.client.Put;
085import org.apache.hadoop.hbase.client.Result;
086import org.apache.hadoop.hbase.client.RowMutations;
087import org.apache.hadoop.hbase.client.UserProvider;
088import org.apache.hadoop.hbase.io.hfile.CacheConfig;
089import org.apache.hadoop.hbase.io.hfile.HFile;
090import org.apache.hadoop.hbase.ipc.HRegionInterface;
091import org.apache.hadoop.hbase.master.MasterFileSystem;
092import org.apache.hadoop.hbase.regionserver.HRegion;
093import org.apache.hadoop.hbase.regionserver.StoreFile;
094import org.apache.hadoop.hbase.regionserver.wal.HLog;
095import org.apache.hadoop.hbase.security.User;
096import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
097import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
098import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
099import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
100import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
101import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
102import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
103import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
104import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
105import org.apache.hadoop.io.IOUtils;
106import org.apache.hadoop.ipc.RemoteException;
107import org.apache.hadoop.security.AccessControlException;
108import org.apache.hadoop.util.ReflectionUtils;
109import org.apache.hadoop.util.Tool;
110import org.apache.hadoop.util.ToolRunner;
111import org.apache.zookeeper.KeeperException;
112
113import com.google.common.base.Joiner;
114import com.google.common.base.Preconditions;
115import com.google.common.collect.Lists;
116import com.google.common.collect.Multimap;
117import com.google.common.collect.TreeMultimap;
118
119/**
120 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
121 * table integrity problems in a corrupted HBase.
122 * <p>
123 * Region consistency checks verify that .META., region deployment on region
124 * servers and the state of data in HDFS (.regioninfo files) all are in
125 * accordance.
126 * <p>
127 * Table integrity checks verify that all possible row keys resolve to exactly
128 * one region of a table.  This means there are no individual degenerate
129 * or backwards regions; no holes between regions; and that there are no
130 * overlapping regions.
131 * <p>
132 * The general repair strategy works in two phases:
133 * <ol>
134 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
135 * <li> Repair Region Consistency with .META. and assignments
136 * </ol>
137 * <p>
138 * For table integrity repairs, the tables' region directories are scanned
139 * for .regioninfo files.  Each table's integrity is then verified.  If there
140 * are any orphan regions (regions with no .regioninfo files) or holes, new
141 * regions are fabricated.  Backwards regions are sidelined as well as empty
142 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
143 * a new region is created and all data is merged into the new region.
144 * <p>
145 * Table integrity repairs deal solely with HDFS and could potentially be done
146 * offline -- the hbase region servers or master do not need to be running.
147 * This phase can eventually be used to completely reconstruct the META table in
148 * an offline fashion.
149 * <p>
150 * Region consistency requires three conditions -- 1) valid .regioninfo file
151 * present in an HDFS region dir,  2) valid row with .regioninfo data in META,
152 * and 3) a region is deployed only at the regionserver that was assigned to
153 * with proper state in the master.
154 * <p>
155 * Region consistency repairs require hbase to be online so that hbck can
156 * contact the HBase master and region servers.  The hbck#connect() method must
157 * first be called successfully.  Much of the region consistency information
158 * is transient and less risky to repair.
159 * <p>
160 * If hbck is run from the command line, there are a handful of arguments that
161 * can be used to limit the kinds of repairs hbck will do.  See the code in
162 * {@link #printUsageAndExit()} for more details.
163 */
164public class HBaseFsck extends Configured implements Tool {
165  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
166  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
167  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
168  private static boolean rsSupportsOffline = true;
169  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
170  private static final int DEFAULT_MAX_MERGE = 5;
171  private static final String TO_BE_LOADED = "to_be_loaded";
172  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
173
174
175  /**********************
176   * Internal resources
177   **********************/
178  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
179  private ClusterStatus status;
180  private HConnection connection;
181  private HBaseAdmin admin;
182  private HTable meta;
183  // threads to do ||izable tasks: retrieve data from regionservers, handle overlapping regions
184  protected ExecutorService executor;
185  private long startMillis = System.currentTimeMillis();
186  private HFileCorruptionChecker hfcc;
187  private int retcode = 0;
188  private Path HBCK_LOCK_PATH;
189  private FSDataOutputStream hbckOutFd;
190  // This lock is to prevent cleanup of balancer resources twice between
191  // ShutdownHook and the main code. We cleanup only if the connect() is
192  // successful
193  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
194
195  /***********
196   * Options
197   ***********/
198  private static boolean details = false; // do we display the full report
199  private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
200  private boolean fixAssignments = false; // fix assignment errors?
201  private boolean fixMeta = false; // fix meta errors?
202  private boolean checkHdfs = true; // load and check fs consistency?
203  private boolean fixHdfsHoles = false; // fix fs holes?
204  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
205  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
206  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
207  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
208  private boolean fixSplitParents = false; // fix lingering split parents
209  private boolean fixReferenceFiles = false; // fix lingering reference store file
210
211  // limit checking/fixes to listed tables, if empty attempt to check/fix all
212  // -ROOT- and .META. are always checked
213  private Set<String> tablesIncluded = new HashSet<String>();
214  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
215  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
216  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
217  private Path sidelineDir = null;
218
219  private boolean rerun = false; // if we tried to fix something, rerun hbck
220  private static boolean summary = false; // if we want to print less output
221  private boolean checkMetaOnly = false;
222  private boolean checkRegionBoundaries = false;
223  private boolean ignorePreCheckPermission = false; // if pre-check permission
224
225  /*********
226   * State
227   *********/
228  final private ErrorReporter errors;
229  int fixes = 0;
230
231  /**
232   * This map contains the state of all hbck items.  It maps from encoded region
233   * name to HbckInfo structure.  The information contained in HbckInfo is used
234   * to detect and correct consistency (hdfs/meta/deployment) problems.
235   */
236  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
237  private TreeSet<byte[]> disabledTables =
238    new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
239  // Empty regioninfo qualifiers in .META.
240  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
241
242  /**
243   * This map from Tablename -> TableInfo contains the structures necessary to
244   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
245   * to prevent dupes.
246   *
247   * If tablesIncluded is empty, this map contains all tables.
248   * Otherwise, it contains only meta tables and tables in tablesIncluded,
249   * unless checkMetaOnly is specified, in which case, it contains only
250   * the meta tables (.META. and -ROOT-).
251   */
252  private SortedMap<String, TableInfo> tablesInfo = new ConcurrentSkipListMap<String,TableInfo>();
253
254  /**
255   * When initially looking at HDFS, we attempt to find any orphaned data.
256   */
257  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
258
259  private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();
260
261  /**
262   * Constructor
263   *
264   * @param conf Configuration object
265   * @throws MasterNotRunningException if the master is not running
266   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
267   */
268  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
269      ZooKeeperConnectionException, IOException, ClassNotFoundException {
270    super(conf);
271    errors = getErrorReporter(conf);
272
273    initialPoolNumThreads();
274  }
275
276  /**
277   * Constructor
278   *
279   * @param conf
280   *          Configuration object
281   * @throws MasterNotRunningException
282   *           if the master is not running
283   * @throws ZooKeeperConnectionException
284   *           if unable to connect to ZooKeeper
285   */
286  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
287      ZooKeeperConnectionException, IOException, ClassNotFoundException {
288    super(conf);
289    errors = getErrorReporter(getConf());
290    this.executor = exec;
291  }
292
293  /**
294   * This method maintains a lock using a file. If the creation fails we return null
295   *
296   * @return FSDataOutputStream object corresponding to the newly opened lock file
297   * @throws IOException
298   */
299  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
300    try {
301      FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
302      FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
303          HConstants.DATA_FILE_UMASK_KEY);
304      Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
305      fs.mkdirs(tmpDir);
306      HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
307      final FSDataOutputStream out = FSUtils.create(fs, HBCK_LOCK_PATH, defaultPerms, false);
308      out.writeBytes(InetAddress.getLocalHost().toString());
309      out.flush();
310      return out;
311    } catch (IOException exception) {
312      RemoteException e = null;
313      if (exception instanceof RemoteException) {
314        e = (RemoteException)exception;
315      } else if (exception.getCause() instanceof RemoteException) {
316        e = (RemoteException)(exception.getCause());
317      }
318      if(null != e && AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
319        return null;
320      }
321      throw exception;
322    }
323  }
324
325  private void unlockHbck() {
326    if(hbckLockCleanup.compareAndSet(true, false)){
327      IOUtils.closeStream(hbckOutFd);
328      try{
329        FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
330      } catch(IOException ioe) {
331        LOG.warn("Failed to delete " + HBCK_LOCK_PATH);
332        LOG.debug(ioe);
333      }
334    }
335  }
336
337  /**
338   * To repair region consistency, one must call connect() in order to repair
339   * online state.
340   */
341  public void connect() throws IOException {
342
343    // Check if another instance of balancer is running
344    hbckOutFd = checkAndMarkRunningHbck();
345    if (hbckOutFd == null) {
346      setRetCode(-1);
347      LOG.error("Another instance of hbck is running, exiting this instance.[If you are sure" +
348          " no other instance is running, delete the lock file " +
349          HBCK_LOCK_PATH + " and rerun the tool]");
350      throw new IOException("Duplicate hbck - Abort");
351    }
352
353    // Make sure to cleanup the lock
354    hbckLockCleanup.set(true);
355
356    // Add a shutdown hook to this thread, incase user tries to
357    // kill the hbck with a ctrl-c, we want to cleanup the lock so that
358    // it is available for further calls
359    Runtime.getRuntime().addShutdownHook(new Thread() {
360      public void run() {
361          unlockHbck();
362      }
363    });
364    LOG.debug("Launching hbck");
365
366    admin = new HBaseAdmin(getConf());
367    meta = new HTable(getConf(), HConstants.META_TABLE_NAME);
368    status = admin.getMaster().getClusterStatus();
369    connection = admin.getConnection();
370  }
371
372  /**
373   * Initial numThreads for {@link #executor}
374   */
375  private void initialPoolNumThreads() {
376    if (executor != null) {
377      executor.shutdown();
378    }
379
380    int numThreads = getConf().getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
381    executor = new ScheduledThreadPoolExecutor(numThreads);
382  }
383
384  /**
385   * Get deployed regions according to the region servers.
386   */
387  private void loadDeployedRegions() throws IOException, InterruptedException {
388    // From the master, get a list of all known live region servers
389    Collection<ServerName> regionServers = status.getServers();
390    errors.print("Number of live region servers: " + regionServers.size());
391    if (details) {
392      for (ServerName rsinfo: regionServers) {
393        errors.print("  " + rsinfo.getServerName());
394      }
395    }
396
397    // From the master, get a list of all dead region servers
398    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
399    errors.print("Number of dead region servers: " + deadRegionServers.size());
400    if (details) {
401      for (ServerName name: deadRegionServers) {
402        errors.print("  " + name);
403      }
404    }
405
406    // Print the current master name and state
407    errors.print("Master: " + status.getMaster());
408
409    // Print the list of all backup masters
410    Collection<ServerName> backupMasters = status.getBackupMasters();
411    errors.print("Number of backup masters: " + backupMasters.size());
412    if (details) {
413      for (ServerName name: backupMasters) {
414        errors.print("  " + name);
415      }
416    }
417
418    // Determine what's deployed
419    processRegionServers(regionServers);
420  }
421
422  /**
423   * Clear the current state of hbck.
424   */
425  private void clearState() {
426    // Make sure regionInfo is empty before starting
427    fixes = 0;
428    regionInfoMap.clear();
429    emptyRegionInfoQualifiers.clear();
430    disabledTables.clear();
431    errors.clear();
432    tablesInfo.clear();
433    orphanHdfsDirs.clear();
434  }
435
436  /**
437   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
438   * the table integrity rules.  HBase doesn't need to be online for this
439   * operation to work.
440   */
441  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
442    // Initial pass to fix orphans.
443    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
444        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
445      LOG.info("Loading regioninfos HDFS");
446      // if nothing is happening this should always complete in two iterations.
447      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
448      int curIter = 0;
449      do {
450        clearState(); // clears hbck state and reset fixes to 0 and.
451        // repair what's on HDFS
452        restoreHdfsIntegrity();
453        curIter++;// limit the number of iterations.
454      } while (fixes > 0 && curIter <= maxIterations);
455
456      // Repairs should be done in the first iteration and verification in the second.
457      // If there are more than 2 passes, something funny has happened.
458      if (curIter > 2) {
459        if (curIter == maxIterations) {
460          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
461              + "Tables integrity may not be fully repaired!");
462        } else {
463          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
464        }
465      }
466    }
467  }
468
469  /**
470   * This repair method requires the cluster to be online since it contacts
471   * region servers and the masters.  It makes each region's state in HDFS, in
472   * .META., and deployments consistent.
473   *
474   * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable
475   * error.  If 0, we have a clean hbase.
476   */
477  public int onlineConsistencyRepair() throws IOException, KeeperException,
478    InterruptedException {
479    clearState();
480
481    LOG.info("Loading regionsinfo from the .META. table");
482    boolean success = loadMetaEntries();
483    if (!success) return -1;
484
485    // Check if .META. is found only once and in the right place
486    if (!checkMetaRegion()) {
487      // Will remove later if we can fix it
488      errors.reportError("Encountered fatal error. Exiting...");
489      return -2;
490    }
491
492    // get a list of all tables that have not changed recently.
493    if (!checkMetaOnly) {
494      reportTablesInFlux();
495    }
496
497    // get regions according to what is online on each RegionServer
498    loadDeployedRegions();
499
500    // load regiondirs and regioninfos from HDFS
501    if (shouldCheckHdfs()) {
502      loadHdfsRegionDirs();
503      loadHdfsRegionInfos();
504    }
505
506    // Empty cells in .META.?
507    reportEmptyMetaCells();
508
509    // Get disabled tables from ZooKeeper
510    loadDisabledTables();
511
512    // fix the orphan tables
513    fixOrphanTables();
514
515    // Check and fix consistency
516    checkAndFixConsistency();
517
518    // Check integrity (does not fix)
519    checkIntegrity();
520    return errors.getErrorList().size();
521  }
522
523  /**
524   * Contacts the master and prints out cluster-wide information
525   * @return 0 on success, non-zero on failure
526   */
527  public int onlineHbck() throws IOException, KeeperException, InterruptedException {
528    // print hbase server version
529    errors.print("Version: " + status.getHBaseVersion());
530    offlineHdfsIntegrityRepair();
531
532    // turn the balancer off
533    boolean oldBalancer = admin.setBalancerRunning(false, true);
534    try {
535      onlineConsistencyRepair();
536    }
537    finally {
538      admin.setBalancerRunning(oldBalancer, false);
539    }
540
541    if (checkRegionBoundaries) {
542      checkRegionBoundaries();
543    }
544
545    offlineReferenceFileRepair();
546
547    // Remove the hbck lock
548    unlockHbck();
549
550    // Print table summary
551    printTableSummary(tablesInfo);
552    return errors.summarize();
553  }
554
555    public static byte[] keyOnly (byte[] b) {
556        if (b == null)
557          return b;
558        int rowlength = Bytes.toShort(b, 0);
559        byte[] result = new byte[rowlength];
560        System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
561        return result;
562      }
563    
564      private static class RegionBoundariesInformation {
565        public byte [] regionName;
566        public byte [] metaFirstKey;
567        public byte [] metaLastKey;
568        public byte [] storesFirstKey;
569        public byte [] storesLastKey;
570        public String toString () {
571          return "regionName=" + Bytes.toStringBinary(regionName) +
572                 "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
573                 "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
574                 "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
575                 "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
576        }
577      }
578    
579      public void checkRegionBoundaries() {
580        try {
581          ByteArrayComparator comparator = new ByteArrayComparator();
582          List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), false);
583          final RegionBoundariesInformation currentRegionBoundariesInformation =
584              new RegionBoundariesInformation();
585          for (HRegionInfo regionInfo : regions) {
586            currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
587            // For each region, get the start and stop key from the META and compare them to the
588            // same information from the Stores.
589            Path path = new Path(getConf().get(HConstants.HBASE_DIR) + "/"
590                + Bytes.toString(regionInfo.getTableName()) + "/"
591                + regionInfo.getEncodedName() + "/");
592            FileSystem fs = path.getFileSystem(getConf());
593            FileStatus[] files = fs.listStatus(path);
594            // For all the column families in this region...
595            byte[] storeFirstKey = null;
596            byte[] storeLastKey = null;
597            for (FileStatus file : files) {
598              String fileName = file.getPath().toString();
599              fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
600              if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
601                FileStatus[] storeFiles = fs.listStatus(file.getPath());
602                // For all the stores in this column family.
603                for (FileStatus storeFile : storeFiles) {
604                  HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
605                      getConf()));
606                  if ((reader.getFirstKey() != null)
607                      && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
608                          reader.getFirstKey()) > 0))) {
609                    storeFirstKey = reader.getFirstKey();
610                  }
611                  if ((reader.getLastKey() != null)
612                      && ((storeLastKey == null) || (comparator.compare(storeLastKey,
613                          reader.getLastKey())) < 0)) {
614                    storeLastKey = reader.getLastKey();
615                  }
616                  reader.close();
617                }
618              }
619            }
620            currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
621            currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
622            currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
623            currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
624            if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
625              currentRegionBoundariesInformation.metaFirstKey = null;
626            if (currentRegionBoundariesInformation.metaLastKey.length == 0)
627              currentRegionBoundariesInformation.metaLastKey = null;
628    
629            // For a region to be correct, we need the META start key to be smaller or equal to the
630            // smallest start key from all the stores, and the start key from the next META entry to
631            // be bigger than the last key from all the current stores. First region start key is null;
632            // Last region end key is null; some regions can be empty and not have any store.
633    
634            boolean valid = true;
635            // Checking start key.
636            if ((currentRegionBoundariesInformation.storesFirstKey != null)
637                && (currentRegionBoundariesInformation.metaFirstKey != null)) {
638              valid = valid
639                  && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
640                    currentRegionBoundariesInformation.metaFirstKey) >= 0;
641            }
642            // Checking stop key.
643            if ((currentRegionBoundariesInformation.storesLastKey != null)
644                && (currentRegionBoundariesInformation.metaLastKey != null)) {
645              valid = valid
646                  && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
647                    currentRegionBoundariesInformation.metaLastKey) < 0;
648            }
649            if (!valid) {
650              errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
651                tablesInfo.get(Bytes.toString(regionInfo.getTableName())));
652              LOG.warn("Region's boundaries not alligned between stores and META for:");
653              LOG.warn(currentRegionBoundariesInformation);
654            }
655          }
656        } catch (IOException e) {
657          LOG.error(e);
658        }
659      }
660    
661  /**
662   * Iterates through the list of all orphan/invalid regiondirs.
663   */
664  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
665    for (HbckInfo hi : orphanHdfsDirs) {
666      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
667      adoptHdfsOrphan(hi);
668    }
669  }
670
671  /**
672   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
673   * these orphans by creating a new region, and moving the column families,
674   * recovered edits, HLogs, into the new region dir.  We determine the region
675   * startkey and endkeys by looking at all of the hfiles inside the column
676   * families to identify the min and max keys. The resulting region will
677   * likely violate table integrity but will be dealt with by merging
678   * overlapping regions.
679   */
680  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
681    Path p = hi.getHdfsRegionDir();
682    FileSystem fs = p.getFileSystem(getConf());
683    FileStatus[] dirs = fs.listStatus(p);
684    if (dirs == null) {
685      LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " +
686          p + ". This dir could probably be deleted.");
687      return ;
688    }
689
690    String tableName = Bytes.toString(hi.getTableName());
691    TableInfo tableInfo = tablesInfo.get(tableName);
692    Preconditions.checkNotNull("Table " + tableName + "' not present!", tableInfo);
693    HTableDescriptor template = tableInfo.getHTD();
694
695    // find min and max key values
696    Pair<byte[],byte[]> orphanRegionRange = null;
697    for (FileStatus cf : dirs) {
698      String cfName= cf.getPath().getName();
699      // TODO Figure out what the special dirs are
700      if (cfName.startsWith(".") || cfName.equals("splitlog")) continue;
701
702      FileStatus[] hfiles = fs.listStatus(cf.getPath());
703      for (FileStatus hfile : hfiles) {
704        byte[] start, end;
705        HFile.Reader hf = null;
706        try {
707          CacheConfig cacheConf = new CacheConfig(getConf());
708          hf = HFile.createReader(fs, hfile.getPath(), cacheConf);
709          hf.loadFileInfo();
710          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
711          start = startKv.getRow();
712          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
713          end = endKv.getRow();
714        } catch (IOException ioe) {
715          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
716          continue;
717        } catch (NullPointerException ioe) {
718          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
719          continue;
720        } finally {
721          if (hf != null) {
722            hf.close();
723          }
724        }
725
726        // expand the range to include the range of all hfiles
727        if (orphanRegionRange == null) {
728          // first range
729          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
730        } else {
731          // TODO add test
732
733          // expand range only if the hfile is wider.
734          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
735            orphanRegionRange.setFirst(start);
736          }
737          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
738            orphanRegionRange.setSecond(end);
739          }
740        }
741      }
742    }
743    if (orphanRegionRange == null) {
744      LOG.warn("No data in dir " + p + ", sidelining data");
745      fixes++;
746      sidelineRegionDir(fs, hi);
747      return;
748    }
749    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
750        Bytes.toString(orphanRegionRange.getSecond()) + ")");
751
752    // create new region on hdfs.  move data into place.
753    HRegionInfo hri = new HRegionInfo(template.getName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
754    LOG.info("Creating new region : " + hri);
755    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
756    Path target = region.getRegionDir();
757
758    // rename all the data to new region
759    mergeRegionDirs(target, hi);
760    fixes++;
761  }
762
763  /**
764   * This method determines if there are table integrity errors in HDFS.  If
765   * there are errors and the appropriate "fix" options are enabled, the method
766   * will first correct orphan regions making them into legit regiondirs, and
767   * then reload to merge potentially overlapping regions.
768   *
769   * @return number of table integrity errors found
770   */
771  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
772    // Determine what's on HDFS
773    LOG.info("Loading HBase regioninfo from HDFS...");
774    loadHdfsRegionDirs(); // populating regioninfo table.
775
776    int errs = errors.getErrorList().size();
777    // First time just get suggestions.
778    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
779    checkHdfsIntegrity(false, false);
780
781    if (errors.getErrorList().size() == errs) {
782      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
783      return 0;
784    }
785
786    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
787      adoptHdfsOrphans(orphanHdfsDirs);
788      // TODO optimize by incrementally adding instead of reloading.
789    }
790
791    // Make sure there are no holes now.
792    if (shouldFixHdfsHoles()) {
793      clearState(); // this also resets # fixes.
794      loadHdfsRegionDirs();
795      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
796      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
797    }
798
799    // Now we fix overlaps
800    if (shouldFixHdfsOverlaps()) {
801      // second pass we fix overlaps.
802      clearState(); // this also resets # fixes.
803      loadHdfsRegionDirs();
804      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
805      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
806    }
807
808    return errors.getErrorList().size();
809  }
810
811  /**
812   * Scan all the store file names to find any lingering reference files,
813   * which refer to some none-exiting files. If "fix" option is enabled,
814   * any lingering reference file will be sidelined if found.
815   * <p>
816   * Lingering reference file prevents a region from opening. It has to
817   * be fixed before a cluster can start properly.
818   */
819  private void offlineReferenceFileRepair() throws IOException {
820    Configuration conf = getConf();
821    Path hbaseRoot = FSUtils.getRootDir(conf);
822    FileSystem fs = hbaseRoot.getFileSystem(conf);
823    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
824    for (Path path: allFiles.values()) {
825      boolean isReference = false;
826      try {
827        isReference = StoreFile.isReference(path);
828      } catch (Throwable t) {
829        // Ignore. Some files may not be store files at all.
830        // For example, files under .oldlogs folder in .META.
831        // Warning message is already logged by
832        // StoreFile#isReference.
833      }
834      if (!isReference) continue;
835
836      Path referredToFile = StoreFile.getReferredToFile(path);
837      if (fs.exists(referredToFile)) continue;  // good, expected
838
839      // Found a lingering reference file
840      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
841        "Found lingering reference file " + path);
842      if (!shouldFixReferenceFiles()) continue;
843
844      // Now, trying to fix it since requested
845      boolean success = false;
846      String pathStr = path.toString();
847
848      // A reference file path should be like
849      // ${hbase.rootdir}/table_name/region_id/family_name/referred_file.region_name
850      // Up 3 directories to get the table folder.
851      // So the file will be sidelined to a similar folder structure.
852      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
853      for (int i = 0; index > 0 && i < 3; i++) {
854        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index);
855      }
856      if (index > 0) {
857        Path rootDir = getSidelineDir();
858        Path dst = new Path(rootDir, pathStr.substring(index));
859        fs.mkdirs(dst.getParent());
860        LOG.info("Trying to sildeline reference file"
861          + path + " to " + dst);
862        setShouldRerun();
863
864        success = fs.rename(path, dst);
865      }
866      if (!success) {
867        LOG.error("Failed to sideline reference file " + path);
868      }
869    }
870  }
871
872  /**
873   * TODO -- need to add tests for this.
874   */
875  private void reportEmptyMetaCells() {
876    errors.print("Number of empty REGIONINFO_QUALIFIER rows in .META.: " +
877      emptyRegionInfoQualifiers.size());
878    if (details) {
879      for (Result r: emptyRegionInfoQualifiers) {
880        errors.print("  " + r);
881      }
882    }
883  }
884
885  /**
886   * TODO -- need to add tests for this.
887   */
888  private void reportTablesInFlux() {
889    AtomicInteger numSkipped = new AtomicInteger(0);
890    HTableDescriptor[] allTables = getTables(numSkipped);
891    errors.print("Number of Tables: " + allTables.length);
892    if (details) {
893      if (numSkipped.get() > 0) {
894        errors.detail("Number of Tables in flux: " + numSkipped.get());
895      }
896      for (HTableDescriptor td : allTables) {
897        String tableName = td.getNameAsString();
898        errors.detail("  Table: " + tableName + "\t" +
899                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
900                           (td.isRootRegion() ? "ROOT" :
901                            (td.isMetaRegion() ? "META" : "    ")) + "\t" +
902                           " families: " + td.getFamilies().size());
903      }
904    }
905  }
906
907  public ErrorReporter getErrors() {
908    return errors;
909  }
910
911  /**
912   * Read the .regioninfo file from the file system.  If there is no
913   * .regioninfo, add it to the orphan hdfs region list.
914   */
915  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
916    Path regionDir = hbi.getHdfsRegionDir();
917    if (regionDir == null) {
918      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
919      return;
920    }
921
922    if (hbi.hdfsEntry.hri != null) {
923      // already loaded data
924      return;
925    }
926
927    Path regioninfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
928    FileSystem fs = regioninfo.getFileSystem(getConf());
929
930    FSDataInputStream in = fs.open(regioninfo);
931    HRegionInfo hri = new HRegionInfo();
932    hri.readFields(in);
933    in.close();
934    LOG.debug("HRegionInfo read: " + hri.toString());
935    hbi.hdfsEntry.hri = hri;
936  }
937
938  /**
939   * Exception thrown when a integrity repair operation fails in an
940   * unresolvable way.
941   */
942  public static class RegionRepairException extends IOException {
943    private static final long serialVersionUID = 1L;
944    final IOException ioe;
945    public RegionRepairException(String s, IOException ioe) {
946      super(s);
947      this.ioe = ioe;
948    }
949  }
950
951  /**
952   * Populate hbi's from regionInfos loaded from file system.
953   */
954  private SortedMap<String, TableInfo> loadHdfsRegionInfos() throws IOException, InterruptedException {
955    tablesInfo.clear(); // regenerating the data
956    // generate region split structure
957    Collection<HbckInfo> hbckInfos = regionInfoMap.values();
958
959    // Parallelized read of .regioninfo files.
960    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
961    List<Future<Void>> hbiFutures;
962
963    for (HbckInfo hbi : hbckInfos) {
964      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
965      hbis.add(work);
966    }
967
968    // Submit and wait for completion
969    hbiFutures = executor.invokeAll(hbis);
970
971    for(int i=0; i<hbiFutures.size(); i++) {
972      WorkItemHdfsRegionInfo work = hbis.get(i);
973      Future<Void> f = hbiFutures.get(i);
974      try {
975        f.get();
976      } catch(ExecutionException e) {
977        LOG.warn("Failed to read .regioninfo file for region " +
978              work.hbi.getRegionNameAsString(), e.getCause());
979      }
980    }
981
982    // serialized table info gathering.
983    for (HbckInfo hbi: hbckInfos) {
984
985      if (hbi.getHdfsHRI() == null) {
986        // was an orphan
987        continue;
988      }
989
990
991      // get table name from hdfs, populate various HBaseFsck tables.
992      String tableName = Bytes.toString(hbi.getTableName());
993      if (tableName == null) {
994        // There was an entry in META not in the HDFS?
995        LOG.warn("tableName was null for: " + hbi);
996        continue;
997      }
998
999      TableInfo modTInfo = tablesInfo.get(tableName);
1000      if (modTInfo == null) {
1001        // only executed once per table.
1002        modTInfo = new TableInfo(tableName);
1003        Path hbaseRoot = FSUtils.getRootDir(getConf());
1004        tablesInfo.put(tableName, modTInfo);
1005        try {
1006          HTableDescriptor htd =
1007              FSTableDescriptors.getTableDescriptorFromFs(hbaseRoot.getFileSystem(getConf()),
1008              hbaseRoot, tableName);
1009          modTInfo.htds.add(htd);
1010        } catch (IOException ioe) {
1011          if (!orphanTableDirs.containsKey(tableName)) {
1012            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1013            //should only report once for each table
1014            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1015                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1016            Set<String> columns = new HashSet<String>();
1017            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1018          }
1019        }
1020      }
1021      if (!hbi.isSkipChecks()) {
1022        modTInfo.addRegionInfo(hbi);
1023      }
1024    }
1025
1026    return tablesInfo;
1027  }
1028
1029  /**
1030   * To get the column family list according to the column family dirs
1031   * @param columns
1032   * @param hbi
1033   * @return
1034   * @throws IOException
1035   */
1036  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1037    Path regionDir = hbi.getHdfsRegionDir();
1038    FileSystem fs = regionDir.getFileSystem(getConf());
1039    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1040    for (FileStatus subdir : subDirs) {
1041      String columnfamily = subdir.getPath().getName();
1042      columns.add(columnfamily);
1043    }
1044    return columns;
1045  }
1046
1047  /**
1048   * To fabricate a .tableinfo file with following contents<br>
1049   * 1. the correct tablename <br>
1050   * 2. the correct colfamily list<br>
1051   * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1052   * @param tableName
1053   * @throws IOException
1054   */
1055  private boolean fabricateTableInfo(String tableName, Set<String> columns) throws IOException {
1056    if (columns ==null || columns.isEmpty()) return false;
1057    HTableDescriptor htd = new HTableDescriptor(tableName);
1058    for (String columnfamimly : columns) {
1059      htd.addFamily(new HColumnDescriptor(columnfamimly));
1060    }
1061    FSTableDescriptors.createTableDescriptor(htd, getConf(), true);
1062    return true;
1063  }
1064
1065  /**
1066   * To fix orphan table by creating a .tableinfo file under tableDir <br>
1067   * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1068   * 2. else create a default .tableinfo file with following items<br>
1069   * &nbsp;2.1 the correct tablename <br>
1070   * &nbsp;2.2 the correct colfamily list<br>
1071   * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1072   * @throws IOException
1073   */
1074  public void fixOrphanTables() throws IOException {
1075    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1076
1077      Path hbaseRoot = FSUtils.getRootDir(getConf());
1078      List<String> tmpList = new ArrayList<String>();
1079      tmpList.addAll(orphanTableDirs.keySet());
1080      HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1081      Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
1082      int j = 0;
1083      int numFailedCase = 0;
1084      while (iter.hasNext()) {
1085        Entry<String, Set<String>> entry = (Entry<String, Set<String>>) iter.next();
1086        String tableName = entry.getKey();
1087        LOG.info("Trying to fix orphan table error: " + tableName);
1088        if (j < htds.length) {
1089          if (tableName.equals(Bytes.toString(htds[j].getName()))) {
1090            HTableDescriptor htd = htds[j];
1091            LOG.info("fixing orphan table: " + tableName + " from cache");
1092            FSTableDescriptors.createTableDescriptor(
1093                hbaseRoot.getFileSystem(getConf()), hbaseRoot, htd, true);
1094            j++;
1095            iter.remove();
1096          }
1097        } else {
1098          if (fabricateTableInfo(tableName, entry.getValue())) {
1099            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1100            LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
1101            iter.remove();
1102          } else {
1103            LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1104            numFailedCase++;
1105          }
1106        }
1107        fixes++;
1108      }
1109
1110      if (orphanTableDirs.isEmpty()) {
1111        // all orphanTableDirs are luckily recovered
1112        // re-run doFsck after recovering the .tableinfo file
1113        setShouldRerun();
1114        LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1115      } else if (numFailedCase > 0) {
1116        LOG.error("Failed to fix " + numFailedCase
1117            + " OrphanTables with default .tableinfo files");
1118      }
1119
1120    }
1121    //cleanup the list
1122    orphanTableDirs.clear();
1123
1124  }
1125
1126  /**
1127   * This borrows code from MasterFileSystem.bootstrap()
1128   * 
1129   * @return an open .META. HRegion
1130   */
1131  private HRegion createNewRootAndMeta() throws IOException {
1132    Path rootdir = new Path(getConf().get(HConstants.HBASE_DIR));
1133    Configuration c = getConf();
1134    HRegionInfo rootHRI = new HRegionInfo(HRegionInfo.ROOT_REGIONINFO);
1135    MasterFileSystem.setInfoFamilyCachingForRoot(false);
1136    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1137    MasterFileSystem.setInfoFamilyCachingForMeta(false);
1138    HRegion root = HRegion.createHRegion(rootHRI, rootdir, c,
1139        HTableDescriptor.ROOT_TABLEDESC);
1140    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c,
1141        HTableDescriptor.META_TABLEDESC);
1142    MasterFileSystem.setInfoFamilyCachingForRoot(true);
1143    MasterFileSystem.setInfoFamilyCachingForMeta(true);
1144
1145    // Add first region from the META table to the ROOT region.
1146    HRegion.addRegionToMETA(root, meta);
1147    root.close();
1148    root.getLog().closeAndDelete();
1149    return meta;
1150  }
1151
1152  /**
1153   * Generate set of puts to add to new meta.  This expects the tables to be 
1154   * clean with no overlaps or holes.  If there are any problems it returns null.
1155   * 
1156   * @return An array list of puts to do in bulk, null if tables have problems
1157   */
1158  private ArrayList<Put> generatePuts(SortedMap<String, TableInfo> tablesInfo) throws IOException {
1159    ArrayList<Put> puts = new ArrayList<Put>();
1160    boolean hasProblems = false;
1161    for (Entry<String, TableInfo> e : tablesInfo.entrySet()) {
1162      String name = e.getKey();
1163
1164      // skip "-ROOT-" and ".META."
1165      if (Bytes.compareTo(Bytes.toBytes(name), HConstants.ROOT_TABLE_NAME) == 0
1166          || Bytes.compareTo(Bytes.toBytes(name), HConstants.META_TABLE_NAME) == 0) {
1167        continue;
1168      }
1169
1170      TableInfo ti = e.getValue();
1171      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1172          .entrySet()) {
1173        Collection<HbckInfo> his = spl.getValue();
1174        int sz = his.size();
1175        if (sz != 1) {
1176          // problem
1177          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1178              + " had " +  sz + " regions instead of exactly 1." );
1179          hasProblems = true;
1180          continue;
1181        }
1182
1183        // add the row directly to meta.
1184        HbckInfo hi = his.iterator().next();
1185        HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1186        Put p = new Put(hri.getRegionName());
1187        p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
1188            Writables.getBytes(hri));
1189        puts.add(p);
1190      }
1191    }
1192    return hasProblems ? null : puts;
1193  }
1194
1195  /**
1196   * Suggest fixes for each table
1197   */
1198  private void suggestFixes(SortedMap<String, TableInfo> tablesInfo) throws IOException {
1199    for (TableInfo tInfo : tablesInfo.values()) {
1200      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1201      tInfo.checkRegionChain(handler);
1202    }
1203  }
1204
1205  /**
1206   * Rebuilds meta from information in hdfs/fs.  Depends on configuration
1207   * settings passed into hbck constructor to point to a particular fs/dir.
1208   * 
1209   * @param fix flag that determines if method should attempt to fix holes
1210   * @return true if successful, false if attempt failed.
1211   */
1212  public boolean rebuildMeta(boolean fix) throws IOException,
1213      InterruptedException {
1214
1215    // TODO check to make sure hbase is offline. (or at least the table
1216    // currently being worked on is off line)
1217
1218    // Determine what's on HDFS
1219    LOG.info("Loading HBase regioninfo from HDFS...");
1220    loadHdfsRegionDirs(); // populating regioninfo table.
1221
1222    int errs = errors.getErrorList().size();
1223    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1224    checkHdfsIntegrity(false, false);
1225
1226    // make sure ok.
1227    if (errors.getErrorList().size() != errs) {
1228      // While in error state, iterate until no more fixes possible
1229      while(true) {
1230        fixes = 0;
1231        suggestFixes(tablesInfo);
1232        errors.clear();
1233        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1234        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1235
1236        int errCount = errors.getErrorList().size();
1237
1238        if (fixes == 0) {
1239          if (errCount > 0) {
1240            return false; // failed to fix problems.
1241          } else {
1242            break; // no fixes and no problems? drop out and fix stuff!
1243          }
1244        }
1245      }
1246    }
1247
1248    // we can rebuild, move old root and meta out of the way and start
1249    LOG.info("HDFS regioninfo's seems good.  Sidelining old .META.");
1250    Path backupDir = sidelineOldRootAndMeta();
1251
1252    LOG.info("Creating new .META.");
1253    HRegion meta = createNewRootAndMeta();
1254
1255    // populate meta
1256    List<Put> puts = generatePuts(tablesInfo);
1257    if (puts == null) {
1258      LOG.fatal("Problem encountered when creating new .META. entries.  " +
1259        "You may need to restore the previously sidelined -ROOT- and .META.");
1260      return false;
1261    }
1262    meta.put(puts.toArray(new Put[0]));
1263    meta.close();
1264    meta.getLog().closeAndDelete();
1265    LOG.info("Success! .META. table rebuilt.");
1266    LOG.info("Old -ROOT- and .META. are moved into " + backupDir);
1267    return true;
1268  }
1269
1270  private SortedMap<String, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1271      boolean fixOverlaps) throws IOException {
1272    LOG.info("Checking HBase region split map from HDFS data...");
1273    for (TableInfo tInfo : tablesInfo.values()) {
1274      TableIntegrityErrorHandler handler;
1275      if (fixHoles || fixOverlaps) {
1276        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1277          fixHoles, fixOverlaps);
1278      } else {
1279        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1280      }
1281      if (!tInfo.checkRegionChain(handler)) {
1282        // should dump info as well.
1283        errors.report("Found inconsistency in table " + tInfo.getName());
1284      }
1285    }
1286    return tablesInfo;
1287  }
1288
1289  private Path getSidelineDir() throws IOException {
1290    if (sidelineDir == null) {
1291      Path hbaseDir = FSUtils.getRootDir(getConf());
1292      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1293      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1294          + startMillis);
1295    }
1296    return sidelineDir;
1297  }
1298
1299  /**
1300   * Sideline a region dir (instead of deleting it)
1301   */
1302  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1303    return sidelineRegionDir(fs, null, hi);
1304  }
1305
1306  /**
1307   * Sideline a region dir (instead of deleting it)
1308   *
1309   * @param parentDir if specified, the region will be sidelined to
1310   * folder like .../parentDir/<table name>/<region name>. The purpose
1311   * is to group together similar regions sidelined, for example, those
1312   * regions should be bulk loaded back later on. If null, it is ignored.
1313   */
1314  Path sidelineRegionDir(FileSystem fs,
1315      String parentDir, HbckInfo hi) throws IOException {
1316    String tableName = Bytes.toString(hi.getTableName());
1317    Path regionDir = hi.getHdfsRegionDir();
1318
1319    if (!fs.exists(regionDir)) {
1320      LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1321      return null;
1322    }
1323
1324    Path rootDir = getSidelineDir();
1325    if (parentDir != null) {
1326      rootDir = new Path(rootDir, parentDir);
1327    }
1328    Path sidelineTableDir= new Path(rootDir, tableName);
1329    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1330    fs.mkdirs(sidelineRegionDir);
1331    boolean success = false;
1332    FileStatus[] cfs =  fs.listStatus(regionDir);
1333    if (cfs == null) {
1334      LOG.info("Region dir is empty: " + regionDir);
1335    } else {
1336      for (FileStatus cf : cfs) {
1337        Path src = cf.getPath();
1338        Path dst =  new Path(sidelineRegionDir, src.getName());
1339        if (fs.isFile(src)) {
1340          // simple file
1341          success = fs.rename(src, dst);
1342          if (!success) {
1343            String msg = "Unable to rename file " + src +  " to " + dst;
1344            LOG.error(msg);
1345            throw new IOException(msg);
1346          }
1347          continue;
1348        }
1349
1350        // is a directory.
1351        fs.mkdirs(dst);
1352
1353        LOG.info("Sidelining files from " + src + " into containing region " + dst);
1354        // FileSystem.rename is inconsistent with directories -- if the
1355        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1356        // it moves the src into the dst dir resulting in (foo/a/b).  If
1357        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1358        FileStatus[] hfiles = fs.listStatus(src);
1359        if (hfiles != null && hfiles.length > 0) {
1360          for (FileStatus hfile : hfiles) {
1361            success = fs.rename(hfile.getPath(), dst);
1362            if (!success) {
1363              String msg = "Unable to rename file " + src +  " to " + dst;
1364              LOG.error(msg);
1365              throw new IOException(msg);
1366            }
1367          }
1368        }
1369        LOG.debug("Sideline directory contents:");
1370        debugLsr(sidelineRegionDir);
1371      }
1372    }
1373
1374    LOG.info("Removing old region dir: " + regionDir);
1375    success = fs.delete(regionDir, true);
1376    if (!success) {
1377      String msg = "Unable to delete dir " + regionDir;
1378      LOG.error(msg);
1379      throw new IOException(msg);
1380    }
1381    return sidelineRegionDir;
1382  }
1383
1384  /**
1385   * Side line an entire table.
1386   */
1387  void sidelineTable(FileSystem fs, byte[] table, Path hbaseDir,
1388      Path backupHbaseDir) throws IOException {
1389    String tableName = Bytes.toString(table);
1390    Path tableDir = new Path(hbaseDir, tableName);
1391    if (fs.exists(tableDir)) {
1392      Path backupTableDir= new Path(backupHbaseDir, tableName);
1393      boolean success = fs.rename(tableDir, backupTableDir);
1394      if (!success) {
1395        throw new IOException("Failed to move  " + tableName + " from " 
1396            +  tableDir.getName() + " to " + backupTableDir.getName());
1397      }
1398    } else {
1399      LOG.info("No previous " + tableName +  " exists.  Continuing.");
1400    }
1401  }
1402
1403  /**
1404   * @return Path to backup of original directory
1405   */
1406  Path sidelineOldRootAndMeta() throws IOException {
1407    // put current -ROOT- and .META. aside.
1408    Path hbaseDir = new Path(getConf().get(HConstants.HBASE_DIR));
1409    FileSystem fs = hbaseDir.getFileSystem(getConf());
1410    Path backupDir = getSidelineDir();
1411    fs.mkdirs(backupDir);
1412
1413    sidelineTable(fs, HConstants.ROOT_TABLE_NAME, hbaseDir, backupDir);
1414    try {
1415      sidelineTable(fs, HConstants.META_TABLE_NAME, hbaseDir, backupDir);
1416    } catch (IOException e) {
1417      LOG.error("Attempt to sideline meta failed, attempt to revert...", e);
1418      try {
1419        // move it back.
1420        sidelineTable(fs, HConstants.ROOT_TABLE_NAME, backupDir, hbaseDir);
1421        LOG.warn("... revert succeed.  -ROOT- and .META. still in "
1422            + "original state.");
1423      } catch (IOException ioe) {
1424        LOG.fatal("... failed to sideline root and meta and failed to restore "
1425            + "prevoius state.  Currently in inconsistent state.  To restore "
1426            + "try to rename -ROOT- in " + backupDir.getName() + " to " 
1427            + hbaseDir.getName() + ".", ioe);
1428      }
1429      throw e; // throw original exception
1430    }
1431    return backupDir;
1432  }
1433
1434  /**
1435   * Load the list of disabled tables in ZK into local set.
1436   * @throws ZooKeeperConnectionException
1437   * @throws IOException
1438   */
1439  private void loadDisabledTables()
1440  throws ZooKeeperConnectionException, IOException {
1441    HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1442      @Override
1443      public Void connect(HConnection connection) throws IOException {
1444        ZooKeeperWatcher zkw = connection.getZooKeeperWatcher();
1445        try {
1446          for (String tableName : ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
1447            disabledTables.add(Bytes.toBytes(tableName));
1448          }
1449        } catch (KeeperException ke) {
1450          throw new IOException(ke);
1451        }
1452        return null;
1453      }
1454    });
1455  }
1456
1457  /**
1458   * Check if the specified region's table is disabled.
1459   */
1460  private boolean isTableDisabled(HRegionInfo regionInfo) {
1461    return disabledTables.contains(regionInfo.getTableName());
1462  }
1463
1464  /**
1465   * Scan HDFS for all regions, recording their information into
1466   * regionInfoMap
1467   */
1468  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1469    Path rootDir = new Path(getConf().get(HConstants.HBASE_DIR));
1470    FileSystem fs = rootDir.getFileSystem(getConf());
1471
1472    // list all tables from HDFS
1473    List<FileStatus> tableDirs = Lists.newArrayList();
1474
1475    boolean foundVersionFile = false;
1476    FileStatus[] files = fs.listStatus(rootDir);
1477    for (FileStatus file : files) {
1478      String dirName = file.getPath().getName();
1479      if (dirName.equals(HConstants.VERSION_FILE_NAME)) {
1480        foundVersionFile = true;
1481      } else {
1482        if ((!checkMetaOnly && isTableIncluded(dirName)) ||
1483            dirName.equals("-ROOT-") ||
1484            dirName.equals(".META.")) {
1485          tableDirs.add(file);
1486        }
1487      }
1488    }
1489
1490    // verify that version file exists
1491    if (!foundVersionFile) {
1492      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1493          "Version file does not exist in root dir " + rootDir);
1494      if (shouldFixVersionFile()) {
1495        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1496            + " file.");
1497        setShouldRerun();
1498        FSUtils.setVersion(fs, rootDir, getConf().getInt(
1499            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1500            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1501            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1502      }
1503    }
1504
1505    // level 1:  <HBASE_DIR>/*
1506    List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
1507    List<Future<Void>> dirsFutures;
1508
1509    for (FileStatus tableDir : tableDirs) {
1510      LOG.debug("Loading region dirs from " +tableDir.getPath());
1511      dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
1512    }
1513
1514    // Invoke and wait for Callables to complete
1515    dirsFutures = executor.invokeAll(dirs);
1516
1517    for(Future<Void> f: dirsFutures) {
1518      try {
1519        f.get();
1520      } catch(ExecutionException e) {
1521        LOG.warn("Could not load region dir " , e.getCause());
1522      }
1523    }
1524  }
1525
1526  /**
1527   * Record the location of the ROOT region as found in ZooKeeper,
1528   * as if it were in a META table. This is so that we can check
1529   * deployment of ROOT.
1530   */
1531  private boolean recordRootRegion() throws IOException {
1532    HRegionLocation rootLocation = connection.locateRegion(
1533      HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
1534
1535    // Check if Root region is valid and existing
1536    if (rootLocation == null || rootLocation.getRegionInfo() == null ||
1537        rootLocation.getHostname() == null) {
1538      errors.reportError(ERROR_CODE.NULL_ROOT_REGION,
1539        "Root Region or some of its attributes are null.");
1540      return false;
1541    }
1542    ServerName sn;
1543    try {
1544      sn = getRootRegionServerName();
1545    } catch (InterruptedException e) {
1546      throw new IOException("Interrupted", e);
1547    }
1548    MetaEntry m =
1549      new MetaEntry(rootLocation.getRegionInfo(), sn, System.currentTimeMillis());
1550    HbckInfo hbInfo = new HbckInfo(m);
1551    regionInfoMap.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
1552    return true;
1553  }
1554
1555  private ServerName getRootRegionServerName()
1556  throws IOException, InterruptedException {
1557    RootRegionTracker rootRegionTracker =
1558      new RootRegionTracker(this.connection.getZooKeeperWatcher(), new Abortable() {
1559        @Override
1560        public void abort(String why, Throwable e) {
1561          LOG.error(why, e);
1562          System.exit(1);
1563        }
1564        @Override
1565        public boolean isAborted(){
1566          return false;
1567        }
1568        
1569      });
1570    rootRegionTracker.start();
1571    ServerName sn = null;
1572    try {
1573      sn = rootRegionTracker.getRootRegionLocation();
1574    } finally {
1575      rootRegionTracker.stop();
1576    }
1577    return sn;
1578  }
1579
1580  /**
1581   * Contacts each regionserver and fetches metadata about regions.
1582   * @param regionServerList - the list of region servers to connect to
1583   * @throws IOException if a remote or network exception occurs
1584   */
1585  void processRegionServers(Collection<ServerName> regionServerList)
1586    throws IOException, InterruptedException {
1587
1588    List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1589    List<Future<Void>> workFutures;
1590
1591    // loop to contact each region server in parallel
1592    for (ServerName rsinfo: regionServerList) {
1593      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1594    }
1595    
1596    workFutures = executor.invokeAll(workItems);
1597
1598    for(int i=0; i<workFutures.size(); i++) {
1599      WorkItemRegion item = workItems.get(i);
1600      Future<Void> f = workFutures.get(i);
1601      try {
1602        f.get();
1603      } catch(ExecutionException e) {
1604        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1605            e.getCause());
1606      }
1607    }
1608  }
1609
1610  /**
1611   * Check consistency of all regions that have been found in previous phases.
1612   */
1613  private void checkAndFixConsistency()
1614  throws IOException, KeeperException, InterruptedException {
1615    for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1616      checkRegionConsistency(e.getKey(), e.getValue());
1617    }
1618  }
1619
1620  private void preCheckPermission() throws IOException, AccessControlException {
1621    if (shouldIgnorePreCheckPermission()) {
1622      return;
1623    }
1624
1625    Configuration conf = getConf();
1626    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
1627    FileSystem fs = hbaseDir.getFileSystem(conf);
1628    UserProvider provider = UserProvider.instantiate(conf);
1629    User user = provider.getCurrent();
1630    FileStatus[] files = fs.listStatus(hbaseDir);
1631    for (FileStatus file : files) {
1632      try {
1633        FSUtils.checkAccess(user, file, FsAction.WRITE);
1634      } catch (AccessControlException ace) {
1635        LOG.warn("Got AccessControlException when preCheckPermission ", ace);
1636        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + user.getShortName()
1637          + " does not have write perms to " + file.getPath()
1638          + ". Please rerun hbck as hdfs user " + file.getOwner());
1639        throw new AccessControlException(ace);
1640      }
1641    }
1642  }
1643
1644  /**
1645   * Deletes region from meta table
1646   */
1647  private void deleteMetaRegion(HbckInfo hi) throws IOException {
1648    Delete d = new Delete(hi.metaEntry.getRegionName());
1649    meta.delete(d);
1650    meta.flushCommits();
1651    LOG.info("Deleted " + hi.metaEntry.getRegionNameAsString() + " from META" );
1652  }
1653
1654  /**
1655   * Reset the split parent region info in meta table
1656   */
1657  private void resetSplitParent(HbckInfo hi) throws IOException {
1658    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1659    Delete d = new Delete(hi.metaEntry.getRegionName());
1660    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1661    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1662    mutations.add(d);
1663
1664    Put p = new Put(hi.metaEntry.getRegionName());
1665    HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1666    hri.setOffline(false);
1667    hri.setSplit(false);
1668    p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
1669      Writables.getBytes(hri));
1670    mutations.add(p);
1671
1672    meta.mutateRow(mutations);
1673    meta.flushCommits();
1674    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1675  }
1676
1677  /**
1678   * This backwards-compatibility wrapper for permanently offlining a region
1679   * that should not be alive.  If the region server does not support the
1680   * "offline" method, it will use the closest unassign method instead.  This
1681   * will basically work until one attempts to disable or delete the affected
1682   * table.  The problem has to do with in-memory only master state, so
1683   * restarting the HMaster or failing over to another should fix this.
1684   */
1685  private void offline(byte[] regionName) throws IOException {
1686    String regionString = Bytes.toStringBinary(regionName);
1687    if (!rsSupportsOffline) {
1688      LOG.warn("Using unassign region " + regionString
1689          + " instead of using offline method, you should"
1690          + " restart HMaster after these repairs");
1691      admin.unassign(regionName, true);
1692      return;
1693    }
1694
1695    // first time we assume the rs's supports #offline.
1696    try {
1697      LOG.info("Offlining region " + regionString);
1698      admin.getMaster().offline(regionName);
1699    } catch (IOException ioe) {
1700      String notFoundMsg = "java.lang.NoSuchMethodException: " +
1701        "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1702      if (ioe.getMessage().contains(notFoundMsg)) {
1703        LOG.warn("Using unassign region " + regionString
1704            + " instead of using offline method, you should"
1705            + " restart HMaster after these repairs");
1706        rsSupportsOffline = false; // in the future just use unassign
1707        admin.unassign(regionName, true);
1708        return;
1709      }
1710      throw ioe;
1711    }
1712  }
1713
1714  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1715    for (OnlineEntry rse : hi.deployedEntries) {
1716      LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
1717      try {
1718        HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
1719        offline(rse.hri.getRegionName());
1720      } catch (IOException ioe) {
1721        LOG.warn("Got exception when attempting to offline region "
1722            + Bytes.toString(rse.hri.getRegionName()), ioe);
1723      }
1724    }
1725  }
1726
1727  /**
1728   * Attempts to undeploy a region from a region server based in information in
1729   * META.  Any operations that modify the file system should make sure that
1730   * its corresponding region is not deployed to prevent data races.
1731   *
1732   * A separate call is required to update the master in-memory region state
1733   * kept in the AssignementManager.  Because disable uses this state instead of
1734   * that found in META, we can't seem to cleanly disable/delete tables that
1735   * have been hbck fixed.  When used on a version of HBase that does not have
1736   * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
1737   * restart or failover may be required.
1738   */
1739  @SuppressWarnings("deprecation")
1740  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1741    if (hi.metaEntry == null && hi.hdfsEntry == null) {
1742      undeployRegions(hi);
1743      return;
1744    }
1745
1746    // get assignment info and hregioninfo from meta.
1747    Get get = new Get(hi.getRegionName());
1748    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1749    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1750    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1751    Result r = meta.get(get);
1752    byte[] value = r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1753    byte[] startcodeBytes = r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1754    if (value == null || startcodeBytes == null) {
1755      errors.reportError("Unable to close region "
1756          + hi.getRegionNameAsString() +  " because meta does not "
1757          + "have handle to reach it.");
1758      return;
1759    }
1760    long startcode = Bytes.toLong(startcodeBytes);
1761
1762    ServerName hsa = new ServerName(Bytes.toString(value), startcode);
1763    byte[] hriVal = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1764    HRegionInfo hri= Writables.getHRegionInfoOrNull(hriVal);
1765    if (hri == null) {
1766      LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1767          + " because META had invalid or missing "
1768          + HConstants.CATALOG_FAMILY_STR + ":"
1769          + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
1770          + " qualifier value.");
1771      return;
1772    }
1773
1774    // close the region -- close files and remove assignment
1775    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, hsa, hri);
1776  }
1777
1778  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1779    KeeperException, InterruptedException {
1780    // If we are trying to fix the errors
1781    if (shouldFixAssignments()) {
1782      errors.print(msg);
1783      undeployRegions(hbi);
1784      setShouldRerun();
1785      HRegionInfo hri = hbi.getHdfsHRI();
1786      if (hri == null) {
1787        hri = hbi.metaEntry;
1788      }
1789      HBaseFsckRepair.fixUnassigned(admin, hri);
1790      HBaseFsckRepair.waitUntilAssigned(admin, hri);
1791    }
1792  }
1793
1794  /**
1795   * Check a single region for consistency and correct deployment.
1796   */
1797  private void checkRegionConsistency(final String key, final HbckInfo hbi)
1798  throws IOException, KeeperException, InterruptedException {
1799    String descriptiveName = hbi.toString();
1800
1801    boolean inMeta = hbi.metaEntry != null;
1802    // In case not checking HDFS, assume the region is on HDFS
1803    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
1804    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1805    boolean isDeployed = !hbi.deployedOn.isEmpty();
1806    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1807    boolean deploymentMatchesMeta =
1808      hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1809      hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1810    boolean splitParent =
1811      (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1812    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1813    boolean recentlyModified = inHdfs &&
1814      hbi.getModTime() + timelag > System.currentTimeMillis();
1815
1816    // ========== First the healthy cases =============
1817    if (hbi.containsOnlyHdfsEdits()) {
1818      return;
1819    }
1820    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1821      return;
1822    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1823      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
1824        "tabled that is not deployed");
1825      return;
1826    } else if (recentlyModified) {
1827      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1828      return;
1829    }
1830    // ========== Cases where the region is not in META =============
1831    else if (!inMeta && !inHdfs && !isDeployed) {
1832      // We shouldn't have record of this region at all then!
1833      assert false : "Entry for region with no data";
1834    } else if (!inMeta && !inHdfs && isDeployed) {
1835      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1836          + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
1837          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1838      if (shouldFixAssignments()) {
1839        undeployRegions(hbi);
1840      }
1841
1842    } else if (!inMeta && inHdfs && !isDeployed) {
1843      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1844          + descriptiveName + " on HDFS, but not listed in META " +
1845          "or deployed on any region server");
1846      // restore region consistency of an adopted orphan
1847      if (shouldFixMeta()) {
1848        if (!hbi.isHdfsRegioninfoPresent()) {
1849          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1850              +  " in table integrity repair phase if -fixHdfsOrphans was" +
1851              " used.");
1852          return;
1853        }
1854
1855        LOG.info("Patching .META. with .regioninfo: " + hbi.getHdfsHRI());
1856        HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1857
1858        tryAssignmentRepair(hbi, "Trying to reassign region...");
1859      }
1860
1861    } else if (!inMeta && inHdfs && isDeployed) {
1862      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
1863          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1864      debugLsr(hbi.getHdfsRegionDir());
1865      if (shouldFixMeta()) {
1866        if (!hbi.isHdfsRegioninfoPresent()) {
1867          LOG.error("This should have been repaired in table integrity repair phase");
1868          return;
1869        }
1870
1871        LOG.info("Patching .META. with with .regioninfo: " + hbi.getHdfsHRI());
1872        HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1873
1874        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1875      }
1876
1877    // ========== Cases where the region is in META =============
1878    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
1879      // check whether this is an actual error, or just transient state where parent
1880      // is not cleaned
1881      if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
1882        // check that split daughters are there
1883        HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
1884        HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
1885        if (infoA != null && infoB != null) {
1886          // we already processed or will process daughters. Move on, nothing to see here.
1887          hbi.setSkipChecks(true);
1888          return;
1889        }
1890      }
1891      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
1892          + descriptiveName + " is a split parent in META, in HDFS, "
1893          + "and not deployed on any region server. This could be transient.");
1894      if (shouldFixSplitParents()) {
1895        setShouldRerun();
1896        resetSplitParent(hbi);
1897      }
1898    } else if (inMeta && !inHdfs && !isDeployed) {
1899      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
1900          + descriptiveName + " found in META, but not in HDFS "
1901          + "or deployed on any region server.");
1902      if (shouldFixMeta()) {
1903        deleteMetaRegion(hbi);
1904      }
1905    } else if (inMeta && !inHdfs && isDeployed) {
1906      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
1907          + " found in META, but not in HDFS, " +
1908          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1909      // We treat HDFS as ground truth.  Any information in meta is transient
1910      // and equivalent data can be regenerated.  So, lets unassign and remove
1911      // these problems from META.
1912      if (shouldFixAssignments()) {
1913        errors.print("Trying to fix unassigned region...");
1914        closeRegion(hbi);// Close region will cause RS to abort.
1915      }
1916      if (shouldFixMeta()) {
1917        // wait for it to complete
1918        deleteMetaRegion(hbi);
1919      }
1920    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
1921      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
1922          + " not deployed on any region server.");
1923      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1924    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
1925      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
1926          "Region " + descriptiveName + " should not be deployed according " +
1927          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1928      if (shouldFixAssignments()) {
1929        errors.print("Trying to close the region " + descriptiveName);
1930        setShouldRerun();
1931        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1932      }
1933    } else if (inMeta && inHdfs && isMultiplyDeployed) {
1934      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
1935          + " is listed in META on region server " + hbi.metaEntry.regionServer
1936          + " but is multiply assigned to region servers " +
1937          Joiner.on(", ").join(hbi.deployedOn));
1938      // If we are trying to fix the errors
1939      if (shouldFixAssignments()) {
1940        errors.print("Trying to fix assignment error...");
1941        setShouldRerun();
1942        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1943      }
1944    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
1945      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
1946          + descriptiveName + " listed in META on region server " +
1947          hbi.metaEntry.regionServer + " but found on region server " +
1948          hbi.deployedOn.get(0));
1949      // If we are trying to fix the errors
1950      if (shouldFixAssignments()) {
1951        errors.print("Trying to fix assignment error...");
1952        setShouldRerun();
1953        HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
1954        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
1955      }
1956    } else {
1957      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
1958          " is in an unforeseen state:" +
1959          " inMeta=" + inMeta +
1960          " inHdfs=" + inHdfs +
1961          " isDeployed=" + isDeployed +
1962          " isMultiplyDeployed=" + isMultiplyDeployed +
1963          " deploymentMatchesMeta=" + deploymentMatchesMeta +
1964          " shouldBeDeployed=" + shouldBeDeployed);
1965    }
1966  }
1967
1968  /**
1969   * Checks tables integrity. Goes over all regions and scans the tables.
1970   * Collects all the pieces for each table and checks if there are missing,
1971   * repeated or overlapping ones.
1972   * @throws IOException
1973   */
1974  SortedMap<String, TableInfo> checkIntegrity() throws IOException {
1975    tablesInfo = new TreeMap<String,TableInfo> ();
1976    List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
1977    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
1978    for (HbckInfo hbi : regionInfoMap.values()) {
1979      // Check only valid, working regions
1980      if (hbi.metaEntry == null) {
1981        // this assumes that consistency check has run loadMetaEntry
1982        noHDFSRegionInfos.add(hbi);
1983        Path p = hbi.getHdfsRegionDir();
1984        if (p == null) {
1985          errors.report("No regioninfo in Meta or HDFS. " + hbi);
1986        }
1987
1988        // TODO test.
1989        continue;
1990      }
1991      if (hbi.metaEntry.regionServer == null) {
1992        errors.detail("Skipping region because no region server: " + hbi);
1993        continue;
1994      }
1995      if (hbi.metaEntry.isOffline()) {
1996        errors.detail("Skipping region because it is offline: " + hbi);
1997        continue;
1998      }
1999      if (hbi.containsOnlyHdfsEdits()) {
2000        errors.detail("Skipping region because it only contains edits" + hbi);
2001        continue;
2002      }
2003
2004      // Missing regionDir or over-deployment is checked elsewhere. Include
2005      // these cases in modTInfo, so we can evaluate those regions as part of
2006      // the region chain in META
2007      //if (hbi.foundRegionDir == null) continue;
2008      //if (hbi.deployedOn.size() != 1) continue;
2009      if (hbi.deployedOn.size() == 0) continue;
2010
2011      // We should be safe here
2012      String tableName = hbi.metaEntry.getTableNameAsString();
2013      TableInfo modTInfo = tablesInfo.get(tableName);
2014      if (modTInfo == null) {
2015        modTInfo = new TableInfo(tableName);
2016      }
2017      for (ServerName server : hbi.deployedOn) {
2018        modTInfo.addServer(server);
2019      }
2020
2021      if (!hbi.isSkipChecks()) {
2022        modTInfo.addRegionInfo(hbi);
2023      }
2024
2025      tablesInfo.put(tableName, modTInfo);
2026    }
2027
2028    for (TableInfo tInfo : tablesInfo.values()) {
2029      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2030      if (!tInfo.checkRegionChain(handler)) {
2031        errors.report("Found inconsistency in table " + tInfo.getName());
2032      }
2033    }
2034    return tablesInfo;
2035  }
2036
2037  /**
2038   * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2039   * @return number of file move fixes done to merge regions.
2040   */
2041  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2042    int fileMoves = 0;
2043    String thread = Thread.currentThread().getName();
2044    LOG.debug("[" + thread + "] Contained region dir after close and pause");
2045    debugLsr(contained.getHdfsRegionDir());
2046
2047    // rename the contained into the container.
2048    FileSystem fs = targetRegionDir.getFileSystem(getConf());
2049    FileStatus[] dirs = null;
2050    try { 
2051      dirs = fs.listStatus(contained.getHdfsRegionDir());
2052    } catch (FileNotFoundException fnfe) {
2053      // region we are attempting to merge in is not present!  Since this is a merge, there is
2054      // no harm skipping this region if it does not exist.
2055      if (!fs.exists(contained.getHdfsRegionDir())) {
2056        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() 
2057            + " is missing. Assuming already sidelined or moved.");
2058      } else {
2059        sidelineRegionDir(fs, contained);
2060      }
2061      return fileMoves;
2062    }
2063
2064    if (dirs == null) {
2065      if (!fs.exists(contained.getHdfsRegionDir())) {
2066        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() 
2067            + " already sidelined.");
2068      } else {
2069        sidelineRegionDir(fs, contained);
2070      }
2071      return fileMoves;
2072    }
2073
2074    for (FileStatus cf : dirs) {
2075      Path src = cf.getPath();
2076      Path dst =  new Path(targetRegionDir, src.getName());
2077
2078      if (src.getName().equals(HRegion.REGIONINFO_FILE)) {
2079        // do not copy the old .regioninfo file.
2080        continue;
2081      }
2082
2083      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2084        // do not copy the .oldlogs files
2085        continue;
2086      }
2087
2088      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2089      // FileSystem.rename is inconsistent with directories -- if the
2090      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2091      // it moves the src into the dst dir resulting in (foo/a/b).  If
2092      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2093      for (FileStatus hfile : fs.listStatus(src)) {
2094        boolean success = fs.rename(hfile.getPath(), dst);
2095        if (success) {
2096          fileMoves++;
2097        }
2098      }
2099      LOG.debug("[" + thread + "] Sideline directory contents:");
2100      debugLsr(targetRegionDir);
2101    }
2102
2103    // if all success.
2104    sidelineRegionDir(fs, contained);
2105    LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2106        getSidelineDir());
2107    debugLsr(contained.getHdfsRegionDir());
2108
2109    return fileMoves;
2110  }
2111
2112
2113  static class WorkItemOverlapMerge implements Callable<Void> {
2114    private TableIntegrityErrorHandler handler;
2115    Collection<HbckInfo> overlapgroup;
2116    
2117    WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2118      this.handler = handler;
2119      this.overlapgroup = overlapgroup;
2120    }
2121    
2122    @Override
2123    public Void call() throws Exception {
2124      handler.handleOverlapGroup(overlapgroup);
2125      return null;
2126    }
2127  };
2128  
2129  
2130  /**
2131   * Maintain information about a particular table.
2132   */
2133  public class TableInfo {
2134    String tableName;
2135    TreeSet <ServerName> deployedOn;
2136
2137    // backwards regions
2138    final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2139
2140    // sidelined big overlapped regions
2141    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2142
2143    // region split calculator
2144    final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2145
2146    // Histogram of different HTableDescriptors found.  Ideally there is only one!
2147    final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2148
2149    // key = start split, values = set of splits in problem group
2150    final Multimap<byte[], HbckInfo> overlapGroups =
2151      TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2152
2153    TableInfo(String name) {
2154      this.tableName = name;
2155      deployedOn = new TreeSet <ServerName>();
2156    }
2157
2158    /**
2159     * @return descriptor common to all regions.  null if are none or multiple!
2160     */
2161    private HTableDescriptor getHTD() {
2162      if (htds.size() == 1) {
2163        return (HTableDescriptor)htds.toArray()[0];
2164      } else {
2165        LOG.error("None/Multiple table descriptors found for table '"
2166          + tableName + "' regions: " + htds);
2167      }
2168      return null;
2169    }
2170
2171    public void addRegionInfo(HbckInfo hir) {
2172      if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2173        // end key is absolute end key, just add it.
2174        sc.add(hir);
2175        return;
2176      }
2177
2178      // if not the absolute end key, check for cycle 
2179      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2180        errors.reportError(
2181            ERROR_CODE.REGION_CYCLE,
2182            String.format("The endkey for this region comes before the "
2183                + "startkey, startkey=%s, endkey=%s",
2184                Bytes.toStringBinary(hir.getStartKey()),
2185                Bytes.toStringBinary(hir.getEndKey())), this, hir);
2186        backwards.add(hir);
2187        return;
2188      }
2189
2190      // main case, add to split calculator
2191      sc.add(hir);
2192    }
2193
2194    public void addServer(ServerName server) {
2195      this.deployedOn.add(server);
2196    }
2197
2198    public String getName() {
2199      return tableName;
2200    }
2201
2202    public int getNumRegions() {
2203      return sc.getStarts().size() + backwards.size();
2204    }
2205
2206    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2207      ErrorReporter errors;
2208
2209      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2210        this.errors = errors;
2211        setTableInfo(ti);
2212      }
2213
2214      @Override
2215      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2216        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2217            "First region should start with an empty key.  You need to "
2218            + " create a new region and regioninfo in HDFS to plug the hole.",
2219            getTableInfo(), hi);
2220      }
2221      
2222      @Override
2223      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2224        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2225            "Last region should end with an empty key. You need to "
2226                + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2227      }
2228
2229      @Override
2230      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2231        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2232            "Region has the same start and end key.", getTableInfo(), hi);
2233      }
2234
2235      @Override
2236      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2237        byte[] key = r1.getStartKey();
2238        // dup start key
2239        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2240            "Multiple regions have the same startkey: "
2241            + Bytes.toStringBinary(key), getTableInfo(), r1);
2242        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2243            "Multiple regions have the same startkey: "
2244            + Bytes.toStringBinary(key), getTableInfo(), r2);
2245      }
2246
2247      @Override
2248      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2249        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2250            "There is an overlap in the region chain.",
2251            getTableInfo(), hi1, hi2);
2252      }
2253
2254      @Override
2255      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2256        errors.reportError(
2257            ERROR_CODE.HOLE_IN_REGION_CHAIN,
2258            "There is a hole in the region chain between "
2259                + Bytes.toStringBinary(holeStart) + " and "
2260                + Bytes.toStringBinary(holeStop)
2261                + ".  You need to create a new .regioninfo and region "
2262                + "dir in hdfs to plug the hole.");
2263      }
2264    };
2265
2266    /**
2267     * This handler fixes integrity errors from hdfs information.  There are
2268     * basically three classes of integrity problems 1) holes, 2) overlaps, and
2269     * 3) invalid regions.
2270     *
2271     * This class overrides methods that fix holes and the overlap group case.
2272     * Individual cases of particular overlaps are handled by the general
2273     * overlap group merge repair case.
2274     *
2275     * If hbase is online, this forces regions offline before doing merge
2276     * operations.
2277     */
2278    private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2279      Configuration conf;
2280
2281      boolean fixOverlaps = true;
2282
2283      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2284          boolean fixHoles, boolean fixOverlaps) {
2285        super(ti, errors);
2286        this.conf = conf;
2287        this.fixOverlaps = fixOverlaps;
2288        // TODO properly use fixHoles
2289      }
2290
2291      /**
2292       * This is a special case hole -- when the first region of a table is
2293       * missing from META, HBase doesn't acknowledge the existance of the
2294       * table.
2295       */
2296      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2297        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2298            "First region should start with an empty key.  Creating a new " +
2299            "region and regioninfo in HDFS to plug the hole.",
2300            getTableInfo(), next);
2301        HTableDescriptor htd = getTableInfo().getHTD();
2302        // from special EMPTY_START_ROW to next region's startKey
2303        HRegionInfo newRegion = new HRegionInfo(htd.getName(),
2304            HConstants.EMPTY_START_ROW, next.getStartKey());
2305
2306        // TODO test
2307        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2308        LOG.info("Table region start key was not empty.  Created new empty region: "
2309            + newRegion + " " +region);
2310        fixes++;
2311      }
2312
2313      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2314        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2315            "Last region should end with an empty key. Creating a new "
2316                + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2317        HTableDescriptor htd = getTableInfo().getHTD();
2318        // from curEndKey to EMPTY_START_ROW
2319        HRegionInfo newRegion = new HRegionInfo(htd.getName(), curEndKey,
2320            HConstants.EMPTY_START_ROW);
2321
2322        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2323        LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2324            + " " + region);
2325        fixes++;
2326      }
2327      
2328      /**
2329       * There is a hole in the hdfs regions that violates the table integrity
2330       * rules.  Create a new empty region that patches the hole.
2331       */
2332      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2333        errors.reportError(
2334            ERROR_CODE.HOLE_IN_REGION_CHAIN,
2335            "There is a hole in the region chain between "
2336                + Bytes.toStringBinary(holeStartKey) + " and "
2337                + Bytes.toStringBinary(holeStopKey)
2338                + ".  Creating a new regioninfo and region "
2339                + "dir in hdfs to plug the hole.");
2340        HTableDescriptor htd = getTableInfo().getHTD();
2341        HRegionInfo newRegion = new HRegionInfo(htd.getName(), holeStartKey, holeStopKey);
2342        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2343        LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region);
2344        fixes++;
2345      }
2346
2347      /**
2348       * This takes set of overlapping regions and merges them into a single
2349       * region.  This covers cases like degenerate regions, shared start key,
2350       * general overlaps, duplicate ranges, and partial overlapping regions.
2351       *
2352       * Cases:
2353       * - Clean regions that overlap
2354       * - Only .oldlogs regions (can't find start/stop range, or figure out)
2355       * 
2356       * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
2357       */
2358      @Override
2359      public void handleOverlapGroup(Collection<HbckInfo> overlap)
2360          throws IOException {
2361        Preconditions.checkNotNull(overlap);
2362        Preconditions.checkArgument(overlap.size() >0);
2363
2364        if (!this.fixOverlaps) {
2365          LOG.warn("Not attempting to repair overlaps.");
2366          return;
2367        }
2368
2369        if (overlap.size() > maxMerge) {
2370          LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2371            "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2372          if (sidelineBigOverlaps) {
2373            // we only sideline big overlapped groups that exceeds the max number of regions to merge
2374            sidelineBigOverlaps(overlap);
2375          }
2376          return;
2377        }
2378
2379        mergeOverlaps(overlap);
2380      }
2381
2382      void mergeOverlaps(Collection<HbckInfo> overlap)
2383          throws IOException {
2384        String thread = Thread.currentThread().getName();
2385        LOG.info("== [" + thread + "] Merging regions into one region: "
2386          + Joiner.on(",").join(overlap));
2387        // get the min / max range and close all concerned regions
2388        Pair<byte[], byte[]> range = null;
2389        for (HbckInfo hi : overlap) {
2390          if (range == null) {
2391            range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2392          } else {
2393            if (RegionSplitCalculator.BYTES_COMPARATOR
2394                .compare(hi.getStartKey(), range.getFirst()) < 0) {
2395              range.setFirst(hi.getStartKey());
2396            }
2397            if (RegionSplitCalculator.BYTES_COMPARATOR
2398                .compare(hi.getEndKey(), range.getSecond()) > 0) {
2399              range.setSecond(hi.getEndKey());
2400            }
2401          }
2402          // need to close files so delete can happen.
2403          LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2404          LOG.debug("[" + thread + "] Contained region dir before close");
2405          debugLsr(hi.getHdfsRegionDir());
2406          try {
2407            LOG.info("[" + thread + "] Closing region: " + hi);
2408            closeRegion(hi);
2409          } catch (IOException ioe) {
2410            LOG.warn("[" + thread + "] Was unable to close region " + hi
2411              + ".  Just continuing... ", ioe);
2412          } catch (InterruptedException e) {
2413            LOG.warn("[" + thread + "] Was unable to close region " + hi
2414              + ".  Just continuing... ", e);
2415          }
2416
2417          try {
2418            LOG.info("[" + thread + "] Offlining region: " + hi);
2419            offline(hi.getRegionName());
2420          } catch (IOException ioe) {
2421            LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2422              + ".  Just continuing... ", ioe);
2423          }
2424        }
2425
2426        // create new empty container region.
2427        HTableDescriptor htd = getTableInfo().getHTD();
2428        // from start key to end Key
2429        HRegionInfo newRegion = new HRegionInfo(htd.getName(), range.getFirst(),
2430            range.getSecond());
2431        HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2432        LOG.info("[" + thread + "] Created new empty container region: " +
2433            newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2434        debugLsr(region.getRegionDir());
2435
2436        // all target regions are closed, should be able to safely cleanup.
2437        boolean didFix= false;
2438        Path target = region.getRegionDir();
2439        for (HbckInfo contained : overlap) {
2440          LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
2441          int merges = mergeRegionDirs(target, contained);
2442          if (merges > 0) {
2443            didFix = true;
2444          }
2445        }
2446        if (didFix) {
2447          fixes++;
2448        }
2449      }
2450
2451      /**
2452       * Sideline some regions in a big overlap group so that it
2453       * will have fewer regions, and it is easier to merge them later on.
2454       *
2455       * @param bigOverlap the overlapped group with regions more than maxMerge
2456       * @throws IOException
2457       */
2458      void sidelineBigOverlaps(
2459          Collection<HbckInfo> bigOverlap) throws IOException {
2460        int overlapsToSideline = bigOverlap.size() - maxMerge;
2461        if (overlapsToSideline > maxOverlapsToSideline) {
2462          overlapsToSideline = maxOverlapsToSideline;
2463        }
2464        List<HbckInfo> regionsToSideline =
2465          RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2466        FileSystem fs = FileSystem.get(conf);
2467        for (HbckInfo regionToSideline: regionsToSideline) {
2468          try {
2469            LOG.info("Closing region: " + regionToSideline);
2470            closeRegion(regionToSideline);
2471          } catch (IOException ioe) {
2472            LOG.warn("Was unable to close region " + regionToSideline
2473              + ".  Just continuing... ", ioe);
2474          } catch (InterruptedException e) {
2475            LOG.warn("Was unable to close region " + regionToSideline
2476              + ".  Just continuing... ", e);
2477          }
2478
2479          try {
2480            LOG.info("Offlining region: " + regionToSideline);
2481            offline(regionToSideline.getRegionName());
2482          } catch (IOException ioe) {
2483            LOG.warn("Unable to offline region from master: " + regionToSideline
2484              + ".  Just continuing... ", ioe);
2485          }
2486
2487          LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2488          Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2489          if (sidelineRegionDir != null) {
2490            sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2491            LOG.info("After sidelined big overlapped region: "
2492              + regionToSideline.getRegionNameAsString()
2493              + " to " + sidelineRegionDir.toString());
2494            fixes++;
2495          }
2496        }
2497      }
2498    }
2499
2500    /**
2501     * Check the region chain (from META) of this table.  We are looking for
2502     * holes, overlaps, and cycles.
2503     * @return false if there are errors
2504     * @throws IOException
2505     */
2506    public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2507      // When table is disabled no need to check for the region chain. Some of the regions
2508      // accidently if deployed, this below code might report some issues like missing start
2509      // or end regions or region hole in chain and may try to fix which is unwanted.
2510      if (disabledTables.contains(this.tableName.getBytes())) {
2511        return true;
2512      }
2513      int originalErrorsCount = errors.getErrorList().size();
2514      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2515      SortedSet<byte[]> splits = sc.getSplits();
2516
2517      byte[] prevKey = null;
2518      byte[] problemKey = null;
2519      for (byte[] key : splits) {
2520        Collection<HbckInfo> ranges = regions.get(key);
2521        if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2522          for (HbckInfo rng : ranges) {
2523            handler.handleRegionStartKeyNotEmpty(rng);
2524          }
2525        }
2526
2527        // check for degenerate ranges
2528        for (HbckInfo rng : ranges) {
2529          // special endkey case converts '' to null
2530          byte[] endKey = rng.getEndKey();
2531          endKey = (endKey.length == 0) ? null : endKey;
2532          if (Bytes.equals(rng.getStartKey(),endKey)) {
2533            handler.handleDegenerateRegion(rng);
2534          }
2535        }
2536
2537        if (ranges.size() == 1) {
2538          // this split key is ok -- no overlap, not a hole.
2539          if (problemKey != null) {
2540            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2541          }
2542          problemKey = null; // fell through, no more problem.
2543        } else if (ranges.size() > 1) {
2544          // set the new problem key group name, if already have problem key, just
2545          // keep using it.
2546          if (problemKey == null) {
2547            // only for overlap regions.
2548            LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2549            problemKey = key;
2550          }
2551          overlapGroups.putAll(problemKey, ranges);
2552
2553          // record errors
2554          ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2555          //  this dumb and n^2 but this shouldn't happen often
2556          for (HbckInfo r1 : ranges) {
2557            subRange.remove(r1);
2558            for (HbckInfo r2 : subRange) {
2559              if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2560                handler.handleDuplicateStartKeys(r1,r2);
2561              } else {
2562                // overlap
2563                handler.handleOverlapInRegionChain(r1, r2);
2564              }
2565            }
2566          }
2567
2568        } else if (ranges.size() == 0) {
2569          if (problemKey != null) {
2570            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2571          }
2572          problemKey = null;
2573
2574          byte[] holeStopKey = sc.getSplits().higher(key);
2575          // if higher key is null we reached the top.
2576          if (holeStopKey != null) {
2577            // hole
2578            handler.handleHoleInRegionChain(key, holeStopKey);
2579          }
2580        }
2581        prevKey = key;
2582      }
2583
2584      // When the last region of a table is proper and having an empty end key, 'prevKey'
2585      // will be null.
2586      if (prevKey != null) {
2587        handler.handleRegionEndKeyNotEmpty(prevKey);
2588      }
2589
2590      // TODO fold this into the TableIntegrityHandler
2591      if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2592        LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
2593            " false to run serially.");
2594        boolean ok = handleOverlapsParallel(handler, prevKey);
2595        if (!ok) {
2596          return false;
2597        }
2598      } else {
2599        LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to" +
2600            " true to run in parallel.");
2601        for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2602          handler.handleOverlapGroup(overlap);
2603        }
2604      }
2605
2606      if (details) {
2607        // do full region split map dump
2608        errors.print("---- Table '"  +  this.tableName
2609            + "': region split map");
2610        dump(splits, regions);
2611        errors.print("---- Table '"  +  this.tableName
2612            + "': overlap groups");
2613        dumpOverlapProblems(overlapGroups);
2614        errors.print("There are " + overlapGroups.keySet().size()
2615            + " overlap groups with " + overlapGroups.size()
2616            + " overlapping regions");
2617      }
2618      if (!sidelinedRegions.isEmpty()) {
2619        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2620        errors.print("---- Table '"  +  this.tableName
2621            + "': sidelined big overlapped regions");
2622        dumpSidelinedRegions(sidelinedRegions);
2623      }
2624      return errors.getErrorList().size() == originalErrorsCount;
2625    }
2626
2627    private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
2628        throws IOException {
2629      // we parallelize overlap handler for the case we have lots of groups to fix.  We can
2630      // safely assume each group is independent. 
2631      List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
2632      List<Future<Void>> rets;
2633      for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2634        // 
2635        merges.add(new WorkItemOverlapMerge(overlap, handler));
2636      }
2637      try {
2638        rets = executor.invokeAll(merges);
2639      } catch (InterruptedException e) {
2640        e.printStackTrace();
2641        LOG.error("Overlap merges were interrupted", e);
2642        return false;
2643      }
2644      for(int i=0; i<merges.size(); i++) {
2645        WorkItemOverlapMerge work = merges.get(i);
2646        Future<Void> f = rets.get(i);
2647        try {
2648          f.get();
2649        } catch(ExecutionException e) {
2650          LOG.warn("Failed to merge overlap group" + work, e.getCause());
2651        } catch (InterruptedException e) {
2652          LOG.error("Waiting for overlap merges was interrupted", e);
2653          return false;
2654        }
2655      }
2656      return true;
2657    }
2658
2659    /**
2660     * This dumps data in a visually reasonable way for visual debugging
2661     * 
2662     * @param splits
2663     * @param regions
2664     */
2665    void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2666      // we display this way because the last end key should be displayed as well.
2667      StringBuilder sb = new StringBuilder();
2668      for (byte[] k : splits) {
2669        sb.setLength(0); // clear out existing buffer, if any.
2670        sb.append(Bytes.toStringBinary(k) + ":\t");
2671        for (HbckInfo r : regions.get(k)) {
2672          sb.append("[ "+ r.toString() + ", "
2673              + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2674        }
2675        errors.print(sb.toString());
2676      }
2677    }
2678  }
2679
2680  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2681    // we display this way because the last end key should be displayed as
2682    // well.
2683    for (byte[] k : regions.keySet()) {
2684      errors.print(Bytes.toStringBinary(k) + ":");
2685      for (HbckInfo r : regions.get(k)) {
2686        errors.print("[ " + r.toString() + ", "
2687            + Bytes.toStringBinary(r.getEndKey()) + "]");
2688      }
2689      errors.print("----");
2690    }
2691  }
2692
2693  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2694    for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2695      String tableName = Bytes.toStringBinary(entry.getValue().getTableName());
2696      Path path = entry.getKey();
2697      errors.print("This sidelined region dir should be bulk loaded: "
2698        + path.toString());
2699      errors.print("Bulk load command looks like: "
2700        + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2701        + path.toUri().getPath() + " "+ tableName);
2702    }
2703  }
2704
2705  public Multimap<byte[], HbckInfo> getOverlapGroups(
2706      String table) {
2707    TableInfo ti = tablesInfo.get(table);
2708    return ti.overlapGroups;
2709  }
2710
2711  /**
2712   * Return a list of user-space table names whose metadata have not been
2713   * modified in the last few milliseconds specified by timelag
2714   * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
2715   * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
2716   * milliseconds specified by timelag, then the table is a candidate to be returned.
2717   * @return tables that have not been modified recently
2718   * @throws IOException if an error is encountered
2719   */
2720   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2721    List<String> tableNames = new ArrayList<String>();
2722    long now = System.currentTimeMillis();
2723
2724    for (HbckInfo hbi : regionInfoMap.values()) {
2725      MetaEntry info = hbi.metaEntry;
2726
2727      // if the start key is zero, then we have found the first region of a table.
2728      // pick only those tables that were not modified in the last few milliseconds.
2729      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2730        if (info.modTime + timelag < now) {
2731          tableNames.add(info.getTableNameAsString());
2732        } else {
2733          numSkipped.incrementAndGet(); // one more in-flux table
2734        }
2735      }
2736    }
2737    return getHTableDescriptors(tableNames);
2738  }
2739
2740   HTableDescriptor[] getHTableDescriptors(List<String> tableNames) {
2741    HTableDescriptor[] htd = new HTableDescriptor[0];
2742     try {
2743       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2744       htd = new HBaseAdmin(getConf()).getTableDescriptors(tableNames);
2745     } catch (IOException e) {
2746       LOG.debug("Exception getting table descriptors", e);
2747     }
2748     return htd;
2749  }
2750
2751
2752  /**
2753   * Gets the entry in regionInfo corresponding to the the given encoded
2754   * region name. If the region has not been seen yet, a new entry is added
2755   * and returned.
2756   */
2757  private synchronized HbckInfo getOrCreateInfo(String name) {
2758    HbckInfo hbi = regionInfoMap.get(name);
2759    if (hbi == null) {
2760      hbi = new HbckInfo(null);
2761      regionInfoMap.put(name, hbi);
2762    }
2763    return hbi;
2764  }
2765
2766  /**
2767    * Check values in regionInfo for .META.
2768    * Check if zero or more than one regions with META are found.
2769    * If there are inconsistencies (i.e. zero or more than one regions
2770    * pretend to be holding the .META.) try to fix that and report an error.
2771    * @throws IOException from HBaseFsckRepair functions
2772   * @throws KeeperException
2773   * @throws InterruptedException
2774    */
2775  boolean checkMetaRegion()
2776    throws IOException, KeeperException, InterruptedException {
2777    List <HbckInfo> metaRegions = Lists.newArrayList();
2778    for (HbckInfo value : regionInfoMap.values()) {
2779      if (value.metaEntry.isMetaRegion()) {
2780        metaRegions.add(value);
2781      }
2782    }
2783
2784    // If something is wrong
2785    if (metaRegions.size() != 1) {
2786      HRegionLocation rootLocation = connection.locateRegion(
2787        HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
2788      HbckInfo root =
2789          regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
2790
2791      // If there is no region holding .META.
2792      if (metaRegions.size() == 0) {
2793        errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
2794        if (shouldFixAssignments()) {
2795          errors.print("Trying to fix a problem with .META...");
2796          setShouldRerun();
2797          // try to fix it (treat it as unassigned region)
2798          HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
2799          HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
2800        }
2801      }
2802      // If there are more than one regions pretending to hold the .META.
2803      else if (metaRegions.size() > 1) {
2804        errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
2805        if (shouldFixAssignments()) {
2806          errors.print("Trying to fix a problem with .META...");
2807          setShouldRerun();
2808          // try fix it (treat is a dupe assignment)
2809          List <ServerName> deployedOn = Lists.newArrayList();
2810          for (HbckInfo mRegion : metaRegions) {
2811            deployedOn.add(mRegion.metaEntry.regionServer);
2812          }
2813          HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
2814        }
2815      }
2816      // rerun hbck with hopefully fixed META
2817      return false;
2818    }
2819    // no errors, so continue normally
2820    return true;
2821  }
2822
2823  /**
2824   * Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
2825   * @throws IOException if an error is encountered
2826   */
2827  boolean loadMetaEntries() throws IOException {
2828
2829    // get a list of all regions from the master. This involves
2830    // scanning the META table
2831    if (!recordRootRegion()) {
2832      // Will remove later if we can fix it
2833      errors.reportError("Fatal error: unable to get root region location. Exiting...");
2834      return false;
2835    }
2836
2837    MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
2838      int countRecord = 1;
2839
2840      // comparator to sort KeyValues with latest modtime
2841      final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
2842        public int compare(KeyValue k1, KeyValue k2) {
2843          return (int)(k1.getTimestamp() - k2.getTimestamp());
2844        }
2845      };
2846
2847      public boolean processRow(Result result) throws IOException {
2848        try {
2849
2850          // record the latest modification of this META record
2851          long ts =  Collections.max(result.list(), comp).getTimestamp();
2852          Pair<HRegionInfo, ServerName> pair = MetaReader.parseCatalogResult(result);
2853          if (pair == null || pair.getFirst() == null) {
2854            emptyRegionInfoQualifiers.add(result);
2855            return true;
2856          }
2857          ServerName sn = null;
2858          if (pair.getSecond() != null) {
2859            sn = pair.getSecond();
2860          }
2861          HRegionInfo hri = pair.getFirst();
2862          if (!(isTableIncluded(hri.getTableNameAsString())
2863              || hri.isMetaRegion() || hri.isRootRegion())) {
2864            return true;
2865          }
2866          PairOfSameType<HRegionInfo> daughters = MetaReader.getDaughterRegions(result);
2867          MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
2868          HbckInfo hbInfo = new HbckInfo(m);
2869          HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
2870          if (previous != null) {
2871            throw new IOException("Two entries in META are same " + previous);
2872          }
2873
2874          // show proof of progress to the user, once for every 100 records.
2875          if (countRecord % 100 == 0) {
2876            errors.progress();
2877          }
2878          countRecord++;
2879          return true;
2880        } catch (RuntimeException e) {
2881          LOG.error("Result=" + result);
2882          throw e;
2883        }
2884      }
2885    };
2886
2887    // Scan -ROOT- to pick up META regions
2888    MetaScanner.metaScan(getConf(), null, visitor, null, null,
2889      Integer.MAX_VALUE, HConstants.ROOT_TABLE_NAME);
2890
2891    if (!checkMetaOnly) {
2892      // Scan .META. to pick up user regions
2893      MetaScanner.metaScan(getConf(), visitor);
2894    }
2895
2896    errors.print("");
2897    return true;
2898  }
2899
2900  /**
2901   * Stores the regioninfo entries scanned from META
2902   */
2903  static class MetaEntry extends HRegionInfo {
2904    ServerName regionServer;   // server hosting this region
2905    long modTime;          // timestamp of most recent modification metadata
2906    HRegionInfo splitA, splitB; //split daughters
2907
2908    public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
2909      this(rinfo, regionServer, modTime, null, null);
2910    }
2911
2912    public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
2913        HRegionInfo splitA, HRegionInfo splitB) {
2914      super(rinfo);
2915      this.regionServer = regionServer;
2916      this.modTime = modTime;
2917      this.splitA = splitA;
2918      this.splitB = splitB;
2919    }
2920
2921    public boolean equals(Object o) {
2922      boolean superEq = super.equals(o);
2923      if (!superEq) {
2924        return superEq;
2925      }
2926
2927      MetaEntry me = (MetaEntry) o;
2928      if (!regionServer.equals(me.regionServer)) {
2929        return false;
2930      }
2931      return (modTime == me.modTime);
2932    }
2933  }
2934
2935  /**
2936   * Stores the regioninfo entries from HDFS
2937   */
2938  static class HdfsEntry {
2939    HRegionInfo hri;
2940    Path hdfsRegionDir = null;
2941    long hdfsRegionDirModTime  = 0;
2942    boolean hdfsRegioninfoFilePresent = false;
2943    boolean hdfsOnlyEdits = false;
2944  }
2945
2946  /**
2947   * Stores the regioninfo retrieved from Online region servers.
2948   */
2949  static class OnlineEntry {
2950    HRegionInfo hri;
2951    ServerName hsa;
2952
2953    public String toString() {
2954      return hsa.toString() + ";" + hri.getRegionNameAsString();
2955    }
2956  }
2957
2958  /**
2959   * Maintain information about a particular region.  It gathers information
2960   * from three places -- HDFS, META, and region servers.
2961   */
2962  public static class HbckInfo implements KeyRange {
2963    private MetaEntry metaEntry = null; // info in META
2964    private HdfsEntry hdfsEntry = null; // info in HDFS
2965    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
2966    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
2967    private boolean skipChecks = false; // whether to skip further checks to this region info.
2968
2969    HbckInfo(MetaEntry metaEntry) {
2970      this.metaEntry = metaEntry;
2971    }
2972
2973    public synchronized void addServer(HRegionInfo hri, ServerName server) {
2974      OnlineEntry rse = new OnlineEntry() ;
2975      rse.hri = hri;
2976      rse.hsa = server;
2977      this.deployedEntries.add(rse);
2978      this.deployedOn.add(server);
2979    }
2980
2981    public synchronized String toString() {
2982      StringBuilder sb = new StringBuilder();
2983      sb.append("{ meta => ");
2984      sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
2985      sb.append( ", hdfs => " + getHdfsRegionDir());
2986      sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
2987      sb.append(" }");
2988      return sb.toString();
2989    }
2990
2991    @Override
2992    public byte[] getStartKey() {
2993      if (this.metaEntry != null) {
2994        return this.metaEntry.getStartKey();
2995      } else if (this.hdfsEntry != null) {
2996        return this.hdfsEntry.hri.getStartKey();
2997      } else {
2998        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
2999        return null;
3000      }
3001    }
3002
3003    @Override
3004    public byte[] getEndKey() {
3005      if (this.metaEntry != null) {
3006        return this.metaEntry.getEndKey();
3007      } else if (this.hdfsEntry != null) {
3008        return this.hdfsEntry.hri.getEndKey();
3009      } else {
3010        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3011        return null;
3012      }
3013    }
3014
3015    public byte[] getTableName() {
3016      if (this.metaEntry != null) {
3017        return this.metaEntry.getTableName();
3018      } else if (this.hdfsEntry != null) {
3019        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3020        // so we get the name from the Path
3021        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3022        return Bytes.toBytes(tableDir.getName());
3023      } else {
3024        // Currently no code exercises this path, but we could add one for
3025        // getting table name from OnlineEntry
3026        return null;
3027      }
3028    }
3029
3030    public String getRegionNameAsString() {
3031      if (metaEntry != null) {
3032        return metaEntry.getRegionNameAsString();
3033      } else if (hdfsEntry != null) {
3034        if (hdfsEntry.hri != null) {
3035          return hdfsEntry.hri.getRegionNameAsString();
3036        }
3037      }
3038      return null;
3039    }
3040
3041    public byte[] getRegionName() {
3042      if (metaEntry != null) {
3043        return metaEntry.getRegionName();
3044      } else if (hdfsEntry != null) {
3045        return hdfsEntry.hri.getRegionName();
3046      } else {
3047        return null;
3048      }
3049    }
3050
3051    Path getHdfsRegionDir() {
3052      if (hdfsEntry == null) {
3053        return null;
3054      }
3055      return hdfsEntry.hdfsRegionDir;
3056    }
3057
3058    boolean containsOnlyHdfsEdits() {
3059      if (hdfsEntry == null) {
3060        return false;
3061      }
3062      return hdfsEntry.hdfsOnlyEdits;
3063    }
3064
3065    boolean isHdfsRegioninfoPresent() {
3066      if (hdfsEntry == null) {
3067        return false;
3068      }
3069      return hdfsEntry.hdfsRegioninfoFilePresent;
3070    }
3071
3072    long getModTime() {
3073      if (hdfsEntry == null) {
3074        return 0;
3075      }
3076      return hdfsEntry.hdfsRegionDirModTime;
3077    }
3078
3079    HRegionInfo getHdfsHRI() {
3080      if (hdfsEntry == null) {
3081        return null;
3082      }
3083      return hdfsEntry.hri;
3084    }
3085
3086    public void setSkipChecks(boolean skipChecks) {
3087      this.skipChecks = skipChecks;
3088    }
3089
3090    public boolean isSkipChecks() {
3091      return skipChecks;
3092    }
3093  }
3094
3095  final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3096    @Override
3097    public int compare(HbckInfo l, HbckInfo r) {
3098      if (l == r) {
3099        // same instance
3100        return 0;
3101      }
3102
3103      int tableCompare = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3104          l.getTableName(), r.getTableName());
3105      if (tableCompare != 0) {
3106        return tableCompare;
3107      }
3108
3109      int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3110          l.getStartKey(), r.getStartKey());
3111      if (startComparison != 0) {
3112        return startComparison;
3113      }
3114
3115      // Special case for absolute endkey
3116      byte[] endKey = r.getEndKey();
3117      endKey = (endKey.length == 0) ? null : endKey;
3118      byte[] endKey2 = l.getEndKey();
3119      endKey2 = (endKey2.length == 0) ? null : endKey2;
3120      int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3121          endKey2,  endKey);
3122
3123      if (endComparison != 0) {
3124        return endComparison;
3125      }
3126
3127      // use regionId as tiebreaker.
3128      // Null is considered after all possible values so make it bigger.
3129      if (l.hdfsEntry == null && r.hdfsEntry == null) {
3130        return 0;
3131      }
3132      if (l.hdfsEntry == null && r.hdfsEntry != null) {
3133        return 1;
3134      }
3135      // l.hdfsEntry must not be null
3136      if (r.hdfsEntry == null) {
3137        return -1;
3138      }
3139      // both l.hdfsEntry and r.hdfsEntry must not be null.
3140      return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId());
3141    }
3142  };
3143
3144  /**
3145   * Prints summary of all tables found on the system.
3146   */
3147  private void printTableSummary(SortedMap<String, TableInfo> tablesInfo) {
3148    StringBuilder sb = new StringBuilder();
3149    errors.print("Summary:");
3150    for (TableInfo tInfo : tablesInfo.values()) {
3151      if (errors.tableHasErrors(tInfo)) {
3152        errors.print("Table " + tInfo.getName() + " is inconsistent.");
3153      } else {
3154        errors.print("  " + tInfo.getName() + " is okay.");
3155      }
3156      errors.print("    Number of regions: " + tInfo.getNumRegions());
3157      sb.setLength(0); // clear out existing buffer, if any.
3158      sb.append("    Deployed on: ");
3159      for (ServerName server : tInfo.deployedOn) {
3160        sb.append(" " + server.toString());
3161      }
3162      errors.print(sb.toString());
3163    }
3164  }
3165
3166  static ErrorReporter getErrorReporter(
3167      final Configuration conf) throws ClassNotFoundException {
3168    Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3169    return (ErrorReporter)ReflectionUtils.newInstance(reporter, conf);
3170  }
3171
3172  public interface ErrorReporter {
3173    public static enum ERROR_CODE {
3174      UNKNOWN, NO_META_REGION, NULL_ROOT_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3175      NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3176      MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3177      FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3178      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3179      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3180      WRONG_USAGE, BOUNDARIES_ERROR
3181    }
3182    public void clear();
3183    public void report(String message);
3184    public void reportError(String message);
3185    public void reportError(ERROR_CODE errorCode, String message);
3186    public void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3187    public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3188    public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, HbckInfo info2);
3189    public int summarize();
3190    public void detail(String details);
3191    public ArrayList<ERROR_CODE> getErrorList();
3192    public void progress();
3193    public void print(String message);
3194    public void resetErrors();
3195    public boolean tableHasErrors(TableInfo table);
3196  }
3197
3198  static class PrintingErrorReporter implements ErrorReporter {
3199    public int errorCount = 0;
3200    private int showProgress;
3201
3202    Set<TableInfo> errorTables = new HashSet<TableInfo>();
3203
3204    // for use by unit tests to verify which errors were discovered
3205    private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3206
3207    public void clear() {
3208      errorTables.clear();
3209      errorList.clear();
3210      errorCount = 0;
3211    }
3212
3213    public synchronized void reportError(ERROR_CODE errorCode, String message) {
3214      if (errorCode == ERROR_CODE.WRONG_USAGE) {
3215        System.err.println(message);
3216        return;
3217      }
3218
3219      errorList.add(errorCode);
3220      if (!summary) {
3221        System.out.println("ERROR: " + message);
3222      }
3223      errorCount++;
3224      showProgress = 0;
3225    }
3226
3227    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3228      errorTables.add(table);
3229      reportError(errorCode, message);
3230    }
3231    
3232    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3233                                         HbckInfo info) {
3234      errorTables.add(table);
3235      String reference = "(region " + info.getRegionNameAsString() + ")";
3236      reportError(errorCode, reference + " " + message);
3237    }
3238
3239    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3240                                         HbckInfo info1, HbckInfo info2) {
3241      errorTables.add(table);
3242      String reference = "(regions " + info1.getRegionNameAsString()
3243          + " and " + info2.getRegionNameAsString() + ")";
3244      reportError(errorCode, reference + " " + message);
3245    }
3246
3247    public synchronized void reportError(String message) {
3248      reportError(ERROR_CODE.UNKNOWN, message);
3249    }
3250
3251    /**
3252     * Report error information, but do not increment the error count.  Intended for cases
3253     * where the actual error would have been reported previously.
3254     * @param message
3255     */
3256    public synchronized void report(String message) {
3257      if (! summary) {
3258        System.out.println("ERROR: " + message);
3259      }
3260      showProgress = 0;
3261    }
3262
3263    public synchronized int summarize() {
3264      System.out.println(Integer.toString(errorCount) +
3265                         " inconsistencies detected.");
3266      if (errorCount == 0) {
3267        System.out.println("Status: OK");
3268        return 0;
3269      } else {
3270        System.out.println("Status: INCONSISTENT");
3271        return -1;
3272      }
3273    }
3274
3275    public ArrayList<ERROR_CODE> getErrorList() {
3276      return errorList;
3277    }
3278
3279    public synchronized void print(String message) {
3280      if (!summary) {
3281        System.out.println(message);
3282      }
3283    }
3284
3285    @Override
3286    public boolean tableHasErrors(TableInfo table) {
3287      return errorTables.contains(table);
3288    }
3289
3290    @Override
3291    public void resetErrors() {
3292      errorCount = 0;
3293    }
3294
3295    public synchronized void detail(String message) {
3296      if (details) {
3297        System.out.println(message);
3298      }
3299      showProgress = 0;
3300    }
3301
3302    public synchronized void progress() {
3303      if (showProgress++ == 10) {
3304        if (!summary) {
3305          System.out.print(".");
3306        }
3307        showProgress = 0;
3308      }
3309    }
3310  }
3311
3312  /**
3313   * Contact a region server and get all information from it
3314   */
3315  static class WorkItemRegion implements Callable<Void> {
3316    private HBaseFsck hbck;
3317    private ServerName rsinfo;
3318    private ErrorReporter errors;
3319    private HConnection connection;
3320
3321    WorkItemRegion(HBaseFsck hbck, ServerName info,
3322                   ErrorReporter errors, HConnection connection) {
3323      this.hbck = hbck;
3324      this.rsinfo = info;
3325      this.errors = errors;
3326      this.connection = connection;
3327    }
3328
3329    @Override
3330    public synchronized Void call() throws IOException {
3331      errors.progress();
3332      try {
3333        HRegionInterface server =
3334            connection.getHRegionConnection(rsinfo.getHostname(), rsinfo.getPort());
3335
3336        // list all online regions from this region server
3337        List<HRegionInfo> regions = server.getOnlineRegions();
3338        regions = filterRegions(regions);
3339        if (details) {
3340          errors.detail("RegionServer: " + rsinfo.getServerName() +
3341                           " number of regions: " + regions.size());
3342          for (HRegionInfo rinfo: regions) {
3343            errors.detail("  " + rinfo.getRegionNameAsString() +
3344                             " id: " + rinfo.getRegionId() +
3345                             " encoded_name: " + rinfo.getEncodedName() +
3346                             " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3347                             " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3348          }
3349        }
3350
3351        // check to see if the existence of this region matches the region in META
3352        for (HRegionInfo r:regions) {
3353          HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3354          hbi.addServer(r, rsinfo);
3355        }
3356      } catch (IOException e) {          // unable to connect to the region server. 
3357        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3358          " Unable to fetch region information. " + e);
3359        throw e;
3360      }
3361      return null;
3362    }
3363
3364    private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3365      List<HRegionInfo> ret = Lists.newArrayList();
3366      for (HRegionInfo hri : regions) {
3367        if (hri.isMetaTable() || (!hbck.checkMetaOnly
3368            && hbck.isTableIncluded(hri.getTableNameAsString()))) {
3369          ret.add(hri);
3370        }
3371      }
3372      return ret;
3373    }
3374  }
3375
3376  /**
3377   * Contact hdfs and get all information about specified table directory into
3378   * regioninfo list.
3379   */
3380  static class WorkItemHdfsDir implements Callable<Void> {
3381    private HBaseFsck hbck;
3382    private FileStatus tableDir;
3383    private ErrorReporter errors;
3384    private FileSystem fs;
3385
3386    WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors, 
3387                    FileStatus status) {
3388      this.hbck = hbck;
3389      this.fs = fs;
3390      this.tableDir = status;
3391      this.errors = errors;
3392    }
3393
3394    @Override
3395    public synchronized Void call() throws IOException {
3396      try {
3397        String tableName = tableDir.getPath().getName();
3398        // ignore hidden files
3399        if (tableName.startsWith(".") &&
3400            !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME))) {
3401          return null;
3402        }
3403        // level 2: <HBASE_DIR>/<table>/*
3404        FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3405        for (FileStatus regionDir : regionDirs) {
3406          String encodedName = regionDir.getPath().getName();
3407          // ignore directories that aren't hexadecimal
3408          if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
3409            continue;
3410          }
3411
3412          LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
3413          HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
3414          HdfsEntry he = new HdfsEntry();
3415          synchronized (hbi) {
3416            if (hbi.getHdfsRegionDir() != null) {
3417              errors.print("Directory " + encodedName + " duplicate??" +
3418                           hbi.getHdfsRegionDir());
3419            }
3420
3421            he.hdfsRegionDir = regionDir.getPath();
3422            he.hdfsRegionDirModTime = regionDir.getModificationTime();
3423            Path regioninfoFile = new Path(he.hdfsRegionDir, HRegion.REGIONINFO_FILE);
3424            he.hdfsRegioninfoFilePresent = fs.exists(regioninfoFile);
3425            // we add to orphan list when we attempt to read .regioninfo
3426
3427            // Set a flag if this region contains only edits
3428            // This is special case if a region is left after split
3429            he.hdfsOnlyEdits = true;
3430            FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3431            Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath());
3432            for (FileStatus subDir : subDirs) {
3433              String sdName = subDir.getPath().getName();
3434              if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3435                he.hdfsOnlyEdits = false;
3436                break;
3437              }
3438            }
3439            hbi.hdfsEntry = he;
3440          }
3441        }
3442      } catch (IOException e) {
3443        // unable to connect to the region server.
3444        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3445            + tableDir.getPath().getName()
3446            + " Unable to fetch region information. " + e);
3447        throw e;
3448      }
3449      return null;
3450    }
3451  }
3452
3453  /**
3454   * Contact hdfs and get all information about specified table directory into
3455   * regioninfo list.
3456   */
3457  static class WorkItemHdfsRegionInfo implements Callable<Void> {
3458    private HbckInfo hbi;
3459    private HBaseFsck hbck;
3460    private ErrorReporter errors;
3461
3462    WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3463      this.hbi = hbi;
3464      this.hbck = hbck;
3465      this.errors = errors;
3466    }
3467
3468    @Override
3469    public synchronized Void call() throws IOException {
3470      // only load entries that haven't been loaded yet.
3471      if (hbi.getHdfsHRI() == null) {
3472        try {
3473          hbck.loadHdfsRegioninfo(hbi);
3474        } catch (IOException ioe) {
3475          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3476              + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
3477              + hbi.getHdfsRegionDir()
3478              + "!  It may be an invalid format or version file.  Treating as "
3479              + "an orphaned regiondir.";
3480          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3481          try {
3482            hbck.debugLsr(hbi.getHdfsRegionDir());
3483          } catch (IOException ioe2) {
3484            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3485            throw ioe2;
3486          }
3487          hbck.orphanHdfsDirs.add(hbi);
3488          throw ioe;
3489        }
3490      }
3491      return null;
3492    }
3493  };
3494
3495  /**
3496   * Display the full report from fsck. This displays all live and dead region
3497   * servers, and all known regions.
3498   */
3499  public void setDisplayFullReport() {
3500    details = true;
3501  }
3502
3503  /**
3504   * Set summary mode.
3505   * Print only summary of the tables and status (OK or INCONSISTENT)
3506   */
3507  void setSummary() {
3508    summary = true;
3509  }
3510
3511  /**
3512   * Set META check mode.
3513   * Print only info about META table deployment/state
3514   */
3515  void setCheckMetaOnly() {
3516    checkMetaOnly = true;
3517  }
3518
3519  /**
3520   * Check if we should rerun fsck again. This checks if we've tried to
3521   * fix something and we should rerun fsck tool again.
3522   * Display the full report from fsck. This displays all live and dead
3523   * region servers, and all known regions.
3524   */
3525  void setShouldRerun() {
3526    rerun = true;
3527  }
3528
3529  boolean shouldRerun() {
3530    return rerun;
3531  }
3532
3533  /**
3534   * Fix inconsistencies found by fsck. This should try to fix errors (if any)
3535   * found by fsck utility.
3536   */
3537  public void setFixAssignments(boolean shouldFix) {
3538    fixAssignments = shouldFix;
3539  }
3540
3541  boolean shouldFixAssignments() {
3542    return fixAssignments;
3543  }
3544
3545  public void setFixMeta(boolean shouldFix) {
3546    fixMeta = shouldFix;
3547  }
3548
3549  boolean shouldFixMeta() {
3550    return fixMeta;
3551  }
3552
3553  public void setCheckHdfs(boolean checking) {
3554    checkHdfs = checking;
3555  }
3556
3557  boolean shouldCheckHdfs() {
3558    return checkHdfs;
3559  }
3560
3561  public void setFixHdfsHoles(boolean shouldFix) {
3562    fixHdfsHoles = shouldFix;
3563  }
3564
3565  boolean shouldFixHdfsHoles() {
3566    return fixHdfsHoles;
3567  }
3568
3569  public void setFixTableOrphans(boolean shouldFix) {
3570    fixTableOrphans = shouldFix;
3571  }
3572
3573  boolean shouldFixTableOrphans() {
3574    return fixTableOrphans;
3575  }
3576
3577  public void setFixHdfsOverlaps(boolean shouldFix) {
3578    fixHdfsOverlaps = shouldFix;
3579  }
3580
3581  boolean shouldFixHdfsOverlaps() {
3582    return fixHdfsOverlaps;
3583  }
3584
3585  public void setFixHdfsOrphans(boolean shouldFix) {
3586    fixHdfsOrphans = shouldFix;
3587  }
3588
3589  boolean shouldFixHdfsOrphans() {
3590    return fixHdfsOrphans;
3591  }
3592
3593  public void setFixVersionFile(boolean shouldFix) {
3594    fixVersionFile = shouldFix;
3595  }
3596
3597  public boolean shouldFixVersionFile() {
3598    return fixVersionFile;
3599  }
3600
3601  public void setSidelineBigOverlaps(boolean sbo) {
3602    this.sidelineBigOverlaps = sbo;
3603  }
3604
3605  public boolean shouldSidelineBigOverlaps() {
3606    return sidelineBigOverlaps;
3607  }
3608
3609  public void setFixSplitParents(boolean shouldFix) {
3610    fixSplitParents = shouldFix;
3611  }
3612
3613  boolean shouldFixSplitParents() {
3614    return fixSplitParents;
3615  }
3616
3617  public void setFixReferenceFiles(boolean shouldFix) {
3618    fixReferenceFiles = shouldFix;
3619  }
3620
3621  boolean shouldFixReferenceFiles() {
3622    return fixReferenceFiles;
3623  }
3624
3625  public boolean shouldIgnorePreCheckPermission() {
3626    return ignorePreCheckPermission;
3627  }
3628
3629  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3630    this.ignorePreCheckPermission = ignorePreCheckPermission;
3631  }
3632
3633  /**
3634   * @param mm maximum number of regions to merge into a single region.
3635   */
3636  public void setMaxMerge(int mm) {
3637    this.maxMerge = mm;
3638  }
3639
3640  public int getMaxMerge() {
3641    return maxMerge;
3642  }
3643
3644  public void setMaxOverlapsToSideline(int mo) {
3645    this.maxOverlapsToSideline = mo;
3646  }
3647
3648  public int getMaxOverlapsToSideline() {
3649    return maxOverlapsToSideline;
3650  }
3651
3652  /**
3653   * Only check/fix tables specified by the list,
3654   * Empty list means all tables are included.
3655   */
3656  boolean isTableIncluded(String table) {
3657    return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
3658  }
3659
3660  public void includeTable(String table) {
3661    tablesIncluded.add(table);
3662  }
3663
3664  Set<String> getIncludedTables() {
3665    return new HashSet<String>(tablesIncluded);
3666  }
3667
3668  /**
3669   * We are interested in only those tables that have not changed their state in
3670   * META during the last few seconds specified by hbase.admin.fsck.timelag
3671   * @param seconds - the time in seconds
3672   */
3673  public void setTimeLag(long seconds) {
3674    timelag = seconds * 1000; // convert to milliseconds
3675  }
3676
3677  /**
3678   * 
3679   * @param sidelineDir - HDFS path to sideline data
3680   */
3681  public void setSidelineDir(String sidelineDir) {
3682    this.sidelineDir = new Path(sidelineDir);
3683  }
3684
3685  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3686    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3687  }
3688
3689  public HFileCorruptionChecker getHFilecorruptionChecker() {
3690    return hfcc;
3691  }
3692
3693  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3694    this.hfcc = hfcc;
3695  }
3696
3697  /**
3698   * Set region boundaries check mode.
3699   */
3700  void setRegionBoundariesCheck() {
3701    checkRegionBoundaries = true;
3702  }
3703
3704  public void setRetCode(int code) {
3705    this.retcode = code;
3706  }
3707
3708  public int getRetCode() {
3709    return retcode;
3710  }
3711
3712  protected HBaseFsck printUsageAndExit() {
3713    StringWriter sw = new StringWriter(2048);
3714    PrintWriter out = new PrintWriter(sw);
3715    out.println("Usage: fsck [opts] {only tables}");
3716    out.println(" where [opts] are:");
3717    out.println("   -help Display help options (this)");
3718    out.println("   -details Display full report of all regions.");
3719    out.println("   -timelag <timeInSeconds>  Process only regions that " +
3720                       " have not experienced any metadata updates in the last " +
3721                       " <timeInSeconds> seconds.");
3722    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3723        " before checking if the fix worked if run with -fix");
3724    out.println("   -summary Print only summary of the tables and status.");
3725    out.println("   -metaonly Only check the state of ROOT and META tables.");
3726    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta and root.");
3727
3728    out.println("");
3729    out.println("  Metadata Repair options: (expert features, use with caution!)");
3730    out.println("   -fix              Try to fix region assignments.  This is for backwards compatiblity");
3731    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
3732    out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
3733    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
3734        + " Assumes META region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3735    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
3736    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
3737    out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3738    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
3739    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
3740    out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
3741    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
3742    out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
3743    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
3744    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
3745    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
3746    out.println("   -boundaries Verify that regions boundaries are the same between META and store files.");
3747
3748    out.println("");
3749    out.println("  Datafile Repair options: (expert features, use with caution!)");
3750    out.println("   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
3751    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
3752
3753    out.println("");
3754    out.println("  Metadata Repair shortcuts");
3755    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
3756        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles");
3757    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3758
3759    out.flush();
3760    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3761
3762    setRetCode(-2);
3763    return this;
3764  }
3765
3766  /**
3767   * Main program
3768   *
3769   * @param args
3770   * @throws Exception
3771   */
3772  public static void main(String[] args) throws Exception {
3773    // create a fsck object
3774    Configuration conf = HBaseConfiguration.create();
3775    Path hbasedir = new Path(conf.get(HConstants.HBASE_DIR));
3776    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3777    conf.set("fs.defaultFS", defaultFs.toString());     // for hadoop 0.21+
3778    conf.set("fs.default.name", defaultFs.toString());  // for hadoop 0.20
3779    int ret = ToolRunner.run(new HBaseFsck(conf), args);
3780    System.exit(ret);
3781  }
3782
3783  @Override
3784  public int run(String[] args) throws Exception {
3785    // reset the numThreads due to user may set it via generic options
3786    initialPoolNumThreads();
3787    
3788    exec(executor, args);
3789    return getRetCode();
3790  }
3791
3792  public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
3793    InterruptedException {
3794    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3795
3796    boolean checkCorruptHFiles = false;
3797    boolean sidelineCorruptHFiles = false;
3798
3799    // Process command-line args.
3800    for (int i = 0; i < args.length; i++) {
3801      String cmd = args[i];
3802      if (cmd.equals("-help") || cmd.equals("-h")) {
3803        return printUsageAndExit();
3804      } else if (cmd.equals("-details")) {
3805        setDisplayFullReport();
3806      } else if (cmd.equals("-timelag")) {
3807        if (i == args.length - 1) {
3808          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3809          return printUsageAndExit();
3810        }
3811        try {
3812          long timelag = Long.parseLong(args[i+1]);
3813          setTimeLag(timelag);
3814        } catch (NumberFormatException e) {
3815          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3816          return printUsageAndExit();
3817        }
3818        i++;
3819      } else if (cmd.equals("-sleepBeforeRerun")) {
3820        if (i == args.length - 1) {
3821          errors.reportError(ERROR_CODE.WRONG_USAGE,
3822            "HBaseFsck: -sleepBeforeRerun needs a value.");
3823          return printUsageAndExit();
3824        }
3825        try {
3826          sleepBeforeRerun = Long.parseLong(args[i+1]);
3827        } catch (NumberFormatException e) {
3828          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3829          return printUsageAndExit();
3830        }
3831        i++;
3832      } else if (cmd.equals("-sidelineDir")) {
3833        if (i == args.length - 1) {
3834          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3835          return printUsageAndExit();
3836        }
3837        i++;
3838        setSidelineDir(args[i]);
3839      } else if (cmd.equals("-fix")) {
3840        errors.reportError(ERROR_CODE.WRONG_USAGE,
3841          "This option is deprecated, please use  -fixAssignments instead.");
3842        setFixAssignments(true);
3843      } else if (cmd.equals("-fixAssignments")) {
3844        setFixAssignments(true);
3845      } else if (cmd.equals("-fixMeta")) {
3846        setFixMeta(true);
3847      } else if (cmd.equals("-noHdfsChecking")) {
3848        setCheckHdfs(false);
3849      } else if (cmd.equals("-fixHdfsHoles")) {
3850        setFixHdfsHoles(true);
3851      } else if (cmd.equals("-fixHdfsOrphans")) {
3852        setFixHdfsOrphans(true);
3853      } else if (cmd.equals("-fixTableOrphans")) {
3854        setFixTableOrphans(true);
3855      } else if (cmd.equals("-fixHdfsOverlaps")) {
3856        setFixHdfsOverlaps(true);
3857      } else if (cmd.equals("-fixVersionFile")) {
3858        setFixVersionFile(true);
3859      } else if (cmd.equals("-sidelineBigOverlaps")) {
3860        setSidelineBigOverlaps(true);
3861      } else if (cmd.equals("-fixSplitParents")) {
3862        setFixSplitParents(true);
3863      } else if (cmd.equals("-ignorePreCheckPermission")) {
3864        setIgnorePreCheckPermission(true);
3865      } else if (cmd.equals("-checkCorruptHFiles")) {
3866        checkCorruptHFiles = true;
3867      } else if (cmd.equals("-sidelineCorruptHFiles")) {
3868        sidelineCorruptHFiles = true;
3869      } else if (cmd.equals("-fixReferenceFiles")) {
3870        setFixReferenceFiles(true);
3871      } else if (cmd.equals("-repair")) {
3872        // this attempts to merge overlapping hdfs regions, needs testing
3873        // under load
3874        setFixHdfsHoles(true);
3875        setFixHdfsOrphans(true);
3876        setFixMeta(true);
3877        setFixAssignments(true);
3878        setFixHdfsOverlaps(true);
3879        setFixVersionFile(true);
3880        setSidelineBigOverlaps(true);
3881        setFixSplitParents(false);
3882        setCheckHdfs(true);
3883        setFixReferenceFiles(true);
3884      } else if (cmd.equals("-repairHoles")) {
3885        // this will make all missing hdfs regions available but may lose data
3886        setFixHdfsHoles(true);
3887        setFixHdfsOrphans(false);
3888        setFixMeta(true);
3889        setFixAssignments(true);
3890        setFixHdfsOverlaps(false);
3891        setSidelineBigOverlaps(false);
3892        setFixSplitParents(false);
3893        setCheckHdfs(true);
3894      } else if (cmd.equals("-maxOverlapsToSideline")) {
3895        if (i == args.length - 1) {
3896          errors.reportError(ERROR_CODE.WRONG_USAGE,
3897            "-maxOverlapsToSideline needs a numeric value argument.");
3898          return printUsageAndExit();
3899        }
3900        try {
3901          int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
3902          setMaxOverlapsToSideline(maxOverlapsToSideline);
3903        } catch (NumberFormatException e) {
3904          errors.reportError(ERROR_CODE.WRONG_USAGE,
3905            "-maxOverlapsToSideline needs a numeric value argument.");
3906          return printUsageAndExit();
3907        }
3908        i++;
3909      } else if (cmd.equals("-maxMerge")) {
3910        if (i == args.length - 1) {
3911          errors.reportError(ERROR_CODE.WRONG_USAGE,
3912            "-maxMerge needs a numeric value argument.");
3913          return printUsageAndExit();
3914        }
3915        try {
3916          int maxMerge = Integer.parseInt(args[i+1]);
3917          setMaxMerge(maxMerge);
3918        } catch (NumberFormatException e) {
3919          errors.reportError(ERROR_CODE.WRONG_USAGE,
3920            "-maxMerge needs a numeric value argument.");
3921          return printUsageAndExit();
3922        }
3923        i++;
3924      } else if (cmd.equals("-summary")) {
3925        setSummary();
3926      } else if (cmd.equals("-metaonly")) {
3927        setCheckMetaOnly();
3928      } else if (cmd.equals("-boundaries")) {
3929        setRegionBoundariesCheck();
3930      } else if (cmd.startsWith("-")) {
3931        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3932        return printUsageAndExit();
3933      } else {
3934        includeTable(cmd);
3935        errors.print("Allow checking/fixes for table: " + cmd);
3936      }
3937    }
3938
3939    // pre-check current user has FS write permission or not
3940    try {
3941      preCheckPermission();
3942    } catch (AccessControlException ace) {
3943      Runtime.getRuntime().exit(-1);
3944    } catch (IOException ioe) {
3945      Runtime.getRuntime().exit(-1);
3946    }
3947
3948    // do the real work of hbck
3949    connect();
3950
3951    try {
3952      // if corrupt file mode is on, first fix them since they may be opened later
3953      if (checkCorruptHFiles || sidelineCorruptHFiles) {
3954        LOG.info("Checking all hfiles for corruption");
3955        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3956        setHFileCorruptionChecker(hfcc); // so we can get result
3957        Collection<String> tables = getIncludedTables();
3958        Collection<Path> tableDirs = new ArrayList<Path>();
3959        Path rootdir = FSUtils.getRootDir(getConf());
3960        if (tables.size() > 0) {
3961          for (String t : tables) {
3962            tableDirs.add(FSUtils.getTablePath(rootdir, t));
3963          }
3964        } else {
3965          tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
3966        }
3967        hfcc.checkTables(tableDirs);
3968        hfcc.report(errors);
3969      }
3970
3971      // check and fix table integrity, region consistency.
3972      int code = onlineHbck();
3973      setRetCode(code);
3974      // If we have changed the HBase state it is better to run hbck again
3975      // to see if we haven't broken something else in the process.
3976      // We run it only once more because otherwise we can easily fall into
3977      // an infinite loop.
3978      if (shouldRerun()) {
3979        try {
3980          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3981          Thread.sleep(sleepBeforeRerun);
3982        } catch (InterruptedException ie) {
3983          return this;
3984        }
3985        // Just report
3986        setFixAssignments(false);
3987        setFixMeta(false);
3988        setFixHdfsHoles(false);
3989        setFixHdfsOverlaps(false);
3990        setFixVersionFile(false);
3991        setFixTableOrphans(false);
3992        errors.resetErrors();
3993        code = onlineHbck();
3994        setRetCode(code);
3995      }
3996    } finally {
3997      IOUtils.cleanup(null, connection, meta, admin);
3998    }
3999    return this;
4000  }
4001
4002  /**
4003   * ls -r for debugging purposes
4004   */
4005  void debugLsr(Path p) throws IOException {
4006    debugLsr(getConf(), p, errors);
4007  }
4008
4009  /**
4010   * ls -r for debugging purposes
4011   */
4012  public static void debugLsr(Configuration conf,
4013      Path p) throws IOException {
4014    debugLsr(conf, p, new PrintingErrorReporter());
4015  }
4016
4017  /**
4018   * ls -r for debugging purposes
4019   */
4020  public static void debugLsr(Configuration conf,
4021      Path p, ErrorReporter errors) throws IOException {
4022    if (!LOG.isDebugEnabled() || p == null) {
4023      return;
4024    }
4025    FileSystem fs = p.getFileSystem(conf);
4026
4027    if (!fs.exists(p)) {
4028      // nothing
4029      return;
4030    }
4031    errors.print(p.toString());
4032
4033    if (fs.isFile(p)) {
4034      return;
4035    }
4036
4037    if (fs.getFileStatus(p).isDir()) {
4038      FileStatus[] fss= fs.listStatus(p);
4039      for (FileStatus status : fss) {
4040        debugLsr(conf, status.getPath(), errors);
4041      }
4042    }
4043  }
4044}