1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.lang.reflect.Constructor;
24  import java.lang.reflect.InvocationTargetException;
25  import java.net.InetAddress;
26  import java.net.InetSocketAddress;
27  import java.net.UnknownHostException;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collection;
31  import java.util.Collections;
32  import java.util.Comparator;
33  import java.util.HashSet;
34  import java.util.Iterator;
35  import java.util.List;
36  import java.util.Map;
37  import java.util.Set;
38  import java.util.concurrent.TimeUnit;
39  import java.util.concurrent.atomic.AtomicReference;
40  import java.util.regex.Pattern;
41  
42  import javax.servlet.ServletException;
43  import javax.servlet.http.HttpServlet;
44  import javax.servlet.http.HttpServletRequest;
45  import javax.servlet.http.HttpServletResponse;
46  
47  import org.apache.commons.logging.Log;
48  import org.apache.commons.logging.LogFactory;
49  import org.apache.hadoop.conf.Configuration;
50  import org.apache.hadoop.fs.Path;
51  import org.apache.hadoop.hbase.ClusterStatus;
52  import org.apache.hadoop.hbase.CoordinatedStateException;
53  import org.apache.hadoop.hbase.CoordinatedStateManager;
54  import org.apache.hadoop.hbase.DoNotRetryIOException;
55  import org.apache.hadoop.hbase.HBaseIOException;
56  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
57  import org.apache.hadoop.hbase.HColumnDescriptor;
58  import org.apache.hadoop.hbase.HConstants;
59  import org.apache.hadoop.hbase.HRegionInfo;
60  import org.apache.hadoop.hbase.HRegionLocation;
61  import org.apache.hadoop.hbase.HTableDescriptor;
62  import org.apache.hadoop.hbase.MasterNotRunningException;
63  import org.apache.hadoop.hbase.MetaTableAccessor;
64  import org.apache.hadoop.hbase.NamespaceDescriptor;
65  import org.apache.hadoop.hbase.NamespaceNotFoundException;
66  import org.apache.hadoop.hbase.PleaseHoldException;
67  import org.apache.hadoop.hbase.RegionLocations;
68  import org.apache.hadoop.hbase.Server;
69  import org.apache.hadoop.hbase.ServerLoad;
70  import org.apache.hadoop.hbase.ServerName;
71  import org.apache.hadoop.hbase.TableDescriptors;
72  import org.apache.hadoop.hbase.TableName;
73  import org.apache.hadoop.hbase.TableNotDisabledException;
74  import org.apache.hadoop.hbase.TableNotFoundException;
75  import org.apache.hadoop.hbase.UnknownRegionException;
76  import org.apache.hadoop.hbase.classification.InterfaceAudience;
77  import org.apache.hadoop.hbase.client.MetaScanner;
78  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
79  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
80  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
81  import org.apache.hadoop.hbase.client.Result;
82  import org.apache.hadoop.hbase.client.TableState;
83  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
84  import org.apache.hadoop.hbase.exceptions.DeserializationException;
85  import org.apache.hadoop.hbase.executor.ExecutorType;
86  import org.apache.hadoop.hbase.ipc.RequestContext;
87  import org.apache.hadoop.hbase.ipc.RpcServer;
88  import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
89  import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
90  import org.apache.hadoop.hbase.master.balancer.BalancerChore;
91  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
92  import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
93  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
94  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
95  import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
96  import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
97  import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
98  import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
99  import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler;
100 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
101 import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
102 import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
103 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
104 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
105 import org.apache.hadoop.hbase.master.handler.TruncateTableHandler;
106 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
107 import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
108 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
109 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
110 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
111 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
112 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
113 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
114 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
115 import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
116 import org.apache.hadoop.hbase.quotas.RegionStateListener;
117 import org.apache.hadoop.hbase.regionserver.HRegionServer;
118 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
119 import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost;
120 import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
121 import org.apache.hadoop.hbase.replication.regionserver.Replication;
122 import org.apache.hadoop.hbase.security.UserProvider;
123 import org.apache.hadoop.hbase.util.Addressing;
124 import org.apache.hadoop.hbase.util.Bytes;
125 import org.apache.hadoop.hbase.util.CompressionTest;
126 import org.apache.hadoop.hbase.util.EncryptionTest;
127 import org.apache.hadoop.hbase.util.FSUtils;
128 import org.apache.hadoop.hbase.util.HBaseFsckRepair;
129 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
130 import org.apache.hadoop.hbase.util.HasThread;
131 import org.apache.hadoop.hbase.util.Pair;
132 import org.apache.hadoop.hbase.util.Threads;
133 import org.apache.hadoop.hbase.util.VersionInfo;
134 import org.apache.hadoop.hbase.util.ZKDataMigrator;
135 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
136 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
137 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
138 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
139 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
140 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
141 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
142 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
143 import org.apache.zookeeper.KeeperException;
144 import org.mortbay.jetty.Connector;
145 import org.mortbay.jetty.nio.SelectChannelConnector;
146 import org.mortbay.jetty.servlet.Context;
147 
148 import com.google.common.annotations.VisibleForTesting;
149 import com.google.common.collect.Maps;
150 import com.google.protobuf.Descriptors;
151 import com.google.protobuf.Service;
152 
153 /**
154  * HMaster is the "master server" for HBase. An HBase cluster has one active
155  * master.  If many masters are started, all compete.  Whichever wins goes on to
156  * run the cluster.  All others park themselves in their constructor until
157  * master or cluster shutdown or until the active master loses its lease in
158  * zookeeper.  Thereafter, all running masters jostle to take over the master role.
159  *
160  * <p>The Master can be asked to shut down the cluster. See {@link #shutdown()}.  In
161  * this case it will tell all regionservers to go down and then wait on them
162  * all reporting in that they are down.  This master will then shut itself down.
163  *
164  * <p>You can also shutdown just this master.  Call {@link #stopMaster()}.
165  *
166  * @see org.apache.zookeeper.Watcher
167  */
168 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
169 @SuppressWarnings("deprecation")
170 public class HMaster extends HRegionServer implements MasterServices, Server {
171   private static final Log LOG = LogFactory.getLog(HMaster.class.getName());
172 
173   /**
174    * Protection against zombie master. Started once the Master accepts active responsibility and
175    * begins taking over duties. Allows a finite time window for initialization before giving up ownership.
176    */
177   private static class InitializationMonitor extends HasThread {
178     /** The amount of time in milliseconds to sleep before checking initialization status. */
179     public static final String TIMEOUT_KEY = "hbase.master.initializationmonitor.timeout";
180     public static final long TIMEOUT_DEFAULT = TimeUnit.MILLISECONDS.convert(15, TimeUnit.MINUTES);
181 
182     /**
183      * If the timeout expires before initialization completes, call {@link System#exit(int)}
184      * when true; do nothing otherwise.
185      */
186     public static final String HALT_KEY = "hbase.master.initializationmonitor.haltontimeout";
187     public static final boolean HALT_DEFAULT = false;
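        /*
         * Illustrative watchdog tuning, a sketch only: the keys are the two
         * constants above; the values are arbitrary choices to fail fast.
         *
         *   Configuration conf = HBaseConfiguration.create();
         *   conf.setLong(TIMEOUT_KEY, TimeUnit.MINUTES.toMillis(5));
         *   conf.setBoolean(HALT_KEY, true);
         */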
188 
189     private final HMaster master;
190     private final long timeout;
191     private final boolean haltOnTimeout;
192 
193     /** Creates a Thread that monitors the {@link #isInitialized()} state. */
194     InitializationMonitor(HMaster master) {
195       super("MasterInitializationMonitor");
196       this.master = master;
197       this.timeout = master.getConfiguration().getLong(TIMEOUT_KEY, TIMEOUT_DEFAULT);
198       this.haltOnTimeout = master.getConfiguration().getBoolean(HALT_KEY, HALT_DEFAULT);
199       this.setDaemon(true);
200     }
201 
202     @Override
203     public void run() {
204       try {
205         while (!master.isStopped() && master.isActiveMaster()) {
206           Thread.sleep(timeout);
207           if (master.isInitialized()) {
208             LOG.debug("Initialization completed within allotted tolerance. Monitor exiting.");
209           } else {
210             LOG.error("Master failed to complete initialization after " + timeout + "ms. Please"
211                 + " consider submitting a bug report including a thread dump of this process.");
212             if (haltOnTimeout) {
213               LOG.error("Zombie Master exiting. Thread dump to stdout");
214               Threads.printThreadInfo(System.out, "Zombie HMaster");
215               System.exit(-1);
216             }
217           }
218         }
219       } catch (InterruptedException ie) {
220         LOG.trace("InitMonitor thread interrupted. Exiting.");
221       }
222     }
223   }
224 
225   // MASTER is the name of the webapp and the attribute name used for stuffing
226   // this instance into the web context.
227   public static final String MASTER = "master";
228 
229   // Manager and zk listener for master election
230   private final ActiveMasterManager activeMasterManager;
231   // Region server tracker
232   RegionServerTracker regionServerTracker;
233   // Draining region server tracker
234   private DrainingServerTracker drainingServerTracker;
235   // Tracker for load balancer state
236   LoadBalancerTracker loadBalancerTracker;
237 
238   /** Manager for table namespaces */
239   private TableNamespaceManager tableNamespaceManager;
240 
241   // Metrics for the HMaster
242   final MetricsMaster metricsMaster;
243   // file system manager for the master FS operations
244   private MasterFileSystem fileSystemManager;
245 
246   // server manager to deal with region server info
247   volatile ServerManager serverManager;
248 
249   // manager of assignment nodes in zookeeper
250   AssignmentManager assignmentManager;
251 
252   // buffer for "fatal error" notices from region servers
253   // in the cluster. This is only used for assisting
254   // operations/debugging.
255   MemoryBoundedLogMessageBuffer rsFatals;
256 
257   // flag set after we become the active master (used for testing)
258   private volatile boolean isActiveMaster = false;
259 
260   // flag set after we complete initialization once active,
261   // it is not private since it's used in unit tests
262   volatile boolean initialized = false;
263 
264   // flag set after master services are started,
265   // initialization may have not completed yet.
266   volatile boolean serviceStarted = false;
267 
268   // flag set after we complete assignMeta.
269   private volatile boolean serverShutdownHandlerEnabled = false;
270 
271   LoadBalancer balancer;
272   private BalancerChore balancerChore;
273   private ClusterStatusChore clusterStatusChore;
274   private ClusterStatusPublisher clusterStatusPublisherChore = null;
275 
276   CatalogJanitor catalogJanitorChore;
277   private LogCleaner logCleaner;
278   private HFileCleaner hfileCleaner;
279 
280   MasterCoprocessorHost cpHost;
281 
282   private final boolean preLoadTableDescriptors;
283 
284   // Time stamp for when this hmaster became active
285   private long masterActiveTime;
286 
287   //should we check the compression codec type at master side, default true, HBASE-6370
288   private final boolean masterCheckCompression;
289 
290   //should we check encryption settings at master side, default true
291   private final boolean masterCheckEncryption;
292 
293   Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
294 
295   // monitor for snapshot of hbase tables
296   SnapshotManager snapshotManager;
297   // monitor for distributed procedures
298   MasterProcedureManagerHost mpmHost;
299 
300   private MasterQuotaManager quotaManager;
301 
302   // handle table states
303   private TableStateManager tableStateManager;
304 
305   /** flag used in test cases in order to simulate RS failures during master initialization */
306   private volatile boolean initializationBeforeMetaAssignment = false;
307 
308   /** jetty server for master to redirect requests to regionserver infoServer */
309   private org.mortbay.jetty.Server masterJettyServer;
310 
311   public static class RedirectServlet extends HttpServlet {
312     private static final long serialVersionUID = 2894774810058302472L;
313     private static int regionServerInfoPort;
314 
315     @Override
316     public void doGet(HttpServletRequest request,
317         HttpServletResponse response) throws ServletException, IOException {
318       String redirectUrl = request.getScheme() + "://"
319         + request.getServerName() + ":" + regionServerInfoPort
320         + request.getRequestURI();
321       response.sendRedirect(redirectUrl);
322     }
323   }
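
      /*
       * Net effect of the servlet above, as a sketch (host and ports are
       * illustrative): a request to the master's original info port is
       * bounced to the embedded regionserver info server, preserving the
       * scheme and request URI:
       *
       *   GET http://master.example.org:16010/master-status
       *     -> 302 http://master.example.org:16030/master-status
       */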
324 
325   /**
326    * Initializes the HMaster. The steps are as follows:
327    * <p>
328    * <ol>
329    * <li>Initialize the local HRegionServer
330    * <li>Start the ActiveMasterManager.
331    * </ol>
332    * <p>
333    * Remaining steps of initialization occur in
334    * {@link #finishActiveMasterInitialization(MonitoredTask)} after
335    * the master becomes the active one.
336    *
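       * <p>Construction sketch (illustrative; assumes the coordinated state
       * manager is the ZK-backed one from
       * {@code CoordinatedStateManagerFactory.getCoordinatedStateManager(conf)},
       * which is how the command line normally supplies it):
       * <pre>
       * Configuration conf = HBaseConfiguration.create();
       * HMaster master = new HMaster(conf,
       *     CoordinatedStateManagerFactory.getCoordinatedStateManager(conf));
       * master.start(); // compete for active mastership
       * </pre>
       *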
337    * @throws KeeperException
338    * @throws IOException
339    */
340   public HMaster(final Configuration conf, CoordinatedStateManager csm)
341       throws IOException, KeeperException {
342     super(conf, csm);
343     this.rsFatals = new MemoryBoundedLogMessageBuffer(
344       conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
345 
346     LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) +
347         ", hbase.cluster.distributed=" + this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));
348 
349     // Disable usage of meta replicas in the master
350     this.conf.setBoolean(HConstants.USE_META_REPLICAS, false);
351 
352     Replication.decorateMasterConfiguration(this.conf);
353 
354     // Hack! Maps DFSClient => Master for logs.  HDFS made this
355     // config param for task trackers, but we can piggyback off of it.
356     if (this.conf.get("mapreduce.task.attempt.id") == null) {
357       this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
358     }
359 
360     // should we check the compression codec type at master side, default true, HBASE-6370
361     this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);
362 
363     // should we check encryption settings at master side, default true
364     this.masterCheckEncryption = conf.getBoolean("hbase.master.check.encryption", true);
365 
366     this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));
367 
368     // preload table descriptor at startup
369     this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true);
370 
371     // Do we publish the status?
372 
373     boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
374         HConstants.STATUS_PUBLISHED_DEFAULT);
375     Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
376         conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
377             ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
378             ClusterStatusPublisher.Publisher.class);
379 
380     if (shouldPublish) {
381       if (publisherClass == null) {
382         LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
383             ClusterStatusPublisher.STATUS_PUBLISHER_CLASS +
384             " is not set - not publishing status");
385       } else {
386         clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
387         getChoreService().scheduleChore(clusterStatusPublisherChore);
388       }
389     }
390     activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this);
391     int infoPort = putUpJettyServer();
392     startActiveMasterManager(infoPort);
393   }
394 
395   // Returns the actual infoPort; -1 means the info server is disabled.
396   private int putUpJettyServer() throws IOException {
397     if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
398       return -1;
399     }
400     int infoPort = conf.getInt("hbase.master.info.port.orig",
401       HConstants.DEFAULT_MASTER_INFOPORT);
402     // -1 is for disabling info server, so no redirecting
403     if (infoPort < 0 || infoServer == null) {
404       return -1;
405     }
406     String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
407     if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
408       String msg =
409           "Failed to start redirecting jetty server. Address " + addr
410               + " does not belong to this host. Correct configuration parameter: "
411               + "hbase.master.info.bindAddress";
412       LOG.error(msg);
413       throw new IOException(msg);
414     }
415 
416     RedirectServlet.regionServerInfoPort = infoServer.getPort();
417     masterJettyServer = new org.mortbay.jetty.Server();
418     Connector connector = new SelectChannelConnector();
419     connector.setHost(addr);
420     connector.setPort(infoPort);
421     masterJettyServer.addConnector(connector);
422     masterJettyServer.setStopAtShutdown(true);
423     Context context = new Context(masterJettyServer, "/", Context.NO_SESSIONS);
424     context.addServlet(RedirectServlet.class, "/*");
425     try {
426       masterJettyServer.start();
427     } catch (Exception e) {
428       throw new IOException("Failed to start redirecting jetty server", e);
429     }
430     return connector.getLocalPort();
431   }
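
      /*
       * Redirect-server knobs consulted above, as a sketch (the values shown
       * are the usual defaults and are assumptions here):
       *
       *   conf.setBoolean("hbase.master.infoserver.redirect", true);
       *   conf.setInt("hbase.master.info.port.orig", 16010);
       *   conf.set("hbase.master.info.bindAddress", "0.0.0.0");
       */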
432 
433   @Override
434   protected TableDescriptors getFsTableDescriptors() throws IOException {
435     return super.getFsTableDescriptors();
436   }
437 
438   /**
439    * For compatibility, if login with the regionserver credentials fails, try the master ones
440    */
441   @Override
442   protected void login(UserProvider user, String host) throws IOException {
443     try {
444       super.login(user, host);
445     } catch (IOException ie) {
446       user.login("hbase.master.keytab.file",
447         "hbase.master.kerberos.principal", host);
448     }
449   }
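
      /*
       * Master-side security settings consulted by the fallback above
       * (values illustrative; the keys are the ones passed to user.login):
       *
       *   conf.set("hbase.master.keytab.file", "/etc/hbase/conf/hbase.keytab");
       *   conf.set("hbase.master.kerberos.principal", "hbase/_HOST@EXAMPLE.COM");
       */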
450 
451   /**
452    * If configured to put regions on the active master,
453    * wait till this master becomes the active one.
454    * Otherwise, loop till the server is stopped or aborted.
455    */
456   @Override
457   protected void waitForMasterActive(){
458     boolean tablesOnMaster = BaseLoadBalancer.tablesOnMaster(conf);
459     while (!(tablesOnMaster && isActiveMaster)
460         && !isStopped() && !isAborted()) {
461       sleeper.sleep();
462     }
463   }
464 
465   @VisibleForTesting
466   public MasterRpcServices getMasterRpcServices() {
467     return (MasterRpcServices)rpcServices;
468   }
469 
470   public boolean balanceSwitch(final boolean b) throws IOException {
471     return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
472   }
473 
474   @Override
475   protected String getProcessName() {
476     return MASTER;
477   }
478 
479   @Override
480   protected boolean canCreateBaseZNode() {
481     return true;
482   }
483 
484   @Override
485   protected boolean canUpdateTableDescriptor() {
486     return true;
487   }
488 
489   @Override
490   protected RSRpcServices createRpcServices() throws IOException {
491     return new MasterRpcServices(this);
492   }
493 
494   @Override
495   protected void configureInfoServer() {
496     infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class);
497     infoServer.setAttribute(MASTER, this);
498     if (BaseLoadBalancer.tablesOnMaster(conf)) {
499       super.configureInfoServer();
500     }
501   }
502 
503   @Override
504   protected Class<? extends HttpServlet> getDumpServlet() {
505     return MasterDumpServlet.class;
506   }
507 
508   /**
509    * Emit the HMaster metrics, such as region in transition metrics.
510    * Wrapped in a try block just to be sure metrics don't abort the HMaster.
511    */
512   @Override
513   protected void doMetrics() {
514     try {
515       if (assignmentManager != null) {
516         assignmentManager.updateRegionsInTransitionMetrics();
517       }
518     } catch (Throwable e) {
519       LOG.error("Couldn't update metrics: " + e.getMessage());
520     }
521   }
522 
523   MetricsMaster getMasterMetrics() {
524     return metricsMaster;
525   }
526 
527   /**
528    * Initialize all ZK based system trackers.
529    * @throws IOException
530    * @throws InterruptedException
531    * @throws KeeperException
532    * @throws CoordinatedStateException
533    */
534   void initializeZKBasedSystemTrackers() throws IOException,
535       InterruptedException, KeeperException, CoordinatedStateException {
536     this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
537     this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
538     this.loadBalancerTracker.start();
539     this.assignmentManager = new AssignmentManager(this, serverManager,
540       this.balancer, this.service, this.metricsMaster,
541       this.tableLockManager, tableStateManager);
542 
543     this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
544         this.serverManager);
545     this.regionServerTracker.start();
546 
547     this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
548       this.serverManager);
549     this.drainingServerTracker.start();
550 
551     // Set the cluster as up.  If new RSs, they'll be waiting on this before
552     // going ahead with their startup.
553     boolean wasUp = this.clusterStatusTracker.isClusterUp();
554     if (!wasUp) this.clusterStatusTracker.setClusterUp();
555 
556     LOG.info("Server active/primary master=" + this.serverName +
557         ", sessionid=0x" +
558         Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
559         ", setting cluster-up flag (Was=" + wasUp + ")");
560 
561     // create/initialize the snapshot manager and other procedure managers
562     this.snapshotManager = new SnapshotManager();
563     this.mpmHost = new MasterProcedureManagerHost();
564     this.mpmHost.register(this.snapshotManager);
565     this.mpmHost.register(new MasterFlushTableProcedureManager());
566     this.mpmHost.loadProcedures(conf);
567     this.mpmHost.initialize(this, this.metricsMaster);
568 
569     // migrate existing table states from zk
570     for (Map.Entry<TableName, TableState.State> entry : ZKDataMigrator
571         .queryForTableStates(getZooKeeper()).entrySet()) {
572         LOG.info("Converting state from zk to new states: " + entry);
573       tableStateManager.setTableState(entry.getKey(), entry.getValue());
574     }
575     ZKUtil.deleteChildrenRecursively(getZooKeeper(), getZooKeeper().tableZNode);
576   }
577 
578   /**
579    * Finish initialization of HMaster after becoming the primary master.
580    *
581    * <ol>
582    * <li>Initialize master components - file system manager, server manager,
583    *     assignment manager, region server tracker, etc</li>
584    * <li>Start necessary service threads - balancer, catalog janitor,
585    *     executor services, etc</li>
586    * <li>Set cluster as UP in ZooKeeper</li>
587    * <li>Wait for RegionServers to check-in</li>
588    * <li>Split logs and perform data recovery, if necessary</li>
589    * <li>Ensure assignment of meta/namespace regions</li>
590    * <li>Handle either fresh cluster start or master failover</li>
591    * </ol>
592    *
593    * @throws IOException
594    * @throws InterruptedException
595    * @throws KeeperException
596    * @throws CoordinatedStateException
597    */
598   private void finishActiveMasterInitialization(MonitoredTask status)
599       throws IOException, InterruptedException, KeeperException, CoordinatedStateException {
600 
601     isActiveMaster = true;
602     Thread zombieDetector = new Thread(new InitializationMonitor(this));
603     zombieDetector.start();
604 
605     /*
606      * We are active master now... go initialize components we need to run.
607      * Note, there may be dross in zk from previous runs; it'll get addressed
608      * below after we determine whether this is a cluster startup or a failover.
609      */
610 
611     status.setStatus("Initializing Master file system");
612 
613     this.masterActiveTime = System.currentTimeMillis();
614     // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
615     this.fileSystemManager = new MasterFileSystem(this, this);
616 
617     // enable table descriptors cache
618     this.tableDescriptors.setCacheOn();
619     // set the META's descriptor to the correct replication
620     this.tableDescriptors.get(TableName.META_TABLE_NAME).setRegionReplication(
621         conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM));
622     // warm-up HTDs cache on master initialization
623     if (preLoadTableDescriptors) {
624       status.setStatus("Pre-loading table descriptors");
625       this.tableDescriptors.getAll();
626     }
627 
628     // publish cluster ID
629     status.setStatus("Publishing Cluster ID in ZooKeeper");
630     ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
631     this.serverManager = createServerManager(this, this);
632 
633     setupClusterConnection();
634 
635     // Invalidate all write locks held previously
636     this.tableLockManager.reapWriteLocks();
637     this.tableStateManager = new TableStateManager(this);
638 
639     status.setStatus("Initializing ZK system trackers");
640     initializeZKBasedSystemTrackers();
641 
642     // initialize master side coprocessors before we start handling requests
643     status.setStatus("Initializing master coprocessors");
644     this.cpHost = new MasterCoprocessorHost(this, this.conf);
645 
646     // start up all service threads.
647     status.setStatus("Initializing master service threads");
648     startServiceThreads();
649 
650     // Wake up this server to check in
651     sleeper.skipSleepCycle();
652 
653     // Wait for region servers to report in
654     this.serverManager.waitForRegionServers(status);
655     // Check zk for region servers that are up but didn't register
656     for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
657       // The isServerOnline check is opportunistic, correctness is handled inside
658       if (!this.serverManager.isServerOnline(sn)
659           && serverManager.checkAndRecordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD)) {
660         LOG.info("Registered server found up in zk but which has not yet reported in: " + sn);
661       }
662     }
663 
664     // get a list of previously failed RSs which need log splitting work
665     // we recover hbase:meta region servers inside master initialization and
666     // handle other failed servers in SSH in order to start up master node ASAP
667     Set<ServerName> previouslyFailedServers = this.fileSystemManager
668         .getFailedServersFromLogFolders();
669 
670     // remove stale recovering regions from previous run
671     this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);
672 
673     // log splitting for hbase:meta server
674     ServerName oldMetaServerLocation = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
675     if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
676       splitMetaLogBeforeAssignment(oldMetaServerLocation);
677       // Note: we can't remove oldMetaServerLocation from previousFailedServers list because it
678       // may also host user regions
679     }
680     Set<ServerName> previouslyFailedMetaRSs = getPreviouselyFailedMetaServersFromZK();
681     // need to use union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers
682     // instead of previouslyFailedMetaRSs alone to address the following two situations:
683     // 1) the chained failure situation (recovery failed multiple times in a row).
684     // 2) master gets killed right before it could delete the recovering hbase:meta from ZK
685     // while the same server still has non-meta wals to be replayed, so that
686     // removeStaleRecoveringRegionsFromZK can't delete the stale hbase:meta region.
687     // Passing more servers into splitMetaLog is all right. If a server doesn't have an
688     // hbase:meta wal, it is a no-op for that server.
689     previouslyFailedMetaRSs.addAll(previouslyFailedServers);
690 
691     this.initializationBeforeMetaAssignment = true;
692 
693     // Wait for regionserver to finish initialization.
694     if (BaseLoadBalancer.tablesOnMaster(conf)) {
695       waitForServerOnline();
696     }
697 
698     //initialize load balancer
699     this.balancer.setClusterStatus(getClusterStatus());
700     this.balancer.setMasterServices(this);
701     this.balancer.initialize();
702 
703     // Check if master is shutting down because of some issue
704     // in initializing the regionserver or the balancer.
705     if(isStopped()) return;
706 
707     // Make sure meta assigned before proceeding.
708     status.setStatus("Assigning Meta Region");
709     assignMeta(status, previouslyFailedMetaRSs, HRegionInfo.DEFAULT_REPLICA_ID);
710     // check if master is shutting down, because the above assignMeta could return even if
711     // hbase:meta isn't assigned when master is shutting down
712     if(isStopped()) return;
713 
714     status.setStatus("Submitting log splitting work for previously failed region servers");
715     // Master has recovered hbase:meta region server and we put
716     // other failed region servers in a queue to be handled later by SSH
717     for (ServerName tmpServer : previouslyFailedServers) {
718       this.serverManager.processDeadServer(tmpServer, true);
719     }
720 
721     // Fix up assignment manager status
722     status.setStatus("Starting assignment manager");
723     this.assignmentManager.joinCluster();
724 
725     //set cluster status again after user regions are assigned
726     this.balancer.setClusterStatus(getClusterStatus());
727 
728     // Start balancer and meta catalog janitor after meta and regions have
729     // been assigned.
730     status.setStatus("Starting balancer and catalog janitor");
731     this.clusterStatusChore = new ClusterStatusChore(this, balancer);
732     getChoreService().scheduleChore(clusterStatusChore);
733     this.balancerChore = new BalancerChore(this);
734     getChoreService().scheduleChore(balancerChore);
735     this.catalogJanitorChore = new CatalogJanitor(this, this);
736     getChoreService().scheduleChore(catalogJanitorChore);
737 
738     status.setStatus("Starting namespace manager");
739     initNamespace();
740 
741     if (this.cpHost != null) {
742       try {
743         this.cpHost.preMasterInitialization();
744       } catch (IOException e) {
745         LOG.error("Coprocessor preMasterInitialization() hook failed", e);
746       }
747     }
748 
749     status.markComplete("Initialization successful");
750     LOG.info("Master has completed initialization");
751     configurationManager.registerObserver(this.balancer);
752     initialized = true;
753     // assign the meta replicas
754     Set<ServerName> EMPTY_SET = new HashSet<ServerName>();
755     int numReplicas = conf.getInt(HConstants.META_REPLICAS_NUM,
756            HConstants.DEFAULT_META_REPLICA_NUM);
757     for (int i = 1; i < numReplicas; i++) {
758       assignMeta(status, EMPTY_SET, i);
759     }
760     unassignExcessMetaReplica(zooKeeper, numReplicas);
761 
762     status.setStatus("Starting quota manager");
763     initQuotaManager();
764 
765     // clear dead servers with the same host name and port as an online server, because we do
766     // not remove a dead server matching an rs which is trying to check in before
767     // master initialization. See HBASE-5916.
768     this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
769 
770     if (this.cpHost != null) {
771       // don't let cp initialization errors kill the master
772       try {
773         this.cpHost.postStartMaster();
774       } catch (IOException ioe) {
775         LOG.error("Coprocessor postStartMaster() hook failed", ioe);
776       }
777     }
778 
779     zombieDetector.interrupt();
780   }
781 
782   /**
783    * Create a {@link ServerManager} instance.
784    * @param master
785    * @param services
786    * @return An instance of {@link ServerManager}
787    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
788    * @throws IOException
789    */
790   ServerManager createServerManager(final Server master,
791       final MasterServices services)
792   throws IOException {
793     // We put this out here in a method so we can do a Mockito.spy and stub it out
794     // w/ a mocked up ServerManager.
795     return new ServerManager(master, services);
796   }
797 
798   private void unassignExcessMetaReplica(ZooKeeperWatcher zkw, int numMetaReplicasConfigured) {
799     // unassign the unneeded replicas (e.g., if the previous master was configured
800     // with a replication of 3 and now it is 2, we need to unassign the 1 unneeded replica)
801     try {
802       List<String> metaReplicaZnodes = zooKeeper.getMetaReplicaNodes();
803       for (String metaReplicaZnode : metaReplicaZnodes) {
804         int replicaId = zooKeeper.getMetaReplicaIdFromZnode(metaReplicaZnode);
805         if (replicaId >= numMetaReplicasConfigured) {
806           RegionState r = MetaTableLocator.getMetaRegionState(zkw, replicaId);
807           LOG.info("Closing excess replica of meta region " + r.getRegion());
808           // send a close and wait for a max of 30 seconds
809           ServerManager.closeRegionSilentlyAndWait(getConnection(), r.getServerName(),
810               r.getRegion(), 30000);
811           ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(replicaId));
812         }
813       }
814     } catch (Exception ex) {
815       // ignore the exception since we don't want the master to be wedged due to potential
816       // issues in the cleanup of the extra regions. We can do that cleanup via hbck or manually
817       LOG.warn("Ignoring exception while cleaning up excess meta replicas", ex);
818     }
819   }
820 
821   /**
822    * Check <code>hbase:meta</code> is assigned. If not, assign it.
823    * @param status MonitoredTask
824    * @param previouslyFailedMetaRSs
825    * @param replicaId
826    * @throws InterruptedException
827    * @throws IOException
828    * @throws KeeperException
829    */
830   void assignMeta(MonitoredTask status, Set<ServerName> previouslyFailedMetaRSs, int replicaId)
831       throws InterruptedException, IOException, KeeperException {
832     // Work on meta region
833     int assigned = 0;
834     long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
835     if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
836       status.setStatus("Assigning hbase:meta region");
837     } else {
838       status.setStatus("Assigning hbase:meta region, replicaId " + replicaId);
839     }
840 
841     // Get current meta state from zk.
842     RegionState metaState = MetaTableLocator.getMetaRegionState(getZooKeeper(), replicaId);
843     HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO,
844         replicaId);
845     RegionStates regionStates = assignmentManager.getRegionStates();
846     regionStates.createRegionState(hri, metaState.getState(),
847         metaState.getServerName(), null);
848 
849     if (!metaState.isOpened() || !metaTableLocator.verifyMetaRegionLocation(
850         this.getConnection(), this.getZooKeeper(), timeout, replicaId)) {
851       ServerName currentMetaServer = metaState.getServerName();
852       if (serverManager.isServerOnline(currentMetaServer)) {
853         if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
854           LOG.info("Meta was in transition on " + currentMetaServer);
855         } else {
856           LOG.info("Meta with replicaId " + replicaId + " was in transition on " +
857                     currentMetaServer);
858         }
859         assignmentManager.processRegionsInTransition(Arrays.asList(metaState));
860       } else {
861         if (currentMetaServer != null) {
862           if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
863             splitMetaLogBeforeAssignment(currentMetaServer);
864             regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
865             previouslyFailedMetaRSs.add(currentMetaServer);
866           }
867         }
868         LOG.info("Re-assigning hbase:meta with replicaId " + replicaId +
869             "; it was on " + currentMetaServer);
870         assignmentManager.assignMeta(hri);
871       }
872       assigned++;
873     }
874 
875     if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID)
876       getTableStateManager().setTableState(TableName.META_TABLE_NAME, TableState.State.ENABLED);
877     // TODO: should we prevent from using state manager before meta was initialized?
878     // tableStateManager.start();
879 
880     if ((RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode())
881         && (!previouslyFailedMetaRSs.isEmpty())) {
882       // log replay mode needs the new hbase:meta RS to be assigned first
883       status.setStatus("replaying log for Meta Region");
884       this.fileSystemManager.splitMetaLog(previouslyFailedMetaRSs);
885     }
886 
887     this.assignmentManager.setEnabledTable(TableName.META_TABLE_NAME);
888     tableStateManager.start();
889 
890     // Make sure an hbase:meta location is set. We need to enable SSH here since
891     // if the meta region server has died at this time, we need it to be re-assigned
892     // by SSH so that system tables can be assigned.
893     // No need to wait for meta when assigned == 0, i.e. when meta was just verified.
894     if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableServerShutdownHandler(assigned != 0);
895     LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location="
896       + metaTableLocator.getMetaRegionLocation(this.getZooKeeper(), replicaId));
897     status.setStatus("META assigned.");
898   }
899 
900   void initNamespace() throws IOException {
901     //create namespace manager
902     tableNamespaceManager = new TableNamespaceManager(this);
903     tableNamespaceManager.start();
904   }
905 
906   void initQuotaManager() throws IOException {
907     quotaManager = new MasterQuotaManager(this);
908     this.assignmentManager.setRegionStateListener((RegionStateListener)quotaManager);
909     quotaManager.start();
910   }
911 
912   boolean isCatalogJanitorEnabled() {
913     return catalogJanitorChore != null ?
914       catalogJanitorChore.getEnabled() : false;
915   }
916 
917   private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
918     if (RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode()) {
919       // In log replay mode, we mark hbase:meta region as recovering in ZK
920       Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
921       regions.add(HRegionInfo.FIRST_META_REGIONINFO);
922       this.fileSystemManager.prepareLogReplay(currentMetaServer, regions);
923     } else {
924       // In recovered.edits mode: create recovered edits file for hbase:meta server
925       this.fileSystemManager.splitMetaLog(currentMetaServer);
926     }
927   }
928 
929   private void enableServerShutdownHandler(
930       final boolean waitForMeta) throws IOException, InterruptedException {
931     // If ServerShutdownHandler is disabled, we enable it and expire those dead
932     // but not expired servers. This is required so that if meta is assigning to
933     // a server which dies after assignMeta starts assignment,
934     // SSH can re-assign it. Otherwise, we will be
935     // stuck here waiting forever if waitForMeta is specified.
936     if (!serverShutdownHandlerEnabled) {
937       serverShutdownHandlerEnabled = true;
938       this.serverManager.processQueuedDeadServers();
939     }
940 
941     if (waitForMeta) {
942       metaTableLocator.waitMetaRegionLocation(this.getZooKeeper());
943     }
944   }
945 
946   /**
947    * Returns the set of region server names recorded under the hbase:meta recovering-region ZK node
948    * @return Set of meta server names which were recorded in ZK
949    * @throws KeeperException
950    */
951   private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
952     Set<ServerName> result = new HashSet<ServerName>();
953     String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
954       HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
955     List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
956     if (regionFailedServers == null) return result;
957 
958     for(String failedServer : regionFailedServers) {
959       ServerName server = ServerName.parseServerName(failedServer);
960       result.add(server);
961     }
962     return result;
963   }
964 
965   @Override
966   public TableDescriptors getTableDescriptors() {
967     return this.tableDescriptors;
968   }
969 
970   @Override
971   public ServerManager getServerManager() {
972     return this.serverManager;
973   }
974 
975   @Override
976   public MasterFileSystem getMasterFileSystem() {
977     return this.fileSystemManager;
978   }
979 
980   @Override
981   public TableStateManager getTableStateManager() {
982     return tableStateManager;
983   }
984 
985   /*
986    * Start up all services. If any of these threads gets an unhandled exception
987    * then they just die with a logged message.  This should be fine because
988    * in general, we do not expect the master to get such unhandled exceptions
989    * as OOMEs; it should be lightly loaded. See what HRegionServer does if you
990    * need to install an unhandled exception handler.
991    */
992   private void startServiceThreads() throws IOException{
993    // Start the executor service pools
994    this.service.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
995       conf.getInt("hbase.master.executor.openregion.threads", 5));
996    this.service.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
997       conf.getInt("hbase.master.executor.closeregion.threads", 5));
998    this.service.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
999       conf.getInt("hbase.master.executor.serverops.threads", 5));
1000    this.service.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
1001       conf.getInt("hbase.master.executor.serverops.threads", 5));
1002    this.service.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS,
1003       conf.getInt("hbase.master.executor.logreplayops.threads", 10));
1004 
1005    // We depend on there being only one instance of this executor running
1006    // at a time.  To do concurrency, would need fencing of enable/disable of
1007    // tables.
1008    // Any time changing this maxThreads to > 1, pls see the comment at
1009    // AccessController#postCreateTableHandler
1010    this.service.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
1011 
1012    // Start log cleaner thread
1013    int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
1014    this.logCleaner =
1015       new LogCleaner(cleanerInterval,
1016          this, conf, getMasterFileSystem().getFileSystem(),
1017          getMasterFileSystem().getOldLogDir());
1018     getChoreService().scheduleChore(logCleaner);
1019 
1020    //start the hfile archive cleaner thread
1021     Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1022     this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
1023         .getFileSystem(), archiveDir);
1024     getChoreService().scheduleChore(hfileCleaner);
1025     serviceStarted = true;
1026     if (LOG.isTraceEnabled()) {
1027       LOG.trace("Started service threads");
1028     }
1029   }
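
       /*
        * The pool sizes and cleaner interval above are plain Configuration
        * knobs; an illustrative bump for a large cluster (values arbitrary):
        *
        *   conf.setInt("hbase.master.executor.serverops.threads", 10);
        *   conf.setInt("hbase.master.cleaner.interval", 5 * 60 * 1000);
        */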
1030 
1031   @Override
1032   protected void stopServiceThreads() {
1033     if (masterJettyServer != null) {
1034       LOG.info("Stopping master jetty server");
1035       try {
1036         masterJettyServer.stop();
1037       } catch (Exception e) {
1038         LOG.error("Failed to stop master jetty server", e);
1039       }
1040     }
1041     super.stopServiceThreads();
1042     stopChores();
1043     // Wait for all the remaining region servers to report in IFF we were
1044     // running a cluster shutdown AND we were NOT aborting.
1045     if (!isAborted() && this.serverManager != null &&
1046         this.serverManager.isClusterShutdown()) {
1047       this.serverManager.letRegionServersShutdown();
1048     }
1049     if (LOG.isDebugEnabled()) {
1050       LOG.debug("Stopping service threads");
1051     }
1052     // Clean up and close up shop
1053     if (this.logCleaner != null) this.logCleaner.cancel(true);
1054     if (this.hfileCleaner != null) this.hfileCleaner.cancel(true);
1055     if (this.quotaManager != null) this.quotaManager.stop();
1056     if (this.activeMasterManager != null) this.activeMasterManager.stop();
1057     if (this.serverManager != null) this.serverManager.stop();
1058     if (this.assignmentManager != null) this.assignmentManager.stop();
1059     if (this.fileSystemManager != null) this.fileSystemManager.stop();
1060     if (this.mpmHost != null) this.mpmHost.stop("server shutting down.");
1061   }
1062 
1063   private void stopChores() {
1064     if (this.balancerChore != null) {
1065       this.balancerChore.cancel(true);
1066     }
1067     if (this.clusterStatusChore != null) {
1068       this.clusterStatusChore.cancel(true);
1069     }
1070     if (this.catalogJanitorChore != null) {
1071       this.catalogJanitorChore.cancel(true);
1072     }
1073     if (this.clusterStatusPublisherChore != null){
1074       clusterStatusPublisherChore.cancel(true);
1075     }
1076   }
1077 
1078   /**
1079    * @return The remote side's InetAddress
1080    * @throws UnknownHostException
1081    */
1082   InetAddress getRemoteInetAddress(final int port,
1083       final long serverStartCode) throws UnknownHostException {
1084     // Do it out here in its own little method so we can fake an address when
1085     // mocking up in tests.
1086     InetAddress ia = RpcServer.getRemoteIp();
1087 
1088     // The call could be from the local regionserver,
1089     // in which case, there is no remote address.
1090     if (ia == null && serverStartCode == startcode) {
1091       InetSocketAddress isa = rpcServices.getSocketAddress();
1092       if (isa != null && isa.getPort() == port) {
1093         ia = isa.getAddress();
1094       }
1095     }
1096     return ia;
1097   }
1098 
1099   /**
1100    * @return Maximum time we should run balancer for
1101    */
1102   private int getBalancerCutoffTime() {
1103     int balancerCutoffTime =
1104       getConfiguration().getInt("hbase.balancer.max.balancing", -1);
1105     if (balancerCutoffTime == -1) {
1106       // No time period set so create one
1107       int balancerPeriod =
1108         getConfiguration().getInt("hbase.balancer.period", 300000);
1109       balancerCutoffTime = balancerPeriod;
1110       // If nonsense period, set it to balancerPeriod
1111       if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
1112     }
1113     return balancerCutoffTime;
1114   }
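
       /*
        * Worked example for the cutoff above: with hbase.balancer.max.balancing
        * unset (-1) and hbase.balancer.period left at its 300000 ms default,
        * each balancer run gets a full period, i.e. a five minute cutoff.
        */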
1115 
1116   public boolean balance() throws IOException {
1117     // if master not initialized, don't run balancer.
1118     if (!this.initialized) {
1119       LOG.debug("Master has not been initialized, don't run balancer.");
1120       return false;
1121     }
1122     // Do this call outside of synchronized block.
1123     int maximumBalanceTime = getBalancerCutoffTime();
1124     synchronized (this.balancer) {
1125       // If the balancer is switched off, don't run it.
1126       if (!this.loadBalancerTracker.isBalancerOn()) return false;
1127       // Only allow one balance run at a time.
1128       if (this.assignmentManager.getRegionStates().isRegionsInTransition()) {
1129         Map<String, RegionState> regionsInTransition =
1130           this.assignmentManager.getRegionStates().getRegionsInTransition();
1131         LOG.debug("Not running balancer because " + regionsInTransition.size() +
1132           " region(s) in transition: " + org.apache.commons.lang.StringUtils.
1133             abbreviate(regionsInTransition.toString(), 256));
1134         return false;
1135       }
1136       if (this.serverManager.areDeadServersInProgress()) {
1137         LOG.debug("Not running balancer because processing dead regionserver(s): " +
1138           this.serverManager.getDeadServers());
1139         return false;
1140       }
1141 
1142       if (this.cpHost != null) {
1143         try {
1144           if (this.cpHost.preBalance()) {
1145             LOG.debug("Coprocessor bypassing balancer request");
1146             return false;
1147           }
1148         } catch (IOException ioe) {
1149           LOG.error("Error invoking master coprocessor preBalance()", ioe);
1150           return false;
1151         }
1152       }
1153 
1154       Map<TableName, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
1155         this.assignmentManager.getRegionStates().getAssignmentsByTable();
1156 
1157       List<RegionPlan> plans = new ArrayList<RegionPlan>();
1158       //Give the balancer the current cluster state.
1159       this.balancer.setClusterStatus(getClusterStatus());
1160       for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
1161         List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
1162         if (partialPlans != null) plans.addAll(partialPlans);
1163       }
1164       long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
1165       int rpCount = 0;  // number of RegionPlans balanced so far
1166       long totalRegPlanExecTime = 0;
1167       if (plans != null && !plans.isEmpty()) {
1168         for (RegionPlan plan: plans) {
1169           LOG.info("balance " + plan);
1170           long balStartTime = System.currentTimeMillis();
1171           //TODO: bulk assign
1172           this.assignmentManager.balance(plan);
1173           totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
1174           rpCount++;
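               // Extrapolation sketch: if, say, 3 plans have taken 900 ms so far
               // (300 ms average), stop early once now + 300 ms would pass cutoffTime.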
1175           if (rpCount < plans.size() &&
1176               // if performing next balance exceeds cutoff time, exit the loop
1177               (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
1178             //TODO: After balance, there should not be a cutoff time (keeping it as
1179             // a security net for now)
1180             LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
1181               maximumBalanceTime);
1182             break;
1183           }
1184         }
1185       }
1186       if (this.cpHost != null) {
1187         try {
1188           this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
1189         } catch (IOException ioe) {
1190           // balancing already succeeded so don't change the result
1191           LOG.error("Error invoking master coprocessor postBalance()", ioe);
1192         }
1193       }
1194     }
1195     // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
1196     // Return true indicating a success.
1197     return true;
1198   }
1199 
1200   /**
1201    * @return Client info for use as prefix on an audit log string; who did an action
1202    */
1203   String getClientIdAuditPrefix() {
1204     return "Client=" + RequestContext.getRequestUserName() + "/" +
1205       RequestContext.get().getRemoteAddress();
1206   }
1207 
1208   /**
1209    * Switch for the background CatalogJanitor thread.
1210    * Used for testing.  The thread will continue to run.  It will just be a noop
1211    * if disabled.
1212    * @param b If false, the catalog janitor won't do anything.
1213    */
1214   public void setCatalogJanitorEnabled(final boolean b) {
1215     this.catalogJanitorChore.setEnabled(b);
1216   }
1217 
1218   @Override
1219   public void dispatchMergingRegions(final HRegionInfo region_a,
1220       final HRegionInfo region_b, final boolean forcible) throws IOException {
1221     checkInitialized();
1222     this.service.submit(new DispatchMergingRegionHandler(this,
1223         this.catalogJanitorChore, region_a, region_b, forcible));
1224   }
1225 
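       /*
        * Typical entry point, as a sketch: clients reach this via
        * HBaseAdmin#move(byte[], byte[]), e.g. (server name illustrative)
        *
        *   admin.move(hri.getEncodedNameAsBytes(),
        *       Bytes.toBytes("rs1.example.org,16020,1422000000000"));
        *
        * A null or empty destination makes the balancer pick a random server.
        */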
1226   void move(final byte[] encodedRegionName,
1227       final byte[] destServerName) throws HBaseIOException {
1228     RegionState regionState = assignmentManager.getRegionStates().
1229       getRegionState(Bytes.toString(encodedRegionName));
1230     if (regionState == null) {
1231       throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1232     }
1233 
1234     HRegionInfo hri = regionState.getRegion();
1235     ServerName dest;
1236     if (destServerName == null || destServerName.length == 0) {
1237       LOG.info("Passed destination servername is null/empty so " +
1238         "choosing a server at random");
1239       final List<ServerName> destServers = this.serverManager.createDestinationServersList(
1240         regionState.getServerName());
1241       dest = balancer.randomAssignment(hri, destServers);
1242       if (dest == null) {
1243         LOG.debug("Unable to determine a plan to assign " + hri);
1244         return;
1245       }
1246     } else {
1247       dest = ServerName.valueOf(Bytes.toString(destServerName));
1248       if (dest.equals(serverName) && balancer instanceof BaseLoadBalancer
1249           && !((BaseLoadBalancer)balancer).shouldBeOnMaster(hri)) {
1250         // Avoid unnecessary region moving later by the balancer: don't put user
1251         // regions on the master. Tests may intentionally put regions on the
1252         // master, however.
1253         LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
1254           + " to avoid unnecessary region moving later by load balancer,"
1255           + " because it should not be on master");
1256         return;
1257       }
1258     }
1259 
1260     if (dest.equals(regionState.getServerName())) {
1261       LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
1262         + " because region already assigned to the same server " + dest + ".");
1263       return;
1264     }
1265 
1266     // Now we can do the move
1267     RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
1268 
1269     try {
1270       checkInitialized();
1271       if (this.cpHost != null) {
1272         if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
1273           return;
1274         }
1275       }
1276       LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
1277       this.assignmentManager.balance(rp);
1278       if (this.cpHost != null) {
1279         this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
1280       }
1281     } catch (IOException ioe) {
1282       if (ioe instanceof HBaseIOException) {
1283         throw (HBaseIOException)ioe;
1284       }
1285       throw new HBaseIOException(ioe);
1286     }
1287   }
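       // Illustrative caller sketch (hypothetical values; not in the original
       // source). The destination bytes are the ServerName string form
       // "host,port,startcode":
       //   master.move(encodedName, Bytes.toBytes("rs1.example.com,16020,1400000000000"));
       // A null or empty destination lets the balancer pick a server at random.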
1288 
1289   @Override
1290   public void createTable(HTableDescriptor hTableDescriptor,
1291       byte [][] splitKeys) throws IOException {
1292     if (isStopped()) {
1293       throw new MasterNotRunningException();
1294     }
1295 
1296     String namespace = hTableDescriptor.getTableName().getNamespaceAsString();
1297     ensureNamespaceExists(namespace);
1298 
1299     HRegionInfo[] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
1300     checkInitialized();
1301     sanityCheckTableDescriptor(hTableDescriptor);
1302     this.quotaManager.checkNamespaceTableAndRegionQuota(hTableDescriptor.getTableName(),
1303       newRegions.length);
1304     if (cpHost != null) {
1305       cpHost.preCreateTable(hTableDescriptor, newRegions);
1306     }
1307     LOG.info(getClientIdAuditPrefix() + " create " + hTableDescriptor);
1308     this.service.submit(new CreateTableHandler(this,
1309       this.fileSystemManager, hTableDescriptor, conf,
1310       newRegions, this).prepare());
1311     if (cpHost != null) {
1312       cpHost.postCreateTable(hTableDescriptor, newRegions);
1313     }
1314 
1315   }
1316 
1317   /**
1318    * Checks whether the table conforms to some sane limits, and configured
1319    * values (compression, etc) work. Throws an exception if something is wrong.
1320    * @throws IOException
1321    */
1322   private void sanityCheckTableDescriptor(final HTableDescriptor htd) throws IOException {
1323     final String CONF_KEY = "hbase.table.sanity.checks";
1324     if (!conf.getBoolean(CONF_KEY, true)) {
1325       return;
1326     }
1327     String tableVal = htd.getConfigurationValue(CONF_KEY);
1328     if (tableVal != null && !Boolean.valueOf(tableVal)) {
1329       return;
1330     }
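         // Example opt-outs (illustrative; the HTableDescriptor setter name is
         // assumed):
         //   conf.setBoolean("hbase.table.sanity.checks", false);         // cluster-wide
         //   htd.setConfiguration("hbase.table.sanity.checks", "false");  // per-table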
1331 
1332     // check max file size
1333     long maxFileSizeLowerLimit = 2 * 1024 * 1024L; // 2M is the default lower limit
1334     long maxFileSize = htd.getMaxFileSize();
1335     if (maxFileSize < 0) {
1336       maxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, maxFileSizeLowerLimit);
1337     }
1338     if (maxFileSize < conf.getLong("hbase.hregion.max.filesize.limit", maxFileSizeLowerLimit)) {
1339       throw new DoNotRetryIOException("MAX_FILESIZE for table descriptor or "
1340         + "\"hbase.hregion.max.filesize\" (" + maxFileSize
1341         + ") is too small, which might cause over splitting into unmanageable "
1342         + "number of regions. Set " + CONF_KEY + " to false at conf or table descriptor "
1343           + "if you want to bypass sanity checks");
1344     }
1345 
1346     // check flush size
1347     long flushSizeLowerLimit = 1024 * 1024L; // 1M is the default lower limit
1348     long flushSize = htd.getMemStoreFlushSize();
1349     if (flushSize < 0) {
1350       flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSizeLowerLimit);
1351     }
1352     if (flushSize < conf.getLong("hbase.hregion.memstore.flush.size.limit", flushSizeLowerLimit)) {
1353       throw new DoNotRetryIOException("MEMSTORE_FLUSHSIZE for table descriptor or "
1354           + "\"hbase.hregion.memstore.flush.size\" ("+flushSize+") is too small, which might cause"
1355           + " very frequent flushing. Set " + CONF_KEY + " to false at conf or table descriptor "
1356           + "if you want to bypass sanity checks");
1357     }
1358 
1359     // check that coprocessors and other specified plugin classes can be loaded
1360     try {
1361       checkClassLoading(conf, htd);
1362     } catch (Exception ex) {
1363       throw new DoNotRetryIOException(ex);
1364     }
1365 
1366     // check compression can be loaded
1367     try {
1368       checkCompression(htd);
1369     } catch (IOException e) {
1370       throw new DoNotRetryIOException(e.getMessage(), e);
1371     }
1372 
1373     // check encryption can be loaded
1374     try {
1375       checkEncryption(conf, htd);
1376     } catch (IOException e) {
1377       throw new DoNotRetryIOException(e.getMessage(), e);
1378     }
1379 
1380     // check that we have at least 1 CF
1381     if (htd.getColumnFamilies().length == 0) {
1382       throw new DoNotRetryIOException("Table should have at least one column family. "
1383           + "Set " + CONF_KEY + " to false at conf or table descriptor if you want to bypass sanity checks");
1384     }
1385 
1386     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1387       if (hcd.getTimeToLive() <= 0) {
1388         throw new DoNotRetryIOException("TTL for column family " + hcd.getNameAsString()
1389           + " must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
1390           + "if you want to bypass sanity checks");
1391       }
1392 
1393       // check blockSize
1394       if (hcd.getBlocksize() < 1024 || hcd.getBlocksize() > 16 * 1024 * 1024) {
1395         throw new DoNotRetryIOException("Block size for column family " + hcd.getNameAsString()
1396           + " must be between 1K and 16MB. Set " + CONF_KEY + " to false at conf or table descriptor "
1397           + "if you want to bypass sanity checks");
1398       }
1399 
1400       // check versions
1401       if (hcd.getMinVersions() < 0) {
1402         throw new DoNotRetryIOException("Min versions for column family " + hcd.getNameAsString()
1403           + " must be non-negative. Set " + CONF_KEY + " to false at conf or table descriptor "
1404           + "if you want to bypass sanity checks");
1405       }
1406       // max versions already being checked
1407 
1408       // check replication scope
1409       if (hcd.getScope() < 0) {
1410         throw new DoNotRetryIOException("Replication scope for column family "
1411           + hcd.getNameAsString() + " must be non-negative. Set " + CONF_KEY + " to false at conf "
1412           + "or table descriptor if you want to bypass sanity checks");
1413       }
1414 
1415       // TODO: should we check coprocessors and encryption?
1416     }
1417   }
1418 
1419   private void startActiveMasterManager(int infoPort) throws KeeperException {
1420     String backupZNode = ZKUtil.joinZNode(
1421       zooKeeper.backupMasterAddressesZNode, serverName.toString());
1422     /*
1423      * Add a ZNode for ourselves in the backup master directory since we
1424      * may not become the active master. If not, we want the actual active
1425      * master to know we are a backup master, so that it won't assign
1426      * regions to us if so configured.
1427      *
1428      * If we become the active master later, ActiveMasterManager will delete
1429      * this node explicitly. If we crash before then, ZooKeeper will delete
1430      * this node for us since it is ephemeral.
1431      */
1432     LOG.info("Adding backup master ZNode " + backupZNode);
1433     if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode,
1434         serverName, infoPort)) {
1435       LOG.warn("Failed create of " + backupZNode + " by " + serverName);
1436     }
1437 
1438     activeMasterManager.setInfoPort(infoPort);
1439     // Start a thread to try to become the active master, so we won't block here
1440     Threads.setDaemonThreadRunning(new Thread(new Runnable() {
1441       @Override
1442       public void run() {
1443         int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT,
1444           HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
1445         // If we're a backup master, stall until the primary writes its address
1446         if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP,
1447             HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
1448           LOG.debug("HMaster started in backup mode. "
1449             + "Stalling until master znode is written.");
1450           // This will only be a minute or so while the cluster starts up,
1451           // so don't worry about setting watches on the parent znode
1452           while (!activeMasterManager.hasActiveMaster()) {
1453             LOG.debug("Waiting for master address ZNode to be written "
1454               + "(Also watching cluster state node)");
1455             Threads.sleep(timeout);
1456           }
1457         }
1458         MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
1459         status.setDescription("Master startup");
1460         try {
1461           if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
1462             finishActiveMasterInitialization(status);
1463           }
1464         } catch (Throwable t) {
1465           status.setStatus("Failed to become active: " + t.getMessage());
1466           LOG.fatal("Failed to become active master", t);
1467           // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
1468           if (t instanceof NoClassDefFoundError &&
1469               t.getMessage()
1470                   .contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
1471             // improved error message for this special case
1472             abort("HBase is having a problem with its Hadoop jars.  You may need to "
1473               + "recompile HBase against Hadoop version "
1474               +  org.apache.hadoop.util.VersionInfo.getVersion()
1475               + " or change your hadoop jars to start properly", t);
1476           } else {
1477             abort("Unhandled exception. Starting shutdown.", t);
1478           }
1479         } finally {
1480           status.cleanup();
1481         }
1482       }
1483     }, getServerName().toShortString() + ".activeMasterManager"));
1484   }
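       // Illustrative note: a master only stalls in the wait loop above when
       // started in backup mode, e.g. with the setting checked above:
       //   conf.setBoolean(HConstants.MASTER_TYPE_BACKUP, true);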
1485 
1486   private void checkCompression(final HTableDescriptor htd)
1487   throws IOException {
1488     if (!this.masterCheckCompression) return;
1489     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1490       checkCompression(hcd);
1491     }
1492   }
1493 
1494   private void checkCompression(final HColumnDescriptor hcd)
1495   throws IOException {
1496     if (!this.masterCheckCompression) return;
1497     CompressionTest.testCompression(hcd.getCompression());
1498     CompressionTest.testCompression(hcd.getCompactionCompression());
1499   }
1500 
1501   private void checkEncryption(final Configuration conf, final HTableDescriptor htd)
1502   throws IOException {
1503     if (!this.masterCheckEncryption) return;
1504     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1505       checkEncryption(conf, hcd);
1506     }
1507   }
1508 
1509   private void checkEncryption(final Configuration conf, final HColumnDescriptor hcd)
1510   throws IOException {
1511     if (!this.masterCheckEncryption) return;
1512     EncryptionTest.testEncryption(conf, hcd.getEncryptionType(), hcd.getEncryptionKey());
1513   }
1514 
1515   private void checkClassLoading(final Configuration conf, final HTableDescriptor htd)
1516   throws IOException {
1517     RegionSplitPolicy.getSplitPolicyClass(htd, conf);
1518     RegionCoprocessorHost.testTableCoprocessorAttrs(conf, htd);
1519   }
1520 
1521   private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1522     byte[][] splitKeys) {
1523     long regionId = System.currentTimeMillis();
1524     HRegionInfo[] hRegionInfos = null;
1525     if (splitKeys == null || splitKeys.length == 0) {
1526       hRegionInfos = new HRegionInfo[]{new HRegionInfo(hTableDescriptor.getTableName(), null, null,
1527                 false, regionId)};
1528     } else {
1529       int numRegions = splitKeys.length + 1;
1530       hRegionInfos = new HRegionInfo[numRegions];
1531       byte[] startKey = null;
1532       byte[] endKey = null;
1533       for (int i = 0; i < numRegions; i++) {
1534         endKey = (i == splitKeys.length) ? null : splitKeys[i];
1535         hRegionInfos[i] =
1536             new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey,
1537                 false, regionId);
1538         startKey = endKey;
1539       }
1540     }
1541     return hRegionInfos;
1542   }
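       // Worked example (illustrative): splitKeys of {"m", "t"} yield three
       // HRegionInfos whose key ranges chain together, with null marking the
       // table boundaries: [null, "m"), ["m", "t"), ["t", null).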
1543 
1544   private static boolean isCatalogTable(final TableName tableName) {
1545     return tableName.equals(TableName.META_TABLE_NAME);
1546   }
1547 
1548   @Override
1549   public void deleteTable(final TableName tableName) throws IOException {
1550     checkInitialized();
1551     if (cpHost != null) {
1552       cpHost.preDeleteTable(tableName);
1553     }
1554     LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
1555     this.service.submit(new DeleteTableHandler(tableName, this, this).prepare());
1556     if (cpHost != null) {
1557       cpHost.postDeleteTable(tableName);
1558     }
1559   }
1560 
1561   @Override
1562   public void truncateTable(TableName tableName, boolean preserveSplits) throws IOException {
1563     checkInitialized();
1564     if (cpHost != null) {
1565       cpHost.preTruncateTable(tableName);
1566     }
1567     LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
1568     TruncateTableHandler handler = new TruncateTableHandler(tableName, this, this, preserveSplits);
1569     handler.prepare();
1570     handler.process();
1571     if (cpHost != null) {
1572       cpHost.postTruncateTable(tableName);
1573     }
1574   }
1575 
1576   @Override
1577   public void addColumn(final TableName tableName, final HColumnDescriptor columnDescriptor)
1578       throws IOException {
1579     checkInitialized();
1580     checkCompression(columnDescriptor);
1581     checkEncryption(conf, columnDescriptor);
1582     if (cpHost != null) {
1583       if (cpHost.preAddColumn(tableName, columnDescriptor)) {
1584         return;
1585       }
1586     }
1587     //TODO: we should process this (and some others) in an executor
1588     new TableAddFamilyHandler(tableName, columnDescriptor, this, this).prepare().process();
1589     if (cpHost != null) {
1590       cpHost.postAddColumn(tableName, columnDescriptor);
1591     }
1592   }
1593 
1594   @Override
1595   public void modifyColumn(TableName tableName, HColumnDescriptor descriptor)
1596       throws IOException {
1597     checkInitialized();
1598     checkCompression(descriptor);
1599     checkEncryption(conf, descriptor);
1600     if (cpHost != null) {
1601       if (cpHost.preModifyColumn(tableName, descriptor)) {
1602         return;
1603       }
1604     }
1605     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1606     new TableModifyFamilyHandler(tableName, descriptor, this, this)
1607       .prepare().process();
1608     if (cpHost != null) {
1609       cpHost.postModifyColumn(tableName, descriptor);
1610     }
1611   }
1612 
1613   @Override
1614   public void deleteColumn(final TableName tableName, final byte[] columnName)
1615       throws IOException {
1616     checkInitialized();
1617     if (cpHost != null) {
1618       if (cpHost.preDeleteColumn(tableName, columnName)) {
1619         return;
1620       }
1621     }
1622     LOG.info(getClientIdAuditPrefix() + " delete " + Bytes.toString(columnName));
1623     new TableDeleteFamilyHandler(tableName, columnName, this, this).prepare().process();
1624     if (cpHost != null) {
1625       cpHost.postDeleteColumn(tableName, columnName);
1626     }
1627   }
1628 
1629   @Override
1630   public void enableTable(final TableName tableName) throws IOException {
1631     checkInitialized();
1632     if (cpHost != null) {
1633       cpHost.preEnableTable(tableName);
1634     }
1635     LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
1636     this.service.submit(new EnableTableHandler(this, tableName,
1637       assignmentManager, tableLockManager, false).prepare());
1638     if (cpHost != null) {
1639       cpHost.postEnableTable(tableName);
1640     }
1641   }
1642 
1643   @Override
1644   public void disableTable(final TableName tableName) throws IOException {
1645     checkInitialized();
1646     if (cpHost != null) {
1647       cpHost.preDisableTable(tableName);
1648     }
1649     LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
1650     this.service.submit(new DisableTableHandler(this, tableName,
1651       assignmentManager, tableLockManager, false).prepare());
1652     if (cpHost != null) {
1653       cpHost.postDisableTable(tableName);
1654     }
1655   }
1656 
1657   /**
1658    * Return the region and current deployment for the region containing
1659    * the given row. If the region cannot be found, returns null. If it
1660    * is found, but not currently deployed, the second element of the pair
1661    * may be null.
1662    */
1663   @VisibleForTesting // Used by TestMaster.
1664   Pair<HRegionInfo, ServerName> getTableRegionForRow(
1665       final TableName tableName, final byte [] rowKey)
1666   throws IOException {
1667     final AtomicReference<Pair<HRegionInfo, ServerName>> result =
1668       new AtomicReference<Pair<HRegionInfo, ServerName>>(null);
1669 
1670     MetaScannerVisitor visitor =
1671       new MetaScannerVisitorBase() {
1672         @Override
1673         public boolean processRow(Result data) throws IOException {
1674           if (data == null || data.size() <= 0) {
1675             return true;
1676           }
1677           Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(data);
1678           if (pair == null) {
1679             return false;
1680           }
1681           if (!pair.getFirst().getTable().equals(tableName)) {
1682             return false;
1683           }
1684           result.set(pair);
1685           return true;
1686         }
1687     };
1688 
1689     MetaScanner.metaScan(clusterConnection, visitor, tableName, rowKey, 1);
1690     return result.get();
1691   }
1692 
1693   @Override
1694   public void modifyTable(final TableName tableName, final HTableDescriptor descriptor)
1695       throws IOException {
1696     checkInitialized();
1697     sanityCheckTableDescriptor(descriptor);
1698     if (cpHost != null) {
1699       cpHost.preModifyTable(tableName, descriptor);
1700     }
1701     LOG.info(getClientIdAuditPrefix() + " modify " + tableName);
1702     new ModifyTableHandler(tableName, descriptor, this, this).prepare().process();
1703     if (cpHost != null) {
1704       cpHost.postModifyTable(tableName, descriptor);
1705     }
1706   }
1707 
1708   @Override
1709   public void checkTableModifiable(final TableName tableName)
1710       throws IOException, TableNotFoundException, TableNotDisabledException {
1711     if (isCatalogTable(tableName)) {
1712       throw new IOException("Can't modify catalog tables");
1713     }
1714     if (!MetaTableAccessor.tableExists(getConnection(), tableName)) {
1715       throw new TableNotFoundException(tableName);
1716     }
1717     if (!getAssignmentManager().getTableStateManager().
1718         isTableState(tableName, TableState.State.DISABLED)) {
1719       throw new TableNotDisabledException(tableName);
1720     }
1721   }
1722 
1723   /**
1724    * @return cluster status
1725    */
1726   public ClusterStatus getClusterStatus() throws InterruptedIOException {
1727     // Build Set of backup masters from ZK nodes
1728     List<String> backupMasterStrings;
1729     try {
1730       backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1731         this.zooKeeper.backupMasterAddressesZNode);
1732     } catch (KeeperException e) {
1733       LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1734       backupMasterStrings = new ArrayList<String>(0);
1735     }
1736     List<ServerName> backupMasters = new ArrayList<ServerName>(
1737                                           backupMasterStrings.size());
1738     for (String s: backupMasterStrings) {
1739       try {
1740         byte [] bytes;
1741         try {
1742           bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(
1743               this.zooKeeper.backupMasterAddressesZNode, s));
1744         } catch (InterruptedException e) {
1745           throw new InterruptedIOException();
1746         }
1747         if (bytes != null) {
1748           ServerName sn;
1749           try {
1750             sn = ServerName.parseFrom(bytes);
1751           } catch (DeserializationException e) {
1752             LOG.warn("Failed to parse server name, skipping backup server registration", e);
1753             continue;
1754           }
1755           backupMasters.add(sn);
1756         }
1757       } catch (KeeperException e) {
1758         LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1759                  "backup servers"), e);
1760       }
1761     }
1762     Collections.sort(backupMasters, new Comparator<ServerName>() {
1763       @Override
1764       public int compare(ServerName s1, ServerName s2) {
1765         return s1.getServerName().compareTo(s2.getServerName());
1766       }});
1767 
1768     String clusterId = fileSystemManager != null ?
1769       fileSystemManager.getClusterId().toString() : null;
1770     Map<String, RegionState> regionsInTransition = assignmentManager != null ?
1771       assignmentManager.getRegionStates().getRegionsInTransition() : null;
1772     String[] coprocessors = cpHost != null ? getMasterCoprocessors() : null;
1773     boolean balancerOn = loadBalancerTracker != null ?
1774       loadBalancerTracker.isBalancerOn() : false;
1775     Map<ServerName, ServerLoad> onlineServers = null;
1776     Set<ServerName> deadServers = null;
1777     if (serverManager != null) {
1778       deadServers = serverManager.getDeadServers().copyServerNames();
1779       onlineServers = serverManager.getOnlineServers();
1780     }
1781     return new ClusterStatus(VersionInfo.getVersion(), clusterId,
1782       onlineServers, deadServers, serverName, backupMasters,
1783       regionsInTransition, coprocessors, balancerOn);
1784   }
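       // Illustrative read-side usage (accessor name assumed on ClusterStatus):
       //   ClusterStatus cs = master.getClusterStatus();
       //   cs.getBackupMasters(); // the sorted backup-master list built above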
1785 
1786   /**
1787    * The set of loaded coprocessors is stored in a static set. Since it's
1788    * statically allocated, it does not require that HMaster's cpHost be
1789    * initialized prior to accessing it.
1790    * @return a String representation of the set of names of the loaded
1791    * coprocessors.
1792    */
1793   public static String getLoadedCoprocessors() {
1794     return CoprocessorHost.getLoadedCoprocessors().toString();
1795   }
1796 
1797   /**
1798    * @return timestamp in millis when HMaster was started.
1799    */
1800   public long getMasterStartTime() {
1801     return startcode;
1802   }
1803 
1804   /**
1805    * @return timestamp in millis when HMaster became the active master.
1806    */
1807   public long getMasterActiveTime() {
1808     return masterActiveTime;
1809   }
1810 
1811   public int getRegionServerInfoPort(final ServerName sn) {
1812     RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn);
1813     if (info == null || info.getInfoPort() == 0) {
1814       return conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
1815         HConstants.DEFAULT_REGIONSERVER_INFOPORT);
1816     }
1817     return info.getInfoPort();
1818   }
1819 
1820   /**
1821    * @return array of coprocessor simple class names.
1822    */
1823   public String[] getMasterCoprocessors() {
1824     Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
1825     return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
1826   }
1827 
1828   @Override
1829   public void abort(final String msg, final Throwable t) {
1830     if (isAborted() || isStopped()) {
1831       return;
1832     }
1833     if (cpHost != null) {
1834       // HBASE-4014: dump a list of loaded coprocessors.
1835       LOG.fatal("Master server abort: loaded coprocessors are: " +
1836           getLoadedCoprocessors());
1837     }
1838     if (t != null) LOG.fatal(msg, t);
1839     stop(msg);
1840   }
1841 
1842   @Override
1843   public ZooKeeperWatcher getZooKeeper() {
1844     return zooKeeper;
1845   }
1846 
1847   @Override
1848   public MasterCoprocessorHost getMasterCoprocessorHost() {
1849     return cpHost;
1850   }
1851 
1852   @Override
1853   public MasterQuotaManager getMasterQuotaManager() {
1854     return quotaManager;
1855   }
1856 
1857   @Override
1858   public ServerName getServerName() {
1859     return this.serverName;
1860   }
1861 
1862   @Override
1863   public AssignmentManager getAssignmentManager() {
1864     return this.assignmentManager;
1865   }
1866 
1867   public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
1868     return rsFatals;
1869   }
1870 
1871   public void shutdown() {
1872     if (cpHost != null) {
1873       try {
1874         cpHost.preShutdown();
1875       } catch (IOException ioe) {
1876         LOG.error("Error calling master coprocessor preShutdown()", ioe);
1877       }
1878     }
1879 
1880     if (this.serverManager != null) {
1881       this.serverManager.shutdownCluster();
1882     }
1883     if (this.clusterStatusTracker != null){
1884       try {
1885         this.clusterStatusTracker.setClusterDown();
1886       } catch (KeeperException e) {
1887         LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
1888       }
1889     }
1890   }
1891 
1892   public void stopMaster() {
1893     if (cpHost != null) {
1894       try {
1895         cpHost.preStopMaster();
1896       } catch (IOException ioe) {
1897         LOG.error("Error calling master coprocessor preStopMaster()", ioe);
1898       }
1899     }
1900     stop("Stopped by " + Thread.currentThread().getName());
1901   }
1902 
1903   void checkServiceStarted() throws ServerNotRunningYetException {
1904     if (!serviceStarted) {
1905       throw new ServerNotRunningYetException("Server is not running yet");
1906     }
1907   }
1908 
1909   void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException {
1910     checkServiceStarted();
1911     if (!this.initialized) {
1912       throw new PleaseHoldException("Master is initializing");
1913     }
1914   }
1915 
1916   void checkNamespaceManagerReady() throws IOException {
1917     checkInitialized();
1918     if (tableNamespaceManager == null ||
1919         !tableNamespaceManager.isTableAvailableAndInitialized()) {
1920       throw new IOException("Table Namespace Manager not ready yet, try again later");
1921     }
1922   }
1923   /**
1924    * Report whether this master is currently the active master or not.
1925    * If not active master, we are parked on ZK waiting to become active.
1926    *
1927    * This method is used for testing.
1928    *
1929    * @return true if active master, false if not.
1930    */
1931   public boolean isActiveMaster() {
1932     return isActiveMaster;
1933   }
1934 
1935   /**
1936    * Report whether this master has completed with its initialization and is
1937    * ready.  If ready, the master is also the active master.  A standby master
1938    * is never ready.
1939    *
1940    * This method is used for testing.
1941    *
1942    * @return true if master is ready to go, false if not.
1943    */
1944   @Override
1945   public boolean isInitialized() {
1946     return initialized;
1947   }
1948 
1949   /**
1950    * ServerShutdownHandlerEnabled is set to false before completing
1951    * assignMeta to prevent processing of ServerShutdownHandler.
1952    * @return true if assignMeta has completed.
1953    */
1954   @Override
1955   public boolean isServerShutdownHandlerEnabled() {
1956     return this.serverShutdownHandlerEnabled;
1957   }
1958 
1959   /**
1960    * Report whether this master has started initialization and is about to do meta region assignment.
1961    * @return true if master is in initialization and about to assign hbase:meta regions
1962    */
1963   public boolean isInitializationStartsMetaRegionAssignment() {
1964     return this.initializationBeforeMetaAssignment;
1965   }
1966 
1967   public void assignRegion(HRegionInfo hri) {
1968     assignmentManager.assign(hri);
1969   }
1970 
1971   /**
1972    * Compute the average load across all region servers.
1973    * Currently, this uses a very naive computation: just the number of
1974    * regions being served, ignoring stats about the number of requests.
1975    * @return the average load
1976    */
1977   public double getAverageLoad() {
1978     if (this.assignmentManager == null) {
1979       return 0;
1980     }
1981 
1982     RegionStates regionStates = this.assignmentManager.getRegionStates();
1983     if (regionStates == null) {
1984       return 0;
1985     }
1986     return regionStates.getAverageLoad();
1987   }
1988 
1989   @Override
1990   public boolean registerService(Service instance) {
1991     /*
1992      * No stacking of instances is allowed for a single service name
1993      */
1994     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
1995     if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
1996       LOG.error("Coprocessor service "+serviceDesc.getFullName()+
1997           " already registered, rejecting request from "+instance
1998       );
1999       return false;
2000     }
2001 
2002     coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
2003     if (LOG.isDebugEnabled()) {
2004       LOG.debug("Registered master coprocessor service: service="+serviceDesc.getFullName());
2005     }
2006     return true;
2007   }
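       // Illustrative registration sketch (hypothetical protobuf-generated
       // service):
       //   Service svc = MyMasterProtos.MyService.newReflectiveService(impl);
       //   master.registerService(svc); // false if the service name is taken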
2008 
2009   /**
2010    * Utility for constructing an instance of the passed HMaster class.
2011    * @param masterClass the HMaster subclass to instantiate
2012    * @param conf configuration passed to the master's constructor
2013    * @return HMaster instance.
2014    */
2015   public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2016       final Configuration conf, final CoordinatedStateManager cp)  {
2017     try {
2018       Constructor<? extends HMaster> c =
2019         masterClass.getConstructor(Configuration.class, CoordinatedStateManager.class);
2020       return c.newInstance(conf, cp);
2021     } catch (InvocationTargetException ite) {
2022       Throwable target = ite.getTargetException() != null?
2023         ite.getTargetException(): ite;
2024       if (target.getCause() != null) target = target.getCause();
2025       throw new RuntimeException("Failed construction of Master: " +
2026         masterClass.toString(), target);
2027     } catch (Exception e) {
2028       throw new RuntimeException("Failed construction of Master: " +
2029         masterClass.toString() + ((e.getCause() != null)?
2030           e.getCause().getMessage(): ""), e);
2031     }
2032   }
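       // Illustrative usage (factory call assumed; main() below goes through
       // HMasterCommandLine instead):
       //   CoordinatedStateManager cp =
       //       CoordinatedStateManagerFactory.getCoordinatedStateManager(conf);
       //   HMaster master = HMaster.constructMaster(HMaster.class, conf, cp);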
2033 
2034   /**
2035    * @see org.apache.hadoop.hbase.master.HMasterCommandLine
2036    */
2037   public static void main(String [] args) {
2038     VersionInfo.logVersion();
2039     new HMasterCommandLine(HMaster.class).doMain(args);
2040   }
2041 
2042   public HFileCleaner getHFileCleaner() {
2043     return this.hfileCleaner;
2044   }
2045 
2046   /**
2047    * Exposed for TESTING!
2048    * @return the underlying snapshot manager
2049    */
2050   public SnapshotManager getSnapshotManagerForTesting() {
2051     return this.snapshotManager;
2052   }
2053 
2054   @Override
2055   public void createNamespace(NamespaceDescriptor descriptor) throws IOException {
2056     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
2057     checkNamespaceManagerReady();
2058     if (cpHost != null) {
2059       if (cpHost.preCreateNamespace(descriptor)) {
2060         return;
2061       }
2062     }
2063     LOG.info(getClientIdAuditPrefix() + " creating " + descriptor);
2064     tableNamespaceManager.create(descriptor);
2065     if (cpHost != null) {
2066       cpHost.postCreateNamespace(descriptor);
2067     }
2068   }
2069 
2070   @Override
2071   public void modifyNamespace(NamespaceDescriptor descriptor) throws IOException {
2072     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
2073     checkNamespaceManagerReady();
2074     if (cpHost != null) {
2075       if (cpHost.preModifyNamespace(descriptor)) {
2076         return;
2077       }
2078     }
2079     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
2080     tableNamespaceManager.update(descriptor);
2081     if (cpHost != null) {
2082       cpHost.postModifyNamespace(descriptor);
2083     }
2084   }
2085 
2086   @Override
2087   public void deleteNamespace(String name) throws IOException {
2088     checkNamespaceManagerReady();
2089     if (cpHost != null) {
2090       if (cpHost.preDeleteNamespace(name)) {
2091         return;
2092       }
2093     }
2094     LOG.info(getClientIdAuditPrefix() + " delete " + name);
2095     tableNamespaceManager.remove(name);
2096     if (cpHost != null) {
2097       cpHost.postDeleteNamespace(name);
2098     }
2099   }
2100 
2101   /**
2102    * Ensure that the specified namespace exists, otherwise throw a NamespaceNotFoundException.
2103    *
2104    * @param name the namespace to check
2105    * @throws IOException if the namespace manager is not ready yet.
2106    * @throws NamespaceNotFoundException if the namespace does not exist
2107    */
2108   private void ensureNamespaceExists(final String name)
2109       throws IOException, NamespaceNotFoundException {
2110     checkNamespaceManagerReady();
2111     NamespaceDescriptor nsd = tableNamespaceManager.get(name);
2112     if (nsd == null) {
2113       throw new NamespaceNotFoundException(name);
2114     }
2115   }
2116 
2117   @Override
2118   public NamespaceDescriptor getNamespaceDescriptor(String name) throws IOException {
2119     checkNamespaceManagerReady();
2120 
2121     if (cpHost != null) {
2122       cpHost.preGetNamespaceDescriptor(name);
2123     }
2124 
2125     NamespaceDescriptor nsd = tableNamespaceManager.get(name);
2126     if (nsd == null) {
2127       throw new NamespaceNotFoundException(name);
2128     }
2129 
2130     if (cpHost != null) {
2131       cpHost.postGetNamespaceDescriptor(nsd);
2132     }
2133 
2134     return nsd;
2135   }
2136 
2137   @Override
2138   public List<NamespaceDescriptor> listNamespaceDescriptors() throws IOException {
2139     checkNamespaceManagerReady();
2140 
2141     final List<NamespaceDescriptor> descriptors = new ArrayList<NamespaceDescriptor>();
2142     boolean bypass = false;
2143     if (cpHost != null) {
2144       bypass = cpHost.preListNamespaceDescriptors(descriptors);
2145     }
2146 
2147     if (!bypass) {
2148       descriptors.addAll(tableNamespaceManager.list());
2149 
2150       if (cpHost != null) {
2151         cpHost.postListNamespaceDescriptors(descriptors);
2152       }
2153     }
2154     return descriptors;
2155   }
2156 
2157   @Override
2158   public List<HTableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
2159     ensureNamespaceExists(name);
2160     return listTableDescriptors(name, null, null, true);
2161   }
2162 
2163   @Override
2164   public List<TableName> listTableNamesByNamespace(String name) throws IOException {
2165     ensureNamespaceExists(name);
2166     return listTableNames(name, null, true);
2167   }
2168 
2169   /**
2170    * Returns the list of table descriptors that match the specified request
2171    *
2172    * @param namespace the namespace to query, or null if querying for all
2173    * @param regex The regular expression to match against, or null if querying for all
2174    * @param tableNameList the list of table names, or null if querying for all
2175    * @param includeSysTables False to match only against userspace tables
2176    * @return the list of table descriptors
2177    */
2178   public List<HTableDescriptor> listTableDescriptors(final String namespace, final String regex,
2179       final List<TableName> tableNameList, final boolean includeSysTables)
2180       throws IOException {
2181     final List<HTableDescriptor> descriptors = new ArrayList<HTableDescriptor>();
2182 
2183     boolean bypass = false;
2184     if (cpHost != null) {
2185       bypass = cpHost.preGetTableDescriptors(tableNameList, descriptors, regex);
2186     }
2187 
2188     if (!bypass) {
2189       if (tableNameList == null || tableNameList.size() == 0) {
2190         // request for all TableDescriptors
2191         Collection<HTableDescriptor> htds;
2192         if (namespace != null && namespace.length() > 0) {
2193           htds = tableDescriptors.getByNamespace(namespace).values();
2194         } else {
2195           htds = tableDescriptors.getAll().values();
2196         }
2197 
2198         for (HTableDescriptor desc: htds) {
2199           if (tableStateManager.isTablePresent(desc.getTableName())
2200               && (includeSysTables || !desc.getTableName().isSystemTable())) {
2201             descriptors.add(desc);
2202           }
2203         }
2204       } else {
2205         for (TableName s: tableNameList) {
2206           if (tableStateManager.isTablePresent(s)) {
2207             HTableDescriptor desc = tableDescriptors.get(s);
2208             if (desc != null) {
2209               descriptors.add(desc);
2210             }
2211           }
2212         }
2213       }
2214 
2215       // Retains only those matched by regular expression.
2216       if (regex != null) {
2217         filterTablesByRegex(descriptors, Pattern.compile(regex));
2218       }
2219 
2220       if (cpHost != null) {
2221         cpHost.postGetTableDescriptors(tableNameList, descriptors, regex);
2222       }
2223     }
2224     return descriptors;
2225   }
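       // Illustrative call (hypothetical namespace and regex): userspace tables
       // in namespace "ns1" whose names match "web.*":
       //   List<HTableDescriptor> htds = listTableDescriptors("ns1", "web.*", null, false);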
2226 
2227   /**
2228    * Returns the list of table names that match the specified request
2229    * @param namespace the namespace to query, or null if querying for all
2230    * @param regex The regular expression to match against, or null if querying for all
2231    * @param includeSysTables False to match only against userspace tables
2232    * @return the list of table names
2233    */
2234   public List<TableName> listTableNames(final String namespace, final String regex,
2235       final boolean includeSysTables) throws IOException {
2236     final List<HTableDescriptor> descriptors = new ArrayList<HTableDescriptor>();
2237 
2238     boolean bypass = false;
2239     if (cpHost != null) {
2240       bypass = cpHost.preGetTableNames(descriptors, regex);
2241     }
2242 
2243     if (!bypass) {
2244       // get all descriptors
2245       Collection<HTableDescriptor> htds;
2246       if (namespace != null && namespace.length() > 0) {
2247         htds = tableDescriptors.getByNamespace(namespace).values();
2248       } else {
2249         htds = tableDescriptors.getAll().values();
2250       }
2251 
2252       for (HTableDescriptor htd: htds) {
2253         if (includeSysTables || !htd.getTableName().isSystemTable()) {
2254           descriptors.add(htd);
2255         }
2256       }
2257 
2258       // Retains only those matched by regular expression.
2259       if (regex != null) {
2260         filterTablesByRegex(descriptors, Pattern.compile(regex));
2261       }
2262 
2263       if (cpHost != null) {
2264         cpHost.postGetTableNames(descriptors, regex);
2265       }
2266     }
2267 
2268     List<TableName> result = new ArrayList<TableName>(descriptors.size());
2269     for (HTableDescriptor htd: descriptors) {
2270       result.add(htd.getTableName());
2271     }
2272     return result;
2273   }
2274 
2275 
2276   /**
2277    * Removes the table descriptors that don't match the pattern.
2278    * @param descriptors list of table descriptors to filter
2279    * @param pattern the regex to use
2280    */
2281   private static void filterTablesByRegex(final Collection<HTableDescriptor> descriptors,
2282       final Pattern pattern) {
2283     final String defaultNS = NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR;
2284     Iterator<HTableDescriptor> itr = descriptors.iterator();
2285     while (itr.hasNext()) {
2286       HTableDescriptor htd = itr.next();
2287       String tableName = htd.getTableName().getNameAsString();
2288       boolean matched = pattern.matcher(tableName).matches();
2289       if (!matched && htd.getTableName().getNamespaceAsString().equals(defaultNS)) {
2290         matched = pattern.matcher(defaultNS + TableName.NAMESPACE_DELIM + tableName).matches();
2291       }
2292       if (!matched) {
2293         itr.remove();
2294       }
2295     }
2296   }
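       // Worked example (illustrative): for a default-namespace table "users"
       // and Pattern.compile("default:u.*"), the first match on "users" fails
       // but the retry on "default:users" succeeds, so the descriptor is kept.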
2297 
2298   @Override
2299   public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
2300     return getClusterStatus().getLastMajorCompactionTsForTable(table);
2301   }
2302 
2303   @Override
2304   public long getLastMajorCompactionTimestampForRegion(byte[] regionName) throws IOException {
2305     return getClusterStatus().getLastMajorCompactionTsForRegion(regionName);
2306   }
2307 }