View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.lang.reflect.Constructor;
24  import java.lang.reflect.InvocationTargetException;
25  import java.net.InetAddress;
26  import java.net.InetSocketAddress;
27  import java.net.UnknownHostException;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collections;
31  import java.util.Comparator;
32  import java.util.HashSet;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Set;
36  import java.util.concurrent.atomic.AtomicReference;
37  
38  import javax.servlet.ServletException;
39  import javax.servlet.http.HttpServlet;
40  import javax.servlet.http.HttpServletRequest;
41  import javax.servlet.http.HttpServletResponse;
42  
43  import org.apache.commons.logging.Log;
44  import org.apache.commons.logging.LogFactory;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.fs.Path;
47  import org.apache.hadoop.hbase.ClusterStatus;
48  import org.apache.hadoop.hbase.CoordinatedStateException;
49  import org.apache.hadoop.hbase.CoordinatedStateManager;
50  import org.apache.hadoop.hbase.DoNotRetryIOException;
51  import org.apache.hadoop.hbase.HBaseIOException;
52  import org.apache.hadoop.hbase.HColumnDescriptor;
53  import org.apache.hadoop.hbase.HConstants;
54  import org.apache.hadoop.hbase.HRegionInfo;
55  import org.apache.hadoop.hbase.HTableDescriptor;
56  import org.apache.hadoop.hbase.MasterNotRunningException;
57  import org.apache.hadoop.hbase.MetaTableAccessor;
58  import org.apache.hadoop.hbase.NamespaceDescriptor;
59  import org.apache.hadoop.hbase.NamespaceNotFoundException;
60  import org.apache.hadoop.hbase.PleaseHoldException;
61  import org.apache.hadoop.hbase.Server;
62  import org.apache.hadoop.hbase.ServerLoad;
63  import org.apache.hadoop.hbase.ServerName;
64  import org.apache.hadoop.hbase.TableDescriptors;
65  import org.apache.hadoop.hbase.TableName;
66  import org.apache.hadoop.hbase.TableNotDisabledException;
67  import org.apache.hadoop.hbase.TableNotFoundException;
68  import org.apache.hadoop.hbase.UnknownRegionException;
69  import org.apache.hadoop.hbase.classification.InterfaceAudience;
70  import org.apache.hadoop.hbase.client.MetaScanner;
71  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
72  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
73  import org.apache.hadoop.hbase.client.Result;
74  import org.apache.hadoop.hbase.client.TableState;
75  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
76  import org.apache.hadoop.hbase.exceptions.DeserializationException;
77  import org.apache.hadoop.hbase.executor.ExecutorType;
78  import org.apache.hadoop.hbase.ipc.RequestContext;
79  import org.apache.hadoop.hbase.ipc.RpcServer;
80  import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
81  import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
82  import org.apache.hadoop.hbase.master.balancer.BalancerChore;
83  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
84  import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
85  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
86  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
87  import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
88  import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
89  import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
90  import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
91  import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler;
92  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
93  import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
94  import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
95  import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
96  import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
97  import org.apache.hadoop.hbase.master.handler.TruncateTableHandler;
98  import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
99  import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
100 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
101 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
102 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
103 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
104 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
105 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
106 import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
107 import org.apache.hadoop.hbase.regionserver.HRegionServer;
108 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
109 import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
110 import org.apache.hadoop.hbase.replication.regionserver.Replication;
111 import org.apache.hadoop.hbase.security.UserProvider;
112 import org.apache.hadoop.hbase.util.Addressing;
113 import org.apache.hadoop.hbase.util.Bytes;
114 import org.apache.hadoop.hbase.util.CompressionTest;
115 import org.apache.hadoop.hbase.util.FSUtils;
116 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
117 import org.apache.hadoop.hbase.util.Pair;
118 import org.apache.hadoop.hbase.util.Threads;
119 import org.apache.hadoop.hbase.util.VersionInfo;
120 import org.apache.hadoop.hbase.util.ZKDataMigrator;
121 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
122 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
123 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
124 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
125 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
126 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
127 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
128 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
129 import org.apache.zookeeper.KeeperException;
130 import org.apache.zookeeper.Watcher;
131 import org.mortbay.jetty.Connector;
132 import org.mortbay.jetty.nio.SelectChannelConnector;
133 import org.mortbay.jetty.servlet.Context;
134 
135 import com.google.common.annotations.VisibleForTesting;
136 import com.google.common.collect.Lists;
137 import com.google.common.collect.Maps;
138 import com.google.protobuf.Descriptors;
139 import com.google.protobuf.Service;
140 
141 /**
142  * HMaster is the "master server" for HBase. An HBase cluster has one active
143  * master.  If many masters are started, all compete.  Whichever wins goes on to
144  * run the cluster.  All others park themselves in their constructor until
145  * master or cluster shutdown or until the active master loses its lease in
 * zookeeper.  Thereafter, all running masters jostle to take over the master role.
147  *
 * <p>The Master can be asked to shut down the cluster. See {@link #shutdown()}.  In
149  * this case it will tell all regionservers to go down and then wait on them
150  * all reporting in that they are down.  This master will then shut itself down.
151  *
152  * <p>You can also shutdown just this master.  Call {@link #stopMaster()}.
153  *
154  * @see Watcher
155  */
156 @InterfaceAudience.Private
157 @SuppressWarnings("deprecation")
158 public class HMaster extends HRegionServer implements MasterServices, Server {
  private static final Log LOG = LogFactory.getLog(HMaster.class.getName());

  // MASTER is the name of the webapp and the attribute name used to stuff this
  // instance into the web context.
  public static final String MASTER = "master";

  // Manager and zk listener for master election
  private ActiveMasterManager activeMasterManager;
  // Region server tracker
  RegionServerTracker regionServerTracker;
  // Draining region server tracker
  private DrainingServerTracker drainingServerTracker;
  // Tracker for load balancer state
  LoadBalancerTracker loadBalancerTracker;

  /** Namespace stuff */
  private TableNamespaceManager tableNamespaceManager;

  // Metrics for the HMaster
  final MetricsMaster metricsMaster;
  // file system manager for the master FS operations
  private MasterFileSystem fileSystemManager;

  // server manager to deal with region server info
  volatile ServerManager serverManager;

  // manager of assignment nodes in zookeeper
  AssignmentManager assignmentManager;

  // buffer for "fatal error" notices from region servers
  // in the cluster. This is only used for assisting
  // operations/debugging.
  MemoryBoundedLogMessageBuffer rsFatals;

  // flag set after we become the active master (used for testing)
  private volatile boolean isActiveMaster = false;

  // flag set after we complete initialization once active,
  // it is not private since it's used in unit tests
  volatile boolean initialized = false;

  // flag set after master services are started,
  // initialization may have not completed yet.
  volatile boolean serviceStarted = false;

  // flag set after we complete assignMeta.
  private volatile boolean serverShutdownHandlerEnabled = false;

  // Load balancer implementation plus the chores that periodically drive it.
  LoadBalancer balancer;
  private BalancerChore balancerChore;
  private ClusterStatusChore clusterStatusChore;
  private ClusterStatusPublisher clusterStatusPublisherChore = null;

  // Background chores: catalog cleanup, old WAL cleanup, archived hfile cleanup.
  CatalogJanitor catalogJanitorChore;
  private LogCleaner logCleaner;
  private HFileCleaner hfileCleaner;

  // Host for master-side coprocessors.
  MasterCoprocessorHost cpHost;

  // Whether table descriptors are eagerly loaded at startup (set from
  // "hbase.master.preload.tabledescriptors" in the constructor).
  private final boolean preLoadTableDescriptors;

  // Time stamps for when a hmaster became active
  private long masterActiveTime;

  //should we check the compression codec type at master side, default true, HBASE-6370
  private final boolean masterCheckCompression;

  // Coprocessor-provided endpoint services, keyed by service name.
  Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();

  // monitor for snapshot of hbase tables
  SnapshotManager snapshotManager;
  // monitor for distributed procedures
  MasterProcedureManagerHost mpmHost;

  // Quota bookkeeping for the cluster.
  private MasterQuotaManager quotaManager;

  // handle table states
  private TableStateManager tableStateManager;

  /** flag used in test cases in order to simulate RS failures during master initialization */
  private volatile boolean initializationBeforeMetaAssignment = false;

  /** jetty server for master to redirect requests to regionserver infoServer */
  private org.mortbay.jetty.Server masterJettyServer;

  // Port the redirecting jetty server is listening on (set in putUpJettyServer).
  private int masterInfoPort;
245   public static class RedirectServlet extends HttpServlet {
246     private static final long serialVersionUID = 2894774810058302472L;
247     private static int regionServerInfoPort;
248 
249     @Override
250     public void doGet(HttpServletRequest request,
251         HttpServletResponse response) throws ServletException, IOException {
252       String redirectUrl = request.getScheme() + "://"
253         + request.getServerName() + ":" + regionServerInfoPort
254         + request.getRequestURI();
255       response.sendRedirect(redirectUrl);
256     }
257   }
258 
  /**
   * Initializes the HMaster. The steps are as follows:
   * <p>
   * <ol>
   * <li>Initialize the local HRegionServer
   * <li>Start the ActiveMasterManager.
   * </ol>
   * <p>
   * Remaining steps of initialization occur in
   * #finishActiveMasterInitialization(MonitoredTask) after
   * the master becomes the active one.
   *
   * @param conf cluster configuration
   * @param csm coordinated state manager shared with the regionserver base class
   * @throws KeeperException
   * @throws IOException
   */
  public HMaster(final Configuration conf, CoordinatedStateManager csm)
      throws IOException, KeeperException {
    super(conf, csm);
    // Bounded buffer holding fatal-error notices reported by regionservers.
    this.rsFatals = new MemoryBoundedLogMessageBuffer(
      conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));

    LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) +
        ", hbase.cluster.distributed=" + this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));

    Replication.decorateMasterConfiguration(this.conf);

    // Hack! Maps DFSClient => Master for logs.  HDFS made this
    // config param for task trackers, but we can piggyback off of it.
    if (this.conf.get("mapreduce.task.attempt.id") == null) {
      this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
    }

    //should we check the compression codec type at master side, default true, HBASE-6370
    this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);

    this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));

    // preload table descriptor at startup
    this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true);

    // Do we publish the status?

    boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
        HConstants.STATUS_PUBLISHED_DEFAULT);
    Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
        conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
            ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
            ClusterStatusPublisher.Publisher.class);

    if (shouldPublish) {
      if (publisherClass == null) {
        LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
            ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS +
            " is not set - not publishing status");
      } else {
        clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
        Threads.setDaemonThreadRunning(clusterStatusPublisherChore.getThread());
      }
    }
    // Compete for the active-master role, then bring up the redirecting
    // jetty server so the master UI port answers requests.
    startActiveMasterManager();
    putUpJettyServer();
  }
321 
  /**
   * Starts a small jetty server on the configured master info port that
   * redirects every request to this host's regionserver info server, keeping
   * the historical master UI port usable. No-op when redirecting is disabled,
   * when the info port is negative (info server disabled), or when no info
   * server is running.
   *
   * @throws IOException if the bind address does not belong to this host or
   *     jetty fails to start
   */
  private void putUpJettyServer() throws IOException {
    if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
      return;
    }
    int infoPort = conf.getInt("hbase.master.info.port.orig",
      HConstants.DEFAULT_MASTER_INFOPORT);
    // -1 is for disabling info server, so no redirecting
    if (infoPort < 0 || infoServer == null) {
      return;
    }
    String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
    if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
      String msg =
          "Failed to start redirecting jetty server. Address " + addr
              + " does not belong to this host. Correct configuration parameter: "
              + "hbase.master.info.bindAddress";
      LOG.error(msg);
      throw new IOException(msg);
    }

    // Tell the servlet where the real regionserver info server lives.
    RedirectServlet.regionServerInfoPort = infoServer.getPort();
    masterJettyServer = new org.mortbay.jetty.Server();
    Connector connector = new SelectChannelConnector();
    connector.setHost(addr);
    connector.setPort(infoPort);
    masterJettyServer.addConnector(connector);
    masterJettyServer.setStopAtShutdown(true);
    Context context = new Context(masterJettyServer, "/", Context.NO_SESSIONS);
    context.addServlet(RedirectServlet.class, "/*");
    try {
      masterJettyServer.start();
    } catch (Exception e) {
      throw new IOException("Failed to start redirecting jetty server", e);
    }
    // Remember the port the redirect server is using.
    masterInfoPort = connector.getPort();
  }
358 
359   /**
360    * For compatibility, if failed with regionserver credentials, try the master one
361    */
362   protected void login(UserProvider user, String host) throws IOException {
363     try {
364       super.login(user, host);
365     } catch (IOException ie) {
366       user.login("hbase.master.keytab.file",
367         "hbase.master.kerberos.principal", host);
368     }
369   }
370 
371   /**
372    * If configured to put regions on active master,
373    * wait till a backup master becomes active.
374    * Otherwise, loop till the server is stopped or aborted.
375    */
376   protected void waitForMasterActive(){
377     boolean tablesOnMaster = BaseLoadBalancer.tablesOnMaster(conf);
378     while (!(tablesOnMaster && isActiveMaster)
379         && !isStopped() && !isAborted()) {
380       sleeper.sleep();
381     }
382   }
383 
384   @VisibleForTesting
385   public MasterRpcServices getMasterRpcServices() {
386     return (MasterRpcServices)rpcServices;
387   }
388 
389   public boolean balanceSwitch(final boolean b) throws IOException {
390     return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
391   }
392 
393   protected String getProcessName() {
394     return MASTER;
395   }
396 
397   protected boolean canCreateBaseZNode() {
398     return true;
399   }
400 
401   protected boolean canUpdateTableDescriptor() {
402     return true;
403   }
404 
405   protected RSRpcServices createRpcServices() throws IOException {
406     return new MasterRpcServices(this);
407   }
408 
409   protected void configureInfoServer() {
410     infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class);
411     infoServer.setAttribute(MASTER, this);
412     if (BaseLoadBalancer.tablesOnMaster(conf)) {
413       super.configureInfoServer();
414     }
415   }
416 
417   protected Class<? extends HttpServlet> getDumpServlet() {
418     return MasterDumpServlet.class;
419   }
420 
421   /**
422    * Emit the HMaster metrics, such as region in transition metrics.
423    * Surrounding in a try block just to be sure metrics doesn't abort HMaster.
424    */
425   protected void doMetrics() {
426     try {
427       if (assignmentManager != null) {
428         assignmentManager.updateRegionsInTransitionMetrics();
429       }
430     } catch (Throwable e) {
431       LOG.error("Couldn't update metrics: " + e.getMessage());
432     }
433   }
434 
  /** @return the metrics tracker for this master */
  MetricsMaster getMasterMetrics() {
    return metricsMaster;
  }
438 
  /**
   * Initialize all ZK based system trackers: load balancer state, region
   * server membership, draining servers, the cluster-up flag, the snapshot
   * and procedure managers, and migrate any table state that older versions
   * left in zookeeper.
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   * @throws CoordinatedStateException
   */
  void initializeZKBasedSystemTrackers() throws IOException,
      InterruptedException, KeeperException, CoordinatedStateException {
    this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
    this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
    this.loadBalancerTracker.start();
    this.assignmentManager = new AssignmentManager(this, serverManager,
      this.balancer, this.service, this.metricsMaster,
      this.tableLockManager, tableStateManager);

    this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
        this.serverManager);
    this.regionServerTracker.start();

    this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
      this.serverManager);
    this.drainingServerTracker.start();

    // Set the cluster as up.  If new RSs, they'll be waiting on this before
    // going ahead with their startup.
    boolean wasUp = this.clusterStatusTracker.isClusterUp();
    if (!wasUp) this.clusterStatusTracker.setClusterUp();

    LOG.info("Server active/primary master=" + this.serverName +
        ", sessionid=0x" +
        Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
        ", setting cluster-up flag (Was=" + wasUp + ")");

    // create/initialize the snapshot manager and other procedure managers
    this.snapshotManager = new SnapshotManager();
    this.mpmHost = new MasterProcedureManagerHost();
    this.mpmHost.register(this.snapshotManager);
    this.mpmHost.register(new MasterFlushTableProcedureManager());
    this.mpmHost.loadProcedures(conf);
    this.mpmHost.initialize(this, this.metricsMaster);

    // migrating existent table state from zk
    for (Map.Entry<TableName, TableState.State> entry : ZKDataMigrator
        .queryForTableStates(getZooKeeper()).entrySet()) {
      LOG.info("Converting state from zk to new states:" + entry);
      tableStateManager.setTableState(entry.getKey(), entry.getValue());
    }
    // Legacy table-state znodes are no longer needed once migrated.
    ZKUtil.deleteChildrenRecursively(getZooKeeper(), getZooKeeper().tableZNode);
  }
489 
  /**
   * Finish initialization of HMaster after becoming the primary master.
   *
   * <ol>
   * <li>Initialize master components - file system manager, server manager,
   *     assignment manager, region server tracker, etc</li>
   * <li>Start necessary service threads - balancer, catalog janior,
   *     executor services, etc</li>
   * <li>Set cluster as UP in ZooKeeper</li>
   * <li>Wait for RegionServers to check-in</li>
   * <li>Split logs and perform data recovery, if necessary</li>
   * <li>Ensure assignment of meta/namespace regions</li>
   * <li>Handle either fresh cluster start or master failover</li>
   * </ol>
   *
   * @param status task used to report initialization progress to operators
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   * @throws CoordinatedStateException
   */
  private void finishActiveMasterInitialization(MonitoredTask status)
      throws IOException, InterruptedException, KeeperException, CoordinatedStateException {

    isActiveMaster = true;

    /*
     * We are active master now... go initialize components we need to run.
     * Note, there may be dross in zk from previous runs; it'll get addressed
     * below after we determine if cluster startup or failover.
     */

    status.setStatus("Initializing Master file system");

    this.masterActiveTime = System.currentTimeMillis();
    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
    this.fileSystemManager = new MasterFileSystem(this, this);

    // enable table descriptors cache
    this.tableDescriptors.setCacheOn();

    // warm-up HTDs cache on master initialization
    if (preLoadTableDescriptors) {
      status.setStatus("Pre-loading table descriptors");
      this.tableDescriptors.getAll();
    }

    // publish cluster ID
    status.setStatus("Publishing Cluster ID in ZooKeeper");
    ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
    this.serverManager = createServerManager(this, this);

    // Lazily create the short-circuit connection/meta locator if the
    // superclass has not done so yet.
    synchronized (this) {
      if (shortCircuitConnection == null) {
        shortCircuitConnection = createShortCircuitConnection();
        metaTableLocator = new MetaTableLocator();
      }
    }

    // Invalidate all write locks held previously
    this.tableLockManager.reapWriteLocks();

    this.tableStateManager = new TableStateManager(this);
    this.tableStateManager.start();

    status.setStatus("Initializing ZK system trackers");
    initializeZKBasedSystemTrackers();

    // initialize master side coprocessors before we start handling requests
    status.setStatus("Initializing master coprocessors");
    this.cpHost = new MasterCoprocessorHost(this, this.conf);

    // start up all service threads.
    status.setStatus("Initializing master service threads");
    startServiceThreads();

    // Wake up this server to check in
    sleeper.skipSleepCycle();

    // Wait for region servers to report in
    this.serverManager.waitForRegionServers(status);
    // Check zk for region servers that are up but didn't register
    for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
      // The isServerOnline check is opportunistic, correctness is handled inside
      if (!this.serverManager.isServerOnline(sn)
          && serverManager.checkAndRecordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD)) {
        LOG.info("Registered server found up in zk but who has not yet reported in: " + sn);
      }
    }

    // get a list for previously failed RS which need log splitting work
    // we recover hbase:meta region servers inside master initialization and
    // handle other failed servers in SSH in order to start up master node ASAP
    Set<ServerName> previouslyFailedServers = this.fileSystemManager
        .getFailedServersFromLogFolders();

    // remove stale recovering regions from previous run
    this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);

    // log splitting for hbase:meta server
    ServerName oldMetaServerLocation = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
    if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
      splitMetaLogBeforeAssignment(oldMetaServerLocation);
      // Note: we can't remove oldMetaServerLocation from previousFailedServers list because it
      // may also host user regions
    }
    Set<ServerName> previouslyFailedMetaRSs = getPreviouselyFailedMetaServersFromZK();
    // need to use union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers
    // instead of previouslyFailedMetaRSs alone to address the following two situations:
    // 1) the chained failure situation(recovery failed multiple times in a row).
    // 2) master get killed right before it could delete the recovering hbase:meta from ZK while the
    // same server still has non-meta wals to be replayed so that
    // removeStaleRecoveringRegionsFromZK can't delete the stale hbase:meta region
    // Passing more servers into splitMetaLog is all right. If a server doesn't have hbase:meta wal,
    // there is no op for the server.
    previouslyFailedMetaRSs.addAll(previouslyFailedServers);

    this.initializationBeforeMetaAssignment = true;

    // Wait for regionserver to finish initialization.
    if (BaseLoadBalancer.tablesOnMaster(conf)) {
      waitForServerOnline();
    }

    //initialize load balancer
    this.balancer.setClusterStatus(getClusterStatus());
    this.balancer.setMasterServices(this);
    this.balancer.initialize();

    // Check if master is shutting down because of some issue
    // in initializing the regionserver or the balancer.
    if(isStopped()) return;

    // Make sure meta assigned before proceeding.
    status.setStatus("Assigning Meta Region");
    assignMeta(status, previouslyFailedMetaRSs);
    // check if master is shutting down because above assignMeta could return even hbase:meta isn't
    // assigned when master is shutting down
    if(isStopped()) return;

    status.setStatus("Submitting log splitting work for previously failed region servers");
    // Master has recovered hbase:meta region server and we put
    // other failed region servers in a queue to be handled later by SSH
    for (ServerName tmpServer : previouslyFailedServers) {
      this.serverManager.processDeadServer(tmpServer, true);
    }

    // Fix up assignment manager status
    status.setStatus("Starting assignment manager");
    this.assignmentManager.joinCluster();

    //set cluster status again after user regions are assigned
    this.balancer.setClusterStatus(getClusterStatus());

    // Start balancer and meta catalog janitor after meta and regions have
    // been assigned.
    status.setStatus("Starting balancer and catalog janitor");
    this.clusterStatusChore = new ClusterStatusChore(this, balancer);
    Threads.setDaemonThreadRunning(clusterStatusChore.getThread());
    this.balancerChore = new BalancerChore(this);
    Threads.setDaemonThreadRunning(balancerChore.getThread());
    this.catalogJanitorChore = new CatalogJanitor(this, this);
    Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());

    status.setStatus("Starting namespace manager");
    initNamespace();

    status.setStatus("Starting quota manager");
    initQuotaManager();

    if (this.cpHost != null) {
      try {
        this.cpHost.preMasterInitialization();
      } catch (IOException e) {
        LOG.error("Coprocessor preMasterInitialization() hook failed", e);
      }
    }

    status.markComplete("Initialization successful");
    LOG.info("Master has completed initialization");
    initialized = true;
    // clear the dead servers with same host name and port of online server because we are not
    // removing dead server with same hostname and port of rs which is trying to check in before
    // master initialization. See HBASE-5916.
    this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();

    if (this.cpHost != null) {
      // don't let cp initialization errors kill the master
      try {
        this.cpHost.postStartMaster();
      } catch (IOException ioe) {
        LOG.error("Coprocessor postStartMaster() hook failed", ioe);
      }
    }
  }
684 
685   /**
686    * Create a {@link ServerManager} instance.
687    * @param master
688    * @param services
689    * @return An instance of {@link ServerManager}
690    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
691    * @throws IOException
692    */
693   ServerManager createServerManager(final Server master,
694       final MasterServices services)
695   throws IOException {
696     // We put this out here in a method so can do a Mockito.spy and stub it out
697     // w/ a mocked up ServerManager.
698     return new ServerManager(master, services);
699   }
700 
  /**
   * Check <code>hbase:meta</code> is assigned. If not, assign it.
   * <p>
   * Reads the last-known meta state from ZooKeeper and verifies the region is
   * actually deployed at that location. When the location is stale, the old
   * host's WALs are split (if the host is dead) before meta is re-assigned;
   * in LOG_REPLAY recovery mode, meta log replay is triggered afterwards.
   * @param status MonitoredTask used to surface progress
   * @param previouslyFailedMetaRSs servers that previously hosted meta and
   *          failed; a dead meta host discovered here is added to this set
   * @throws InterruptedException
   * @throws IOException
   * @throws KeeperException
   */
  void assignMeta(MonitoredTask status, Set<ServerName> previouslyFailedMetaRSs)
      throws InterruptedException, IOException, KeeperException {
    // Work on meta region
    int assigned = 0;
    // Max time verifyMetaRegionLocation may take before the recorded
    // location is treated as unusable.
    long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
    status.setStatus("Assigning hbase:meta region");

    // Get current meta state from zk.
    RegionState metaState = MetaTableLocator.getMetaRegionState(getZooKeeper());

    // Seed the in-memory region state with what ZK reported.
    RegionStates regionStates = assignmentManager.getRegionStates();
    regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO,
      metaState.getState(), metaState.getServerName(), null);

    if (!metaState.isOpened() || !metaTableLocator.verifyMetaRegionLocation(
        this.getShortCircuitConnection(), this.getZooKeeper(), timeout)) {
      // Meta is not open at its recorded location (or has no location).
      ServerName currentMetaServer = metaState.getServerName();
      if (serverManager.isServerOnline(currentMetaServer)) {
        // Host is alive: meta is merely in transition; let the assignment
        // manager drive the transition to completion.
        LOG.info("Meta was in transition on " + currentMetaServer);
        assignmentManager.processRegionsInTransition(Arrays.asList(metaState));
      } else {
        if (currentMetaServer != null) {
          // Host is dead: split its logs before re-assigning meta so no
          // meta edits are lost.
          splitMetaLogBeforeAssignment(currentMetaServer);
          regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
          previouslyFailedMetaRSs.add(currentMetaServer);
        }
        LOG.info("Re-assigning hbase:meta, it was on " + currentMetaServer);
        assignmentManager.assignMeta();
      }
      assigned++;
    }

    enableMeta(TableName.META_TABLE_NAME);

    if ((RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode())
        && (!previouslyFailedMetaRSs.isEmpty())) {
      // replay WAL edits mode need new hbase:meta RS is assigned firstly
      status.setStatus("replaying log for Meta Region");
      this.fileSystemManager.splitMetaLog(previouslyFailedMetaRSs);
    }

    // Make sure a hbase:meta location is set. We need to enable SSH here since
    // if the meta region server is died at this time, we need it to be re-assigned
    // by SSH so that system tables can be assigned.
    // No need to wait for meta is assigned = 0 when meta is just verified.
    enableServerShutdownHandler(assigned != 0);

    LOG.info("hbase:meta assigned=" + assigned + ", location="
      + metaTableLocator.getMetaRegionLocation(this.getZooKeeper()));
    status.setStatus("META assigned.");
  }
760 
761   void initNamespace() throws IOException {
762     //create namespace manager
763     tableNamespaceManager = new TableNamespaceManager(this);
764     tableNamespaceManager.start();
765   }
766 
767   void initQuotaManager() throws IOException {
768     quotaManager = new MasterQuotaManager(this);
769     quotaManager.start();
770   }
771 
772   boolean isCatalogJanitorEnabled() {
773     return catalogJanitorChore != null ?
774       catalogJanitorChore.getEnabled() : false;
775   }
776 
777   private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
778     if (RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode()) {
779       // In log replay mode, we mark hbase:meta region as recovering in ZK
780       Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
781       regions.add(HRegionInfo.FIRST_META_REGIONINFO);
782       this.fileSystemManager.prepareLogReplay(currentMetaServer, regions);
783     } else {
784       // In recovered.edits mode: create recovered edits file for hbase:meta server
785       this.fileSystemManager.splitMetaLog(currentMetaServer);
786     }
787   }
788 
  /**
   * Turn on the ServerShutdownHandler (SSH) if it is not already enabled and,
   * optionally, block until a location for hbase:meta is published in ZK.
   * @param waitForMeta when true, wait for the meta region location
   * @throws IOException
   * @throws InterruptedException
   */
  private void enableServerShutdownHandler(
      final boolean waitForMeta) throws IOException, InterruptedException {
    // If ServerShutdownHandler is disabled, we enable it and expire those dead
    // but not expired servers. This is required so that if meta is assigning to
    // a server which dies after assignMeta starts assignment,
    // SSH can re-assign it. Otherwise, we will be
    // stuck here waiting forever if waitForMeta is specified.
    if (!serverShutdownHandlerEnabled) {
      serverShutdownHandlerEnabled = true;
      // Drain servers that died while SSH was disabled so their regions
      // (possibly including meta) get processed.
      this.serverManager.processQueuedDeadServers();
    }

    if (waitForMeta) {
      // Blocks until some server has published a meta location in ZooKeeper.
      metaTableLocator.waitMetaRegionLocation(this.getZooKeeper());
    }
  }
805 
806   private void enableMeta(TableName metaTableName) {
807     if (!this.tableStateManager.isTableState(metaTableName,
808             TableState.State.ENABLED)) {
809       this.assignmentManager.setEnabledTable(metaTableName);
810     }
811   }
812 
813   /**
814    * This function returns a set of region server names under hbase:meta recovering region ZK node
815    * @return Set of meta server names which were recorded in ZK
816    * @throws KeeperException
817    */
818   private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
819     Set<ServerName> result = new HashSet<ServerName>();
820     String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
821       HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
822     List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
823     if (regionFailedServers == null) return result;
824 
825     for(String failedServer : regionFailedServers) {
826       ServerName server = ServerName.parseServerName(failedServer);
827       result.add(server);
828     }
829     return result;
830   }
831 
  /** @return the cache of table descriptors held by this master */
  @Override
  public TableDescriptors getTableDescriptors() {
    return this.tableDescriptors;
  }
836 
  /** @return the {@link ServerManager} tracking live/dead region servers */
  @Override
  public ServerManager getServerManager() {
    return this.serverManager;
  }
841 
  /** @return the master's filesystem abstraction (WAL splitting, layout) */
  @Override
  public MasterFileSystem getMasterFileSystem() {
    return this.fileSystemManager;
  }
846 
  /** @return the manager of per-table ENABLED/DISABLED state */
  @Override
  public TableStateManager getTableStateManager() {
    return tableStateManager;
  }
851 
852   /*
853    * Start up all services. If any of these threads gets an unhandled exception
854    * then they just die with a logged message.  This should be fine because
855    * in general, we do not expect the master to get such unhandled exceptions
856    *  as OOMEs; it should be lightly loaded. See what HRegionServer does if
857    *  need to install an unexpected exception handler.
858    */
859   private void startServiceThreads() throws IOException{
860    // Start the executor service pools
861    this.service.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
862       conf.getInt("hbase.master.executor.openregion.threads", 5));
863    this.service.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
864       conf.getInt("hbase.master.executor.closeregion.threads", 5));
865    this.service.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
866       conf.getInt("hbase.master.executor.serverops.threads", 5));
867    this.service.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
868       conf.getInt("hbase.master.executor.serverops.threads", 5));
869    this.service.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS,
870       conf.getInt("hbase.master.executor.logreplayops.threads", 10));
871 
872    // We depend on there being only one instance of this executor running
873    // at a time.  To do concurrency, would need fencing of enable/disable of
874    // tables.
875    // Any time changing this maxThreads to > 1, pls see the comment at
876    // AccessController#postCreateTableHandler
877    this.service.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
878 
879    // Start log cleaner thread
880    int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
881    this.logCleaner =
882       new LogCleaner(cleanerInterval,
883          this, conf, getMasterFileSystem().getFileSystem(),
884          getMasterFileSystem().getOldLogDir());
885          Threads.setDaemonThreadRunning(logCleaner.getThread(),
886            getServerName().toShortString() + ".oldLogCleaner");
887 
888    //start the hfile archive cleaner thread
889     Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
890     this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
891         .getFileSystem(), archiveDir);
892     Threads.setDaemonThreadRunning(hfileCleaner.getThread(),
893       getServerName().toShortString() + ".archivedHFileCleaner");
894 
895     serviceStarted = true;
896     if (LOG.isTraceEnabled()) {
897       LOG.trace("Started service threads");
898     }
899   }
900 
  /**
   * Stop master-side service threads on the way down: the embedded jetty
   * info server and chores first, then (optionally) wait for region servers
   * to check out, and finally the cleaners and managers.
   */
  protected void stopServiceThreads() {
    if (masterJettyServer != null) {
      LOG.info("Stopping master jetty server");
      try {
        masterJettyServer.stop();
      } catch (Exception e) {
        // Best effort: log and continue the shutdown sequence.
        LOG.error("Failed to stop master jetty server", e);
      }
    }
    super.stopServiceThreads();
    stopChores();
    // Wait for all the remaining region servers to report in IFF we were
    // running a cluster shutdown AND we were NOT aborting.
    if (!isAborted() && this.serverManager != null &&
        this.serverManager.isClusterShutdown()) {
      this.serverManager.letRegionServersShutdown();
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Stopping service threads");
    }
    // Clean up and close up shop. Each stop is null-guarded because startup
    // may have failed before the corresponding component was created.
    if (this.logCleaner!= null) this.logCleaner.interrupt();
    if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
    if (this.quotaManager != null) this.quotaManager.stop();
    if (this.activeMasterManager != null) this.activeMasterManager.stop();
    if (this.serverManager != null) this.serverManager.stop();
    if (this.assignmentManager != null) this.assignmentManager.stop();
    if (this.fileSystemManager != null) this.fileSystemManager.stop();
    if (this.mpmHost != null) this.mpmHost.stop("server shutting down.");
  }
931 
  /**
   * Interrupt the master background chores (balancer, cluster status,
   * catalog janitor, status publisher) so they exit promptly. Null-safe:
   * chores that were never started are skipped.
   */
  private void stopChores() {
    if (this.balancerChore != null) {
      this.balancerChore.interrupt();
    }
    if (this.clusterStatusChore != null) {
      this.clusterStatusChore.interrupt();
    }
    if (this.catalogJanitorChore != null) {
      this.catalogJanitorChore.interrupt();
    }
    if (this.clusterStatusPublisherChore != null){
      clusterStatusPublisherChore.interrupt();
    }
  }
946 
947   /**
948    * @return Get remote side's InetAddress
949    * @throws UnknownHostException
950    */
951   InetAddress getRemoteInetAddress(final int port,
952       final long serverStartCode) throws UnknownHostException {
953     // Do it out here in its own little method so can fake an address when
954     // mocking up in tests.
955     InetAddress ia = RpcServer.getRemoteIp();
956 
957     // The call could be from the local regionserver,
958     // in which case, there is no remote address.
959     if (ia == null && serverStartCode == startcode) {
960       InetSocketAddress isa = rpcServices.getSocketAddress();
961       if (isa != null && isa.getPort() == port) {
962         ia = isa.getAddress();
963       }
964     }
965     return ia;
966   }
967 
968   /**
969    * @return Maximum time we should run balancer for
970    */
971   private int getBalancerCutoffTime() {
972     int balancerCutoffTime =
973       getConfiguration().getInt("hbase.balancer.max.balancing", -1);
974     if (balancerCutoffTime == -1) {
975       // No time period set so create one
976       int balancerPeriod =
977         getConfiguration().getInt("hbase.balancer.period", 300000);
978       balancerCutoffTime = balancerPeriod;
979       // If nonsense period, set it to balancerPeriod
980       if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
981     }
982     return balancerCutoffTime;
983   }
984 
  /**
   * Run the load balancer once, executing any region plans it produces.
   * <p>
   * The run is skipped (returning false) when the master is not initialized,
   * the balancer switch is off, regions are in transition, dead servers are
   * still being processed, or a coprocessor vetoes it via preBalance().
   * @return true if the balancer ran to completion (even with zero plans),
   *         false if the run was skipped or vetoed
   * @throws IOException
   */
  public boolean balance() throws IOException {
    // if master not initialized, don't run balancer.
    if (!this.initialized) {
      LOG.debug("Master has not been initialized, don't run balancer.");
      return false;
    }
    // Do this call outside of synchronized block.
    int maximumBalanceTime = getBalancerCutoffTime();
    synchronized (this.balancer) {
      // If balance not true, don't run balancer.
      if (!this.loadBalancerTracker.isBalancerOn()) return false;
      // Only allow one balance run at at time.
      if (this.assignmentManager.getRegionStates().isRegionsInTransition()) {
        Map<String, RegionState> regionsInTransition =
          this.assignmentManager.getRegionStates().getRegionsInTransition();
        LOG.debug("Not running balancer because " + regionsInTransition.size() +
          " region(s) in transition: " + org.apache.commons.lang.StringUtils.
            abbreviate(regionsInTransition.toString(), 256));
        return false;
      }
      if (this.serverManager.areDeadServersInProgress()) {
        LOG.debug("Not running balancer because processing dead regionserver(s): " +
          this.serverManager.getDeadServers());
        return false;
      }

      // Coprocessors may veto the balance run entirely.
      if (this.cpHost != null) {
        try {
          if (this.cpHost.preBalance()) {
            LOG.debug("Coprocessor bypassing balancer request");
            return false;
          }
        } catch (IOException ioe) {
          LOG.error("Error invoking master coprocessor preBalance()", ioe);
          return false;
        }
      }

      Map<TableName, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
        this.assignmentManager.getRegionStates().getAssignmentsByTable();

      List<RegionPlan> plans = new ArrayList<RegionPlan>();
      //Give the balancer the current cluster state.
      this.balancer.setClusterStatus(getClusterStatus());
      // Collect plans per table; the balancer is invoked once per table.
      for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
        List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
        if (partialPlans != null) plans.addAll(partialPlans);
      }
      long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
      int rpCount = 0;  // number of RegionPlans balanced so far
      long totalRegPlanExecTime = 0;
      if (plans != null && !plans.isEmpty()) {
        for (RegionPlan plan: plans) {
          LOG.info("balance " + plan);
          long balStartTime = System.currentTimeMillis();
          //TODO: bulk assign
          this.assignmentManager.balance(plan);
          totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
          rpCount++;
          // Bail out early when the estimated cost of the next plan (average
          // of executed plans so far) would exceed the cutoff time.
          if (rpCount < plans.size() &&
              // if performing next balance exceeds cutoff time, exit the loop
              (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
            //TODO: After balance, there should not be a cutoff time (keeping it as a security net for now)
            LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
              maximumBalanceTime);
            break;
          }
        }
      }
      if (this.cpHost != null) {
        try {
          // Report only the plans actually executed when we broke out early.
          this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
        } catch (IOException ioe) {
          // balancing already succeeded so don't change the result
          LOG.error("Error invoking master coprocessor postBalance()", ioe);
        }
      }
    }
    // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
    // Return true indicating a success.
    return true;
  }
1067 
1068   /**
1069    * @return Client info for use as prefix on an audit log string; who did an action
1070    */
1071   String getClientIdAuditPrefix() {
1072     return "Client=" + RequestContext.getRequestUserName() + "/" +
1073       RequestContext.get().getRemoteAddress();
1074   }
1075 
1076   /**
1077    * Switch for the background CatalogJanitor thread.
1078    * Used for testing.  The thread will continue to run.  It will just be a noop
1079    * if disabled.
1080    * @param b If false, the catalog janitor won't do anything.
1081    */
1082   public void setCatalogJanitorEnabled(final boolean b) {
1083     this.catalogJanitorChore.setEnabled(b);
1084   }
1085 
1086   @Override
1087   public void dispatchMergingRegions(final HRegionInfo region_a,
1088       final HRegionInfo region_b, final boolean forcible) throws IOException {
1089     checkInitialized();
1090     this.service.submit(new DispatchMergingRegionHandler(this,
1091         this.catalogJanitorChore, region_a, region_b, forcible));
1092   }
1093 
  /**
   * Move a region to a specific server, or to a random server when no
   * destination is supplied.
   * @param encodedRegionName encoded name of the region to move
   * @param destServerName destination server name bytes; null or empty means
   *          pick a random destination (excluding the current host)
   * @throws HBaseIOException wrapping any failure during the move; throws
   *           UnknownRegionException when the region is not found
   */
  void move(final byte[] encodedRegionName,
      final byte[] destServerName) throws HBaseIOException {
    RegionState regionState = assignmentManager.getRegionStates().
      getRegionState(Bytes.toString(encodedRegionName));
    if (regionState == null) {
      throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
    }

    HRegionInfo hri = regionState.getRegion();
    ServerName dest;
    if (destServerName == null || destServerName.length == 0) {
      LOG.info("Passed destination servername is null/empty so " +
        "choosing a server at random");
      // Candidate list excludes the region's current host.
      final List<ServerName> destServers = this.serverManager.createDestinationServersList(
        regionState.getServerName());
      dest = balancer.randomAssignment(hri, destServers);
      if (dest == null) {
        LOG.debug("Unable to determine a plan to assign " + hri);
        return;
      }
    } else {
      dest = ServerName.valueOf(Bytes.toString(destServerName));
      if (dest.equals(serverName) && balancer instanceof BaseLoadBalancer
          && !((BaseLoadBalancer)balancer).shouldBeOnMaster(hri)) {
        // To avoid unnecessary region moving later by balancer. Don't put user
        // regions on master. Regions on master could be put on other region
        // server intentionally by test however.
        LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
          + " to avoid unnecessary region moving later by load balancer,"
          + " because it should not be on master");
        return;
      }
    }

    // Moving to where the region already lives is a no-op.
    if (dest.equals(regionState.getServerName())) {
      LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
        + " because region already assigned to the same server " + dest + ".");
      return;
    }

    // Now we can do the move
    RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);

    try {
      checkInitialized();
      // Coprocessors may veto the move.
      if (this.cpHost != null) {
        if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
          return;
        }
      }
      LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
      this.assignmentManager.balance(rp);
      if (this.cpHost != null) {
        this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
      }
    } catch (IOException ioe) {
      // Surface HBaseIOException unchanged; wrap any other IOException.
      if (ioe instanceof HBaseIOException) {
        throw (HBaseIOException)ioe;
      }
      throw new HBaseIOException(ioe);
    }
  }
1156 
  /**
   * Create a new table. Validation (namespace existence, descriptor sanity,
   * coprocessor hooks) runs synchronously; the actual region creation runs
   * asynchronously in a CreateTableHandler on the master executor.
   * @param hTableDescriptor descriptor of the table to create
   * @param splitKeys optional pre-split boundaries; may be null
   * @throws IOException on validation failure, or MasterNotRunningException
   *           when the master is stopped
   */
  @Override
  public void createTable(HTableDescriptor hTableDescriptor,
      byte [][] splitKeys) throws IOException {
    if (isStopped()) {
      throw new MasterNotRunningException();
    }

    String namespace = hTableDescriptor.getTableName().getNamespaceAsString();
    // NOTE(review): the namespace lookup happens before checkInitialized();
    // presumably intentional so the caller gets a namespace error first —
    // confirm against getNamespaceDescriptor's own state checks.
    getNamespaceDescriptor(namespace); // ensure namespace exists

    HRegionInfo[] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
    checkInitialized();
    sanityCheckTableDescriptor(hTableDescriptor);
    if (cpHost != null) {
      cpHost.preCreateTable(hTableDescriptor, newRegions);
    }
    LOG.info(getClientIdAuditPrefix() + " create " + hTableDescriptor);
    // prepare() runs synchronously so descriptor problems surface to the
    // caller; the submitted handler does the rest asynchronously.
    this.service.submit(new CreateTableHandler(this,
      this.fileSystemManager, hTableDescriptor, conf,
      newRegions, this).prepare());
    if (cpHost != null) {
      cpHost.postCreateTable(hTableDescriptor, newRegions);
    }

  }
1182 
  /**
   * Checks whether the table conforms to some sane limits, and configured
   * values (compression, etc) work. Throws an exception if something is wrong.
   * <p>
   * All checks can be bypassed by setting "hbase.table.sanity.checks" to
   * false, either in the master configuration or per-table in the descriptor.
   * @param htd descriptor of the table being created or modified
   * @throws IOException a DoNotRetryIOException describing the failed check
   */
  private void sanityCheckTableDescriptor(final HTableDescriptor htd) throws IOException {
    final String CONF_KEY = "hbase.table.sanity.checks";
    // Global kill switch for all sanity checks.
    if (!conf.getBoolean(CONF_KEY, true)) {
      return;
    }
    // Per-table override of the kill switch.
    String tableVal = htd.getConfigurationValue(CONF_KEY);
    if (tableVal != null && !Boolean.valueOf(tableVal)) {
      return;
    }

    // check max file size
    long maxFileSizeLowerLimit = 2 * 1024 * 1024L; // 2M is the default lower limit
    long maxFileSize = htd.getMaxFileSize();
    if (maxFileSize < 0) {
      // Descriptor does not set it: fall back to the cluster-wide value.
      maxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, maxFileSizeLowerLimit);
    }
    if (maxFileSize < conf.getLong("hbase.hregion.max.filesize.limit", maxFileSizeLowerLimit)) {
      throw new DoNotRetryIOException("MAX_FILESIZE for table descriptor or "
        + "\"hbase.hregion.max.filesize\" (" + maxFileSize
        + ") is too small, which might cause over splitting into unmanageable "
        + "number of regions. Set " + CONF_KEY + " to false at conf or table descriptor "
          + "if you want to bypass sanity checks");
    }

    // check flush size
    long flushSizeLowerLimit = 1024 * 1024L; // 1M is the default lower limit
    long flushSize = htd.getMemStoreFlushSize();
    if (flushSize < 0) {
      // Descriptor does not set it: fall back to the cluster-wide value.
      flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSizeLowerLimit);
    }
    if (flushSize < conf.getLong("hbase.hregion.memstore.flush.size.limit", flushSizeLowerLimit)) {
      throw new DoNotRetryIOException("MEMSTORE_FLUSHSIZE for table descriptor or "
          + "\"hbase.hregion.memstore.flush.size\" ("+flushSize+") is too small, which might cause"
          + " very frequent flushing. Set " + CONF_KEY + " to false at conf or table descriptor "
          + "if you want to bypass sanity checks");
    }

    // check split policy class can be loaded
    try {
      RegionSplitPolicy.getSplitPolicyClass(htd, conf);
    } catch (Exception ex) {
      throw new DoNotRetryIOException(ex);
    }

    // check compression can be loaded
    checkCompression(htd);

    // check that we have at least 1 CF
    if (htd.getColumnFamilies().length == 0) {
      throw new DoNotRetryIOException("Table should have at least one column family "
          + "Set "+CONF_KEY+" at conf or table descriptor if you want to bypass sanity checks");
    }

    // Per-column-family checks.
    for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
      if (hcd.getTimeToLive() <= 0) {
        throw new DoNotRetryIOException("TTL for column family " + hcd.getNameAsString()
          + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
          + "if you want to bypass sanity checks");
      }

      // check blockSize
      if (hcd.getBlocksize() < 1024 || hcd.getBlocksize() > 16 * 1024 * 1024) {
        throw new DoNotRetryIOException("Block size for column family " + hcd.getNameAsString()
          + "  must be between 1K and 16MB Set "+CONF_KEY+" to false at conf or table descriptor "
          + "if you want to bypass sanity checks");
      }

      // check versions
      if (hcd.getMinVersions() < 0) {
        throw new DoNotRetryIOException("Min versions for column family " + hcd.getNameAsString()
          + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
          + "if you want to bypass sanity checks");
      }
      // max versions already being checked

      // check replication scope
      if (hcd.getScope() < 0) {
        throw new DoNotRetryIOException("Replication scope for column family "
          + hcd.getNameAsString() + "  must be positive. Set " + CONF_KEY + " to false at conf "
          + "or table descriptor if you want to bypass sanity checks");
      }

      // TODO: should we check coprocessors and encryption ?
    }
  }
1273 
1274   private void startActiveMasterManager() throws KeeperException {
1275     String backupZNode = ZKUtil.joinZNode(
1276       zooKeeper.backupMasterAddressesZNode, serverName.toString());
1277     /*
1278     * Add a ZNode for ourselves in the backup master directory since we
1279     * may not become the active master. If so, we want the actual active
1280     * master to know we are backup masters, so that it won't assign
1281     * regions to us if so configured.
1282     *
1283     * If we become the active master later, ActiveMasterManager will delete
1284     * this node explicitly.  If we crash before then, ZooKeeper will delete
1285     * this node for us since it is ephemeral.
1286     */
1287     LOG.info("Adding backup master ZNode " + backupZNode);
1288     if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode,
1289         serverName, masterInfoPort)) {
1290       LOG.warn("Failed create of " + backupZNode + " by " + serverName);
1291     }
1292 
1293     activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName,
1294         masterInfoPort, this);
1295     // Start a thread to try to become the active master, so we won't block here
1296     Threads.setDaemonThreadRunning(new Thread(new Runnable() {
1297       public void run() {
1298         int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT,
1299           HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
1300         // If we're a backup master, stall until a primary to writes his address
1301         if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP,
1302             HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
1303           LOG.debug("HMaster started in backup mode. "
1304             + "Stalling until master znode is written.");
1305           // This will only be a minute or so while the cluster starts up,
1306           // so don't worry about setting watches on the parent znode
1307           while (!activeMasterManager.hasActiveMaster()) {
1308             LOG.debug("Waiting for master address ZNode to be written "
1309               + "(Also watching cluster state node)");
1310             Threads.sleep(timeout);
1311           }
1312         }
1313         MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
1314         status.setDescription("Master startup");
1315         try {
1316           if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
1317             finishActiveMasterInitialization(status);
1318           }
1319         } catch (Throwable t) {
1320           status.setStatus("Failed to become active: " + t.getMessage());
1321           LOG.fatal("Failed to become active master", t);
1322           // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
1323           if (t instanceof NoClassDefFoundError &&
1324               t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
1325             // improved error message for this special case
1326             abort("HBase is having a problem with its Hadoop jars.  You may need to "
1327               + "recompile HBase against Hadoop version "
1328               +  org.apache.hadoop.util.VersionInfo.getVersion()
1329               + " or change your hadoop jars to start properly", t);
1330           } else {
1331             abort("Unhandled exception. Starting shutdown.", t);
1332           }
1333         } finally {
1334           status.cleanup();
1335         }
1336       }
1337     }, getServerName().toShortString() + ".activeMasterManager"));
1338   }
1339 
1340   private void checkCompression(final HTableDescriptor htd)
1341   throws IOException {
1342     if (!this.masterCheckCompression) return;
1343     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1344       checkCompression(hcd);
1345     }
1346   }
1347 
1348   private void checkCompression(final HColumnDescriptor hcd)
1349   throws IOException {
1350     if (!this.masterCheckCompression) return;
1351     CompressionTest.testCompression(hcd.getCompression());
1352     CompressionTest.testCompression(hcd.getCompactionCompression());
1353   }
1354 
1355   private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1356     byte[][] splitKeys) {
1357     long regionId = System.currentTimeMillis();
1358     HRegionInfo[] hRegionInfos = null;
1359     if (splitKeys == null || splitKeys.length == 0) {
1360       hRegionInfos = new HRegionInfo[]{new HRegionInfo(hTableDescriptor.getTableName(), null, null,
1361                 false, regionId)};
1362     } else {
1363       int numRegions = splitKeys.length + 1;
1364       hRegionInfos = new HRegionInfo[numRegions];
1365       byte[] startKey = null;
1366       byte[] endKey = null;
1367       for (int i = 0; i < numRegions; i++) {
1368         endKey = (i == splitKeys.length) ? null : splitKeys[i];
1369         hRegionInfos[i] =
1370              new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey,
1371                  false, regionId);
1372         startKey = endKey;
1373       }
1374     }
1375     return hRegionInfos;
1376   }
1377 
  /** @return true when {@code tableName} is the hbase:meta catalog table. */
  private static boolean isCatalogTable(final TableName tableName) {
    return tableName.equals(TableName.META_TABLE_NAME);
  }
1381 
1382   @Override
1383   public void deleteTable(final TableName tableName) throws IOException {
1384     checkInitialized();
1385     if (cpHost != null) {
1386       cpHost.preDeleteTable(tableName);
1387     }
1388     LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
1389     this.service.submit(new DeleteTableHandler(tableName, this, this).prepare());
1390     if (cpHost != null) {
1391       cpHost.postDeleteTable(tableName);
1392     }
1393   }
1394 
1395   @Override
1396   public void truncateTable(TableName tableName, boolean preserveSplits) throws IOException {
1397     checkInitialized();
1398     if (cpHost != null) {
1399       cpHost.preTruncateTable(tableName);
1400     }
1401     LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
1402     TruncateTableHandler handler = new TruncateTableHandler(tableName, this, this, preserveSplits);
1403     handler.prepare();
1404     handler.process();
1405     if (cpHost != null) {
1406       cpHost.postTruncateTable(tableName);
1407     }
1408   }
1409 
1410   @Override
1411   public void addColumn(final TableName tableName, final HColumnDescriptor columnDescriptor)
1412       throws IOException {
1413     checkInitialized();
1414     checkCompression(columnDescriptor);
1415     if (cpHost != null) {
1416       if (cpHost.preAddColumn(tableName, columnDescriptor)) {
1417         return;
1418       }
1419     }
1420     //TODO: we should process this (and some others) in an executor
1421     new TableAddFamilyHandler(tableName, columnDescriptor, this, this).prepare().process();
1422     if (cpHost != null) {
1423       cpHost.postAddColumn(tableName, columnDescriptor);
1424     }
1425   }
1426 
1427   @Override
1428   public void modifyColumn(TableName tableName, HColumnDescriptor descriptor)
1429       throws IOException {
1430     checkInitialized();
1431     checkCompression(descriptor);
1432     if (cpHost != null) {
1433       if (cpHost.preModifyColumn(tableName, descriptor)) {
1434         return;
1435       }
1436     }
1437     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1438     new TableModifyFamilyHandler(tableName, descriptor, this, this)
1439       .prepare().process();
1440     if (cpHost != null) {
1441       cpHost.postModifyColumn(tableName, descriptor);
1442     }
1443   }
1444 
1445   @Override
1446   public void deleteColumn(final TableName tableName, final byte[] columnName)
1447       throws IOException {
1448     checkInitialized();
1449     if (cpHost != null) {
1450       if (cpHost.preDeleteColumn(tableName, columnName)) {
1451         return;
1452       }
1453     }
1454     LOG.info(getClientIdAuditPrefix() + " delete " + Bytes.toString(columnName));
1455     new TableDeleteFamilyHandler(tableName, columnName, this, this).prepare().process();
1456     if (cpHost != null) {
1457       cpHost.postDeleteColumn(tableName, columnName);
1458     }
1459   }
1460 
1461   @Override
1462   public void enableTable(final TableName tableName) throws IOException {
1463     checkInitialized();
1464     if (cpHost != null) {
1465       cpHost.preEnableTable(tableName);
1466     }
1467     LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
1468     this.service.submit(new EnableTableHandler(this, tableName,
1469       assignmentManager, tableLockManager, false).prepare());
1470     if (cpHost != null) {
1471       cpHost.postEnableTable(tableName);
1472    }
1473   }
1474 
1475   @Override
1476   public void disableTable(final TableName tableName) throws IOException {
1477     checkInitialized();
1478     if (cpHost != null) {
1479       cpHost.preDisableTable(tableName);
1480     }
1481     LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
1482     this.service.submit(new DisableTableHandler(this, tableName,
1483       assignmentManager, tableLockManager, false).prepare());
1484     if (cpHost != null) {
1485       cpHost.postDisableTable(tableName);
1486     }
1487   }
1488 
  /**
   * Return the region and current deployment for the region containing
   * the given row. If the region cannot be found, returns null. If it
   * is found, but not currently deployed, the second element of the pair
   * may be null.
   *
   * @param tableName table whose region is sought
   * @param rowKey row used to locate the containing region
   * @return pair of (region info, hosting server) or null if no matching
   *   region row was seen in hbase:meta
   * @throws IOException if the meta scan fails
   */
  Pair<HRegionInfo, ServerName> getTableRegionForRow(
      final TableName tableName, final byte [] rowKey)
  throws IOException {
    // Holder written from inside the anonymous visitor callback below.
    final AtomicReference<Pair<HRegionInfo, ServerName>> result =
      new AtomicReference<Pair<HRegionInfo, ServerName>>(null);

    MetaScannerVisitor visitor =
      new MetaScannerVisitorBase() {
        @Override
        public boolean processRow(Result data) throws IOException {
          // Returning true continues the scan; false stops it.
          if (data == null || data.size() <= 0) {
            // Empty row: skip it but keep scanning.
            return true;
          }
          Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(data);
          if (pair == null) {
            // Row did not parse as a region entry; stop scanning.
            return false;
          }
          if (!pair.getFirst().getTable().equals(tableName)) {
            // Scanned past the target table's rows; stop.
            return false;
          }
          result.set(pair);
          return true;
        }
    };

    // Scan hbase:meta starting at the region containing rowKey; row limit 1.
    MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
    return result.get();
  }
1523 
1524   @Override
1525   public void modifyTable(final TableName tableName, final HTableDescriptor descriptor)
1526       throws IOException {
1527     checkInitialized();
1528     sanityCheckTableDescriptor(descriptor);
1529     if (cpHost != null) {
1530       cpHost.preModifyTable(tableName, descriptor);
1531     }
1532     LOG.info(getClientIdAuditPrefix() + " modify " + tableName);
1533     new ModifyTableHandler(tableName, descriptor, this, this).prepare().process();
1534     if (cpHost != null) {
1535       cpHost.postModifyTable(tableName, descriptor);
1536     }
1537   }
1538 
1539   @Override
1540   public void checkTableModifiable(final TableName tableName)
1541       throws IOException, TableNotFoundException, TableNotDisabledException {
1542     if (isCatalogTable(tableName)) {
1543       throw new IOException("Can't modify catalog tables");
1544     }
1545     if (!MetaTableAccessor.tableExists(getShortCircuitConnection(), tableName)) {
1546       throw new TableNotFoundException(tableName);
1547     }
1548     if (!getAssignmentManager().getTableStateManager().
1549         isTableState(tableName, TableState.State.DISABLED)) {
1550       throw new TableNotDisabledException(tableName);
1551     }
1552   }
1553 
1554   /**
1555    * @return cluster status
1556    */
1557   public ClusterStatus getClusterStatus() throws InterruptedIOException {
1558     // Build Set of backup masters from ZK nodes
1559     List<String> backupMasterStrings;
1560     try {
1561       backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1562         this.zooKeeper.backupMasterAddressesZNode);
1563     } catch (KeeperException e) {
1564       LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1565       backupMasterStrings = new ArrayList<String>(0);
1566     }
1567     List<ServerName> backupMasters = new ArrayList<ServerName>(
1568                                           backupMasterStrings.size());
1569     for (String s: backupMasterStrings) {
1570       try {
1571         byte [] bytes;
1572         try {
1573           bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(
1574               this.zooKeeper.backupMasterAddressesZNode, s));
1575         } catch (InterruptedException e) {
1576           throw new InterruptedIOException();
1577         }
1578         if (bytes != null) {
1579           ServerName sn;
1580           try {
1581             sn = ServerName.parseFrom(bytes);
1582           } catch (DeserializationException e) {
1583             LOG.warn("Failed parse, skipping registering backup server", e);
1584             continue;
1585           }
1586           backupMasters.add(sn);
1587         }
1588       } catch (KeeperException e) {
1589         LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1590                  "backup servers"), e);
1591       }
1592     }
1593     Collections.sort(backupMasters, new Comparator<ServerName>() {
1594       @Override
1595       public int compare(ServerName s1, ServerName s2) {
1596         return s1.getServerName().compareTo(s2.getServerName());
1597       }});
1598 
1599     String clusterId = fileSystemManager != null ?
1600       fileSystemManager.getClusterId().toString() : null;
1601     Map<String, RegionState> regionsInTransition = assignmentManager != null ?
1602       assignmentManager.getRegionStates().getRegionsInTransition() : null;
1603     String[] coprocessors = cpHost != null ? getMasterCoprocessors() : null;
1604     boolean balancerOn = loadBalancerTracker != null ?
1605       loadBalancerTracker.isBalancerOn() : false;
1606     Map<ServerName, ServerLoad> onlineServers = null;
1607     Set<ServerName> deadServers = null;
1608     if (serverManager != null) {
1609       deadServers = serverManager.getDeadServers().copyServerNames();
1610       onlineServers = serverManager.getOnlineServers();
1611     }
1612     return new ClusterStatus(VersionInfo.getVersion(), clusterId,
1613       onlineServers, deadServers, serverName, backupMasters,
1614       regionsInTransition, coprocessors, balancerOn);
1615   }
1616 
  /**
   * The set of loaded coprocessors is stored in a static set. Since it's
   * statically allocated, it does not require that HMaster's cpHost be
   * initialized prior to accessing it.
   * @return a String representation of the set of names of the loaded
   * coprocessors.
   */
  public static String getLoadedCoprocessors() {
    // Delegates to the process-wide static registry in CoprocessorHost.
    return CoprocessorHost.getLoadedCoprocessors().toString();
  }
1627 
  /**
   * @return timestamp in millis when HMaster was started.
   */
  public long getMasterStartTime() {
    // startcode is the process start timestamp recorded at construction.
    return startcode;
  }
1634 
  /**
   * @return timestamp in millis when HMaster became the active master
   * (as opposed to {@link #getMasterStartTime()}, the process start time).
   */
  public long getMasterActiveTime() {
    return masterActiveTime;
  }
1641 
1642   public int getRegionServerInfoPort(final ServerName sn) {
1643     RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn);
1644     if (info == null || info.getInfoPort() == 0) {
1645       return conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
1646         HConstants.DEFAULT_REGIONSERVER_INFOPORT);
1647     }
1648     return info.getInfoPort();
1649   }
1650 
1651   /**
1652    * @return array of coprocessor SimpleNames.
1653    */
1654   public String[] getMasterCoprocessors() {
1655     Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
1656     return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
1657   }
1658 
1659   @Override
1660   public void abort(final String msg, final Throwable t) {
1661     if (isAborted() || isStopped()) {
1662       return;
1663     }
1664     if (cpHost != null) {
1665       // HBASE-4014: dump a list of loaded coprocessors.
1666       LOG.fatal("Master server abort: loaded coprocessors are: " +
1667           getLoadedCoprocessors());
1668     }
1669     if (t != null) LOG.fatal(msg, t);
1670     stop(msg);
1671   }
1672 
  /** @return this master's ZooKeeper watcher. */
  @Override
  public ZooKeeperWatcher getZooKeeper() {
    return zooKeeper;
  }
1677 
  /** @return the master coprocessor host; may be null before initialization. */
  @Override
  public MasterCoprocessorHost getMasterCoprocessorHost() {
    return cpHost;
  }
1682 
  /** @return the master quota manager. */
  @Override
  public MasterQuotaManager getMasterQuotaManager() {
    return quotaManager;
  }
1687 
  /** @return the server name this master is running under. */
  @Override
  public ServerName getServerName() {
    return this.serverName;
  }
1692 
  /** @return the assignment manager; may be null before initialization. */
  @Override
  public AssignmentManager getAssignmentManager() {
    return this.assignmentManager;
  }
1697 
  /** @return buffer of fatal errors reported by region servers. */
  public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
    return rsFatals;
  }
1701 
  /**
   * Shut down the whole cluster: run the preShutdown coprocessor hook,
   * tell the server manager to shut the cluster down, then mark the cluster
   * as down in ZooKeeper. Hook and ZK failures are logged but deliberately
   * do not abort the shutdown sequence.
   */
  public void shutdown() {
    if (cpHost != null) {
      try {
        cpHost.preShutdown();
      } catch (IOException ioe) {
        // Best-effort hook: log and continue shutting down.
        LOG.error("Error call master coprocessor preShutdown()", ioe);
      }
    }

    if (this.serverManager != null) {
      this.serverManager.shutdownCluster();
    }
    if (this.clusterStatusTracker != null){
      try {
        this.clusterStatusTracker.setClusterDown();
      } catch (KeeperException e) {
        // Best-effort ZK update: log and continue.
        LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
      }
    }
  }
1722 
  /**
   * Stop only this master (not the cluster). Runs the preStopMaster
   * coprocessor hook first; a hook failure is logged but does not prevent
   * the stop.
   */
  public void stopMaster() {
    if (cpHost != null) {
      try {
        cpHost.preStopMaster();
      } catch (IOException ioe) {
        LOG.error("Error call master coprocessor preStopMaster()", ioe);
      }
    }
    stop("Stopped by " + Thread.currentThread().getName());
  }
1733 
  /**
   * @throws ServerNotRunningYetException if the master RPC services have not
   * started yet.
   */
  void checkServiceStarted() throws ServerNotRunningYetException {
    if (!serviceStarted) {
      throw new ServerNotRunningYetException("Server is not running yet");
    }
  }
1739 
  /**
   * @throws ServerNotRunningYetException if services have not started
   * @throws PleaseHoldException if services started but master initialization
   * has not completed yet
   */
  void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException {
    checkServiceStarted();
    if (!this.initialized) {
      throw new PleaseHoldException("Master is initializing");
    }
  }
1746 
1747   void checkNamespaceManagerReady() throws IOException {
1748     checkInitialized();
1749     if (tableNamespaceManager == null ||
1750         !tableNamespaceManager.isTableAvailableAndInitialized()) {
1751       throw new IOException("Table Namespace Manager not ready yet, try again later");
1752     }
1753   }
  /**
   * Report whether this master is currently the active master or not.
   * If not active master, we are parked on ZK waiting to become active.
   *
   * This method is used for testing.
   *
   * @return true if this is the active master, false if standby.
   */
  public boolean isActiveMaster() {
    return isActiveMaster;
  }
1765 
  /**
   * Report whether this master has completed with its initialization and is
   * ready.  If ready, the master is also the active master.  A standby master
   * is never ready (see {@link #isActiveMaster()}).
   *
   * This method is used for testing.
   *
   * @return true if master is ready to go, false if not.
   */
  @Override
  public boolean isInitialized() {
    return initialized;
  }
1779 
  /**
   * ServerShutdownHandlerEnabled is set false before completing
   * assignMeta to prevent processing of ServerShutdownHandler.
   * @return true if assignMeta has completed and server shutdown handling
   * is enabled; false otherwise.
   */
  @Override
  public boolean isServerShutdownHandlerEnabled() {
    return this.serverShutdownHandlerEnabled;
  }
1789 
  /**
   * Report whether this master has started initialization and is about to do
   * meta region assignment.
   * @return true if master is in initialization and about to assign the
   * hbase:meta regions
   */
  public boolean isInitializationStartsMetaRegionAssignment() {
    return this.initializationBeforeMetaAssignment;
  }
1797 
  /** Assign the given region via the assignment manager. */
  public void assignRegion(HRegionInfo hri) {
    assignmentManager.assign(hri);
  }
1801 
1802   /**
1803    * Compute the average load across all region servers.
1804    * Currently, this uses a very naive computation - just uses the number of
1805    * regions being served, ignoring stats about number of requests.
1806    * @return the average load
1807    */
1808   public double getAverageLoad() {
1809     if (this.assignmentManager == null) {
1810       return 0;
1811     }
1812 
1813     RegionStates regionStates = this.assignmentManager.getRegionStates();
1814     if (regionStates == null) {
1815       return 0;
1816     }
1817     return regionStates.getAverageLoad();
1818   }
1819 
1820   @Override
1821   public boolean registerService(Service instance) {
1822     /*
1823      * No stacking of instances is allowed for a single service name
1824      */
1825     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
1826     if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
1827       LOG.error("Coprocessor service "+serviceDesc.getFullName()+
1828           " already registered, rejecting request from "+instance
1829       );
1830       return false;
1831     }
1832 
1833     coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
1834     if (LOG.isDebugEnabled()) {
1835       LOG.debug("Registered master coprocessor service: service="+serviceDesc.getFullName());
1836     }
1837     return true;
1838   }
1839 
1840   /**
1841    * Utility for constructing an instance of the passed HMaster class.
1842    * @param masterClass
1843    * @param conf
1844    * @return HMaster instance.
1845    */
1846   public static HMaster constructMaster(Class<? extends HMaster> masterClass,
1847       final Configuration conf, final CoordinatedStateManager cp)  {
1848     try {
1849       Constructor<? extends HMaster> c =
1850         masterClass.getConstructor(Configuration.class, CoordinatedStateManager.class);
1851       return c.newInstance(conf, cp);
1852     } catch (InvocationTargetException ite) {
1853       Throwable target = ite.getTargetException() != null?
1854         ite.getTargetException(): ite;
1855       if (target.getCause() != null) target = target.getCause();
1856       throw new RuntimeException("Failed construction of Master: " +
1857         masterClass.toString(), target);
1858     } catch (Exception e) {
1859       throw new RuntimeException("Failed construction of Master: " +
1860         masterClass.toString() + ((e.getCause() != null)?
1861           e.getCause().getMessage(): ""), e);
1862     }
1863   }
1864 
  /**
   * Main program entry point; delegates to {@link HMasterCommandLine}.
   * @see org.apache.hadoop.hbase.master.HMasterCommandLine
   */
  public static void main(String [] args) {
    VersionInfo.logVersion();
    new HMasterCommandLine(HMaster.class).doMain(args);
  }
1872 
  /** @return the HFile cleaner chore. */
  public HFileCleaner getHFileCleaner() {
    return this.hfileCleaner;
  }
1876 
  /**
   * Exposed for TESTING!
   * @return the underlying snapshot manager
   */
  public SnapshotManager getSnapshotManagerForTesting() {
    return this.snapshotManager;
  }
1884 
1885   @Override
1886   public void createNamespace(NamespaceDescriptor descriptor) throws IOException {
1887     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1888     checkNamespaceManagerReady();
1889     if (cpHost != null) {
1890       if (cpHost.preCreateNamespace(descriptor)) {
1891         return;
1892       }
1893     }
1894     LOG.info(getClientIdAuditPrefix() + " creating " + descriptor);
1895     tableNamespaceManager.create(descriptor);
1896     if (cpHost != null) {
1897       cpHost.postCreateNamespace(descriptor);
1898     }
1899   }
1900 
1901   @Override
1902   public void modifyNamespace(NamespaceDescriptor descriptor) throws IOException {
1903     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1904     checkNamespaceManagerReady();
1905     if (cpHost != null) {
1906       if (cpHost.preModifyNamespace(descriptor)) {
1907         return;
1908       }
1909     }
1910     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1911     tableNamespaceManager.update(descriptor);
1912     if (cpHost != null) {
1913       cpHost.postModifyNamespace(descriptor);
1914     }
1915   }
1916 
1917   @Override
1918   public void deleteNamespace(String name) throws IOException {
1919     checkNamespaceManagerReady();
1920     if (cpHost != null) {
1921       if (cpHost.preDeleteNamespace(name)) {
1922         return;
1923       }
1924     }
1925     LOG.info(getClientIdAuditPrefix() + " delete " + name);
1926     tableNamespaceManager.remove(name);
1927     if (cpHost != null) {
1928       cpHost.postDeleteNamespace(name);
1929     }
1930   }
1931 
1932   @Override
1933   public NamespaceDescriptor getNamespaceDescriptor(String name) throws IOException {
1934     checkNamespaceManagerReady();
1935     NamespaceDescriptor nsd = tableNamespaceManager.get(name);
1936     if (nsd == null) {
1937       throw new NamespaceNotFoundException(name);
1938     }
1939     return nsd;
1940   }
1941 
  /** @return descriptors of all namespaces known to the namespace manager. */
  @Override
  public List<NamespaceDescriptor> listNamespaceDescriptors() throws IOException {
    checkNamespaceManagerReady();
    return Lists.newArrayList(tableNamespaceManager.list());
  }
1947 
1948   @Override
1949   public List<HTableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
1950     getNamespaceDescriptor(name); // check that namespace exists
1951     return Lists.newArrayList(tableDescriptors.getByNamespace(name).values());
1952   }
1953 
1954   @Override
1955   public List<TableName> listTableNamesByNamespace(String name) throws IOException {
1956     List<TableName> tableNames = Lists.newArrayList();
1957     getNamespaceDescriptor(name); // check that namespace exists
1958     for (HTableDescriptor descriptor: tableDescriptors.getByNamespace(name).values()) {
1959       tableNames.add(descriptor.getTableName());
1960     }
1961     return tableNames;
1962   }
1963 }